In [1]:
# Demonstrating Recommendation System Filtering Techniques: Collaborative, Content-Based, and Hybrid

In [5]:
## 1. Setup & Libraries
!pip install scikit-surprise --quiet

In [6]:
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-package

ImportError: numpy.core.multiarray failed to import (auto-generated because you didn't call 'numpy.import_array()' after cimporting numpy; use '<void>numpy._import_array' to disable if you are certain you don't need it).

In [None]:
## 2. Load Sample Dataset (MovieLens 100k)
# prompt=False downloads the dataset without asking for interactive confirmation,
# so the notebook can be executed unattended (Restart & Run All).
data = Dataset.load_builtin('ml-100k', prompt=False)
# A fixed random_state makes the 75/25 train/test split identical on every run,
# which keeps the reported metrics comparable between executions.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

In [None]:
## 3. Collaborative Filtering (SVD)
# SVD initialises its factors randomly; seeding it makes the fitted model and
# the RMSE below reproducible across runs.
model = SVD(random_state=42)
model.fit(trainset)
# Score the held-out ratings and report the root-mean-squared error.
predictions = model.test(testset)
print("Collaborative Filtering RMSE:")
accuracy.rmse(predictions)

In [None]:
## 4. Content-Based Filtering using Book Data
# Both CSV files live in the same goodbooks-10k repository folder, so the
# shared prefix is spelled out once and each file name is appended to it.
GOODBOOKS_BASE_URL = 'https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master'
books_url = GOODBOOKS_BASE_URL + '/books.csv'
ratings_url = GOODBOOKS_BASE_URL + '/ratings.csv'

In [None]:
# Download the two CSV files (books first, then ratings) into DataFrames.
books, ratings = (pd.read_csv(source) for source in (books_url, ratings_url))

In [None]:
# Build the user x book rating matrix in sparse form.
# A dense pivot stores every missing (user, book) pair explicitly as 0, so its
# memory footprint grows with users x books instead of with the number of
# ratings; the CSR matrix only stores the ratings that actually exist.
# Duplicate (user_id, book_id) rows would make `pivot` raise and would be
# summed by the sparse constructor, so only the last rating per pair is kept.
ratings_unique = ratings.drop_duplicates(subset=['user_id', 'book_id'], keep='last')
user_codes, user_ids = pd.factorize(ratings_unique['user_id'], sort=True)
book_codes, book_ids = pd.factorize(ratings_unique['book_id'], sort=True)
# NOTE: `user_item` is now a scipy CSR matrix (rows follow `user_ids`,
# columns follow `book_ids`) rather than a DataFrame.
user_item = sparse.csr_matrix(
    (ratings_unique['rating'].to_numpy(dtype=float), (user_codes, book_codes)),
    shape=(len(user_ids), len(book_ids)),
)
# Item-item cosine similarity computed from the rating columns: two books are
# similar when the same users rated them similarly. The result is a dense
# (books x books) array.
item_sim = cosine_similarity(user_item.T)
book_index = pd.Index(book_ids, name='book_id')
item_sim_df = pd.DataFrame(item_sim, index=book_index, columns=book_index)

In [None]:
def recommend_similar_items(item_id, top_n=5, sim_df=None):
    """Return the ``top_n`` items most similar to ``item_id``.

    Parameters
    ----------
    item_id : hashable
        Column label of the query item in the similarity matrix.
    top_n : int, default 5
        Maximum number of neighbours to return.
    sim_df : pandas.DataFrame, optional
        Square item-item similarity matrix (items as both index and columns).
        Defaults to the notebook-level ``item_sim_df``.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by item id, sorted from most to least
        similar. The query item itself is never included.

    Raises
    ------
    KeyError
        If ``item_id`` is not a column of the similarity matrix.
    """
    if sim_df is None:
        sim_df = item_sim_df
    if item_id not in sim_df.columns:
        raise KeyError(f"item_id {item_id!r} not found in the similarity matrix")
    # Remove the query item by label instead of skipping the first sorted row:
    # when another item ties with the self-similarity, the query item is not
    # guaranteed to sort first and could otherwise be recommended to itself.
    neighbours = sim_df[item_id].drop(labels=item_id, errors='ignore')
    return neighbours.nlargest(top_n)

In [None]:
# Print a heading, then the five nearest neighbours of book 1.
print("Content-Based Recommendations for Book ID 1:")
similar_to_book_one = recommend_similar_items(1)
print(similar_to_book_one)

In [None]:
## 5. Hybrid Filtering (Simple Weighted Combination)
def hybrid_score(user_id, item_id, alpha=0.5, cf_model=None, sim_df=None):
    """Blend a collaborative-filtering prediction with an item-similarity score.

    The result is ``alpha * cf_pred + (1 - alpha) * cbf_sim`` where ``cf_pred``
    is the model's estimated rating for ``(user_id, item_id)`` and ``cbf_sim``
    is the mean of the item's column in the similarity matrix.

    Parameters
    ----------
    user_id, item_id
        Raw ids passed to ``cf_model.predict``; ``item_id`` is also used as the
        column label in the similarity matrix.
    alpha : float, default 0.5
        Weight of the collaborative-filtering term.
    cf_model : object with ``predict(uid, iid).est``, optional
        Defaults to the notebook-level ``model``.
    sim_df : pandas.DataFrame, optional
        Item-item similarity matrix. Defaults to the notebook-level
        ``item_sim_df``.

    Returns
    -------
    float
        The weighted score. Items missing from the similarity matrix
        contribute a similarity of 0.0 instead of turning the result into NaN.

    Notes
    -----
    In this notebook ``model`` is fitted on MovieLens ratings while
    ``item_sim_df`` is built from goodbooks-10k, so the same id refers to
    different items in the two terms; the score is illustrative only.
    NOTE(review): the two terms are presumably on different scales (rating
    estimate vs. cosine similarity) -- consider normalising before blending.
    NOTE(review): datasets that Surprise reads from files typically use string
    raw ids -- confirm whether ids should be passed as ``str`` here.
    """
    import warnings

    if cf_model is None:
        cf_model = model
    if sim_df is None:
        sim_df = item_sim_df

    try:
        cf_pred = cf_model.predict(user_id, item_id).est
    except Exception as exc:
        # Keep the original best-effort fallback, but do not swallow
        # KeyboardInterrupt/SystemExit and make the failure visible.
        warnings.warn(f"CF prediction failed ({exc!r}); using 0 for the CF term")
        cf_pred = 0.0

    if item_id in sim_df.columns:
        cbf_sim = sim_df[item_id].mean()
        # An empty or all-NaN column has no usable similarity information.
        if pd.isna(cbf_sim):
            cbf_sim = 0.0
    else:
        cbf_sim = 0.0
    return alpha * cf_pred + (1 - alpha) * cbf_sim

In [None]:
## 6. Demonstration of a Hybrid Score
# Example user/item pair; the score is computed first, then reported.
sample_user, sample_item = 123, 42
blended = hybrid_score(sample_user, sample_item)
print(f"Hybrid Score for User {sample_user} and Item {sample_item}: {blended}")