Building Recommendation Engines in Python
Rob O'Callaghan
Director of Data
similarities = cosine_similarity(user_ratings_pivot)
cosine_similarity_df = pd.DataFrame(similarities, index=user_ratings_pivot.index, columns=user_ratings_pivot.index)
cosine_similarity_df.head()
User 001 User 002 User 003
User 001 1.0 -0.4 0.3
User 002 -0.4 1.0 -0.5
User 003 0.3 -0.5 1.0
... ... ... ...
User 001 User 002 User 003
User 001 1.0 -0.4 0.3
User 002 -0.4 1.0 -0.5
User 003 0.3 -0.5 1.0
... ... ... ...
v
User 001 User 002 |User 003|
-------------------------------------------------------
User 001 1.0 -0.4 | 0.3| <-
-------------------------------------------------------
User 002 -0.4 1.0 -0.5
User 003 0.3 -0.5 1.0
... ... ... ...
v
User 001 |User 002 | User 003
-----------------------------------------
User 001 1.0 | -0.4 | 0.3 <-
------------------------------------------
User 002 -0.4 1.0 -0.5
User 003 0.3 -0.5 1.0
... ... ... ...
user_similarity_series = user_similarities.loc['user_001']
ordered_similarities = user_similarity_series.sort_values(ascending=False)
nearest_neighbors = ordered_similarities[1:4].index
print(nearest_neighbors)
user_007
user_042
user_003
neighbor_ratings = user_ratings_table.reindex(nearest_neighbors)
neighbor_ratings['Catch-22'].mean()
3.2
print(user_ratings_pivot)
The Great Gatsby Catch-22 Fifty Shades of Grey
User_233 0.0 0.0 0.0
User_651 0.0 0.5 -0.5
... ... ... ...
print(user_ratings_table)
The Great Gatsby Catch-22 Fifty Shades of Grey
User_233 NaN NaN NaN
User_651 NaN 5.0 4.0
... ... ... ...v
user_ratings_pivot.drop("Catch-22", axis=1, inplace=True)
target_user_x = user_ratings_pivot.loc[["user_001"]]
print(target_user_x)
The Great Gatsby Fifty Shades of Grey Iliad
User_001 4.0 2.0 3.0
other_users_y = user_ratings_table["Catch-22"]
print(other_users_y)
[NaN, '5.0', '3.0', '4.0', '5.0' ...]
other_users_x = user_ratings_pivot[other_users_y.notnull()]
print(other_users_x)
The Great Gatsby Fifty Shades of Grey Iliad
User_651 0.0 -0.5 -0.5
User_442 1.0 0.0 1.0
... ... ... ...
other_users_y.dropna(inplace=True)
print(other_users_y)
['5.0', '3.0', '4.0','5.0' ...]
from sklearn.neighbors import KNeighborsRegressor
user_knn = KNeighborsRegressor(metric='cosine', n_neighbors=3)
user_knn.fit(other_users_x, other_users_y)
user_user_pred = user_knn.predict(target_user_x)
print(user_user_pred)
3.3
from sklearn.neighbors import KNeighborsClassifier
user_knn = KNeighborsClassifier(metric='cosine', n_neighbors=3)
user_knn.fit(other_users_x, other_users_y)
user_user_pred = user_knn.predict(target_user_x)
print(user_user_pred)
3
Building Recommendation Engines in Python