Building Recommendation Engines in Python
Rob O'Callaghan
Director of Data
print(book_ratings_df.shape)
(220, 500)
avg_ratings = book_ratings_df.mean(axis=1)
print(avg_ratings)
array([[4.5],
[3.5],
[2.5],
[3.5],
...
[2.2]])
user_ratings_pivot_centered = user_ratings_df.sub(avg_ratings, axis=0)
user_ratings_df.fillna(0, inplace=True)
print(user_ratings_df)
The Great Gatsby The Catcher in the Rye Fifty Shades of Grey
User_233 0.0 0.0 0.0
User_651 0.0 0.5 -0.5
User_965 0.5 -0.5 0.0
... ... ... ...
from scipy.sparse.linalg import svds
U, sigma, Vt = svds(user_ratings_pivot_centered)
print(U.shape)
(610, 6)
print(Vt.shape)
(6, 1000)
print(sigma)
[3.0, 4.8, -12.6, -3.8, 8.2, 7.3]
sigma = np.diag(sigma)
print(sigma)
array([[ 3.0 , 0. , 0. , 0. , 0. , 0. ],
[ 0. , 4.8 , 0. , 0. , 0. , 0. ],
[ 0. , 0. , -12.6 , 0. , 0. , 0. ],
[ 0. , 0. , 0. , -3.8 , 0. , 0. ],
[ 0. , 0. , 0. , 0. , 8.2 , 0. ],
[ 0. , 0. , 0. , 0. , 0. , 7.3 ]])
recalculated_ratings = np.dot(U, sigma)
recalculated_ratings = np.dot(np.dot(U, sigma), Vt)
print(recalculated_ratings)
[[ 0.1 -0.9 -3.6 ... ]
[ -2.3 0.5 -0.5 ... ]
[ 0.5 -0.5 2.0 ... ]
[ ... ... ... ... ]]
recalculated_ratings = recalculated_ratings + avg_ratings.values.reshape(-1, 1)
print(recalculated_ratings)
[[ 4.6 3.6 0.9 ... ]
[ 1.2 4.0 3.0 ... ]
[ 3.0 2.0 4.5 ... ]
[ ... ... ... ... ]]
print(book_ratings_df)
[[ 5.0 4.0 NA ... ]
[ NA 4.0 3.0 ... ]
[ 3.0 2.0 NA ... ]
[ ... ... ... ... ]]
Building Recommendation Engines in Python