Building Recommendation Engines in Python
Rob O'Callaghan
Director of Data




print(book_ratings_df.shape)
(220, 500)
avg_ratings = book_ratings_df.mean(axis=1)
print(avg_ratings)
array([[4.5],
       [3.5],
       [2.5],
       [3.5],
       ...
       [2.2]])
user_ratings_pivot_centered = user_ratings_df.sub(avg_ratings, axis=0)
user_ratings_df.fillna(0, inplace=True)
print(user_ratings_df)
          The Great Gatsby    The Catcher in the Rye    Fifty Shades of Grey                    
User_233               0.0                       0.0                     0.0
User_651               0.0                       0.5                    -0.5
User_965               0.5                      -0.5                     0.0
     ...               ...                       ...                     ...
from scipy.sparse.linalg import svds
U, sigma, Vt = svds(user_ratings_pivot_centered)
print(U.shape)
(610, 6)
print(Vt.shape)
(6, 1000)
print(sigma)
[3.0, 4.8, -12.6, -3.8, 8.2, 7.3]
sigma = np.diag(sigma)
print(sigma)
array([[  3.0    ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ],
       [  0.     ,   4.8    ,   0.     ,   0.     ,   0.     ,   0.     ],
       [  0.     ,   0.     , -12.6    ,   0.     ,   0.     ,   0.     ],
       [  0.     ,   0.     ,   0.     ,  -3.8    ,   0.     ,   0.     ],
       [  0.     ,   0.     ,   0.     ,   0.     ,   8.2    ,   0.     ],
       [  0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   7.3    ]])




recalculated_ratings = np.dot(U, sigma)
recalculated_ratings = np.dot(np.dot(U, sigma), Vt)
print(recalculated_ratings)
[[  0.1      -0.9       -3.6      ...   ]
 [ -2.3       0.5       -0.5      ...   ]
 [  0.5      -0.5        2.0      ...   ]
 [ ...        ...        ...      ...   ]]
recalculated_ratings = recalculated_ratings + avg_ratings.values.reshape(-1, 1)
print(recalculated_ratings)
[[  4.6       3.6        0.9      ...   ]
 [  1.8       4.0        3.0      ...   ]
 [  3.0       2.0        4.5      ...   ]
 [ ...        ...        ...      ...   ]]
print(book_ratings_df)
[[  5.0       4.0         NA      ...   ]
 [   NA       4.0        3.0      ...   ]
 [  3.0       2.0         NA      ...   ]
 [ ...        ...        ...      ...   ]]
Building Recommendation Engines in Python