Joining Data with pandas
Aaren Stubberfield
Instructor
# Read the movie-to-genre table from tmdb_movie_to_genres.csv.
movie_to_genres = pd.read_csv('tmdb_movie_to_genres.csv')
# Keep only the rows whose 'genre' value equals 'TV Movie'.
tv_genre = movie_to_genres.loc[movie_to_genres['genre'].eq('TV Movie')]
print(tv_genre)
movie_id genre
4998 10947 TV Movie
5994 13187 TV Movie
7443 22488 TV Movie
10061 78814 TV Movie
10790 153397 TV Movie
10835 158150 TV Movie
11096 205321 TV Movie
11282 231617 TV Movie
# Boolean mask: True for every row whose genre is 'TV Movie'.
m = movie_to_genres['genre'].eq('TV Movie')
# Select the rows where the mask is True.
tv_genre = movie_to_genres.loc[m]
print(tv_genre)
movie_id genre
4998 10947 TV Movie
5994 13187 TV Movie
7443 22488 TV Movie
10061 78814 TV Movie
10790 153397 TV Movie
10835 158150 TV Movie
11096 205321 TV Movie
11282 231617 TV Movie
id title popularity release_date
0 257 Oliver Twist 20.415572 2005-09-23
1 14290 Better Luck ... 3.877036 2002-01-12
2 38365 Grown Ups 38.864027 2010-06-24
3 9672 Infamous 3.6808959999... 2006-11-16
4 12819 Alpha and Omega 12.300789 2010-09-17
movie_id genre
4998 10947 TV Movie
5994 13187 TV Movie
7443 22488 TV Movie
10061 78814 TV Movie
10790 153397 TV Movie
# Right join: keep every row of tv_genre and attach the matching row of
# movies, pairing movies['id'] with tv_genre['movie_id'].
tv_movies = movies.merge(
    tv_genre,
    left_on='id',
    right_on='movie_id',
    how='right',
)
print(tv_movies.head(n=5))
id title popularity release_date movie_id genre
0 153397 Restless 0.812776 2012-12-07 153397 TV Movie
1 10947 High School ... 16.536374 2006-01-20 10947 TV Movie
2 231617 Signed, Seal... 1.444476 2013-10-13 231617 TV Movie
3 78814 We Have Your... 0.102003 2011-11-12 78814 TV Movie
4 158150 How to Fall ... 1.923514 2012-07-21 158150 TV Movie
# Mask for the 'Family' genre, then keep the first three matching rows.
m = movie_to_genres['genre'].eq('Family')
family = movie_to_genres.loc[m].head(n=3)
movie_id genre
0 12 Family
1 35 Family
2 105 Family
# Mask for the 'Comedy' genre, then keep the first three matching rows.
m = movie_to_genres['genre'].eq('Comedy')
comedy = movie_to_genres.loc[m].head(n=3)
movie_id genre
0 5 Comedy
1 13 Comedy
2 35 Comedy
# Outer join on movie_id: keep ids that appear in either table. The two
# clashing 'genre' columns are told apart by the '_fam' / '_com' suffixes.
family_comedy = family.merge(
    comedy,
    how='outer',
    on='movie_id',
    suffixes=('_fam', '_com'),
)
print(family_comedy)
movie_id genre_fam genre_com
0 12 Family NaN
1 35 Family Comedy
2 105 Family NaN
3 5 NaN Comedy
4 13 NaN Comedy
Joining Data with pandas