Reshaping Data with pandas
Maria Eugenia Inzaugarat
Data Scientist
cities
city country zip_code
0 Los Angeles USA [90001, 90004, 90008]
1 Madrid Spain [28001, 28004, 28005]
2 Rabat Morocco [10010, 10170]
cities_explode = cities['zip_code'].explode()
cities_explode
0 90001
0 90004
0 90008
1 28001
1 28004
1 28005
2 10010
2 10170
cities[['city', 'country']]
cities[['city', 'country']].merge(cities_explode, )
cities[['city', 'country']].merge(cities_explode, left_index=True, right_index=True)
city country zip_code
0 Los Angeles USA 90001
0 Los Angeles USA 90004
0 Los Angeles USA 90008
1 Madrid Spain 28001
1 Madrid Spain 28004
1 Madrid Spain 28005
2 Rabat Morocco 10010
2 Rabat Morocco 10170
cities_explode = cities.explode('zip_code')
cities_explode
city country zip_code
0 Los Angeles USA 90001
0 Los Angeles USA 90004
0 Los Angeles USA 90008
1 Madrid Spain 28001
1 Madrid Spain 28004
1 Madrid Spain 28005
2 Rabat Morocco 10010
2 Rabat Morocco 10170
cities_explode.reset_index(drop=True, inplace=True)
city country zip_code
0 Los Angeles USA 90001
1 Los Angeles USA 90004
2 Los Angeles USA 90008
3 Madrid Spain 28001
4 Madrid Spain 28004
5 Madrid Spain 28005
6 Rabat Morocco 10010
7 Rabat Morocco 10170
cities_new
city country zip_code
0 Los Angeles USA [90001, 90004, 90008]
1 Madrid Spain []
2 Rabat Morocco [10010, 10170]
cities_new.explode('zip_code')
city country zip_code
0 Los Angeles USA 90001
0 Los Angeles USA 90004
0 Los Angeles USA 90008
1 Madrid Spain NaN
2 Rabat Morocco 10010
2 Rabat Morocco 10170
cities
city country zip_code
0 Los Angeles USA 90001, 90004, 90008
1 Madrid Spain 28001, 28004, 28005
2 Rabat Morocco 10010, 10170
cities['zip_code'].str.split(',', expand=True)
0 1 2
0 90001 90004 90008
1 28001 28004 28005
2 10010 10170 None
cities.assign(zip_code= )
cities.assign(zip_code=cities['zip_code'].str.split(','))
cities.assign(zip_code=cities['zip_code'].str.split(',')).explode('zip_code')
city country zip_code
0 Los Angeles USA 90001
0 Los Angeles USA 90004
0 Los Angeles USA 90008
1 Madrid Spain 28001
1 Madrid Spain 28004
1 Madrid Spain 28005
2 Rabat Morocco 10010
2 Rabat Morocco 10170
Reshaping Data with pandas