Reshaping Data with pandas
Maria Eugenia Inzaugarat
Data Scientist
json_normalize().
writers = ["Mary Shelley", "Ernest Hemingway"]
books = ["{'title': 'Frankenstein', 'year': 1818}", "{'title': 'The Old Man and the Sea', 'year': 1951}"]
collection = pd.DataFrame( )
writers = ["Mary Shelley", "Ernest Hemingway"]
books = ["{'title': 'Frankenstein', 'year': 1818}", "{'title': 'The Old Man and the Sea', 'year': 1951}"]
collection = pd.DataFrame(dict( ))
writers = ["Mary Shelley", "Ernest Hemingway"]
books = ['{"title": "Frankenstein", "year": "1818"}', '{"title": "The Old Man and the Sea", "year":"1951"}']
collection = pd.DataFrame(dict(writers=writers, books=books))
collection
writers books
0 Mary Shelley {'title': 'Frankenstein', 'year': 1818}
1 Ernest Hemingway {'title': 'The Old Man and the Sea', 'year': 1951}
import json
books = collection['books']
import json
books = collection['books'].apply( )
import json
books = collection['books'].apply(json.loads)
import json
books = collection['books'].apply(json.loads).apply(pd.Series)
books
title year
0 Frankenstein 1818
1 The Old Man and the Sea 1951
collection = collection.drop(columns='books')
pd.concat([collection, books], axis=1)
writers title year
0 Mary Shelley Frankenstein 1818
1 Ernest Hemingway The Old Man and the Sea 1951
import json
books = collection['books'].apply(json.loads)
import json
books = collection['books'].apply(json.loads).to_list()
books_dump = json.dumps(books)
new_books = pd.read_json(books_dump)
new_books
title year
0 Frankenstein 1818
1 The Old Man and the Sea 1951
pd.concat([collection['writers'], new_books], axis=1)
writers title year
0 Mary Shelley Frankenstein 1818
1 Ernest Hemingway The Old Man and the Sea 1951
Reshaping Data with pandas