Reshaping Data with pandas
Maria Eugenia Inzaugarat
Data Scientist
json_normalize()
writers = ["Mary Shelley", "Ernest Hemingway"]
books = ['{"title": "Frankenstein", "year": "1818"}', '{"title": "The Old Man and the Sea", "year":"1951"}']
collection = pd.DataFrame( )
writers = ["Mary Shelley", "Ernest Hemingway"]
books = ['{"title": "Frankenstein", "year": "1818"}', '{"title": "The Old Man and the Sea", "year":"1951"}']
collection = pd.DataFrame(dict( ))
writers = ["Mary Shelley", "Ernest Hemingway"]
books = ['{"title": "Frankenstein", "year": "1818"}', '{"title": "The Old Man and the Sea", "year":"1951"}']
collection = pd.DataFrame(dict(writers=writers, books=books))
collection
writers books
0 Mary Shelley {"title": "Frankenstein", "year": "1818"}
1 Ernest Hemingway {"title": "The Old Man and the Sea", "year":"1951"}
import json
books = collection['books']
import json
books = collection['books'].apply( )
import json
books = collection['books'].apply(json.loads)
import json
books = collection['books'].apply(json.loads).apply(pd.Series)
books
title year
0 Frankenstein 1818
1 The Old Man and the Sea 1951
collection = collection.drop(columns='books')
pd.concat([collection, books], axis=1)
writers title year
0 Mary Shelley Frankenstein 1818
1 Ernest Hemingway The Old Man and the Sea 1951
import json
books = collection['books'].apply(json.loads)
import json
books = collection['books'].apply(json.loads).to_list()
books_dump = json.dumps(books)
new_books = pd.read_json(books_dump)
new_books
title year
0 Frankenstein 1818
1 The Old Man and the Sea 1951
pd.concat([collection['writers'], new_books], axis=1)
writers title year
0 Mary Shelley Frankenstein 1818
1 Ernest Hemingway The Old Man and the Sea 1951
Reshaping Data with pandas