Reshaping Data with pandas
Maria Eugenia Inzaugarat
Data Scientist
my_writer
{
"first" : "Mary",
"last" : "Shelley",
"country" : "England",
"books" : 12
}
writers
writers = [
{
"first": "Mary",
"last": "Shelley",
"books": {"title": "Frankenstein", "year": 1818}
},
{
"first": "Ernest",
"last": "Hemingway",
"books": {"title": "The Old Man and the Sea", "year": 1951}
}
]
from pandas import json_normalize
json_normalize(writers)
first last books.title books.year
0 Mary Shelley Frankenstein 1818
1 Ernest Hemingway The Old Man and the Sea 1951
writers_norm = json_normalize(writers, sep='_')
writers_norm
first last books_title books_year
0 Mary Shelley Frankenstein 1818
1 Ernest Hemingway The Old Man and the Sea 1951
pd.wide_to_long(writers_norm, stubnames=['books'], i=['first', 'last'], j='feature', sep='_', suffix=r'\w+')
books
first last feature
Mary Shelley title Frankenstein
year 1818
Ernest Hemingway title The Old Man and the Sea
year 1951
writers
[
{'name': 'Mary',
'last': 'Shelley',
'books': [{'title': 'Frankenstein', 'year': 1818},
{'title': 'Mathilda', 'year': 1819},
{'title': 'The Last Man', 'year': 1826}]},
{'name': 'Ernest',
'last': 'Hemingway',
'books': [{'title': 'The Old Man and the Sea', 'year': 1951},
{'title': 'The Sun Also Rises', 'year': 1927}]}
]
json_normalize(writers)
name last books
0 Mary Shelley [{'title': 'Frankenstein', 'year': 1818}, {'ti...
1 Ernest Hemingway [{'title': 'The Old Man and the Sea', 'year': ...
json_normalize(writers, record_path='books')
title year
0 Frankenstein 1818
1 Mathilda 1819
2 The Last Man 1826
3 The Old Man and the Sea 1951
4 The Sun Also Rises 1927
json_normalize(writers, record_path='books', meta=['name', 'last'])
title year name last
0 Frankenstein 1818 Mary Shelley
1 Mathilda 1819 Mary Shelley
2 The Last Man 1826 Mary Shelley
3 The Old Man and the Sea 1951 Ernest Hemingway
4 The Sun Also Rises 1927 Ernest Hemingway
Reshaping Data with pandas