Reshaping Data with tidyr
Jeroen Boeye
Head of Machine Learning, Faktion
# Fully rectangle the nested `moons` list-column:
#   1. unnest_longer(): one row per moon.
#   2. unnest_wider(moons): spread each moon's fields (moon_name, moon_data)
#      into columns.
#   3. unnest_wider(moon_data): spread radius and density into columns.
#
# keep_empty = TRUE keeps planets whose `moons` entry is NULL (Mercury, Venus)
# as a single all-NA row. Recent tidyr releases (1.2.0 and later, per the
# changelog) drop such rows by default, which would remove them from the
# result.
planet_df %>%
  unnest_longer(moons, keep_empty = TRUE) %>%
  unnest_wider(moons) %>%
  unnest_wider(moon_data)
# A tibble: 174 x 4
planet moon_name radius density
<chr> <chr> <dbl> <dbl>
1 Mercury NA NA NA
2 Venus NA NA NA
3 Earth Moon 1738. 3.34
4 Jupiter Io 1822. 3.53
5 Jupiter Europa 1561. 3.01
6 Jupiter Ganymede 2631. 1.94
7 Jupiter Callisto 2410. 1.83
8 Jupiter Amalthea 83.4 0.849
# ... with 166 more rows
moons :List of 8
$ :List of 67
..$ :List of 2
.. ..$ moon_name: chr "Io"
.. ..$ moon_data:List of 2
.. .. ..$ radius : num 1822
.. .. ..$ density: num 3.53
# Pull two deeply nested values out of `moons` without unnesting everything.
# Each path is followed element by element: position 1 selects the first moon,
# the strings select named fields inside it. Planets without moons yield NA.
planet_df %>%
  hoist(
    .col = moons,
    first_moon = list(1, "moon_name"),
    radius = list(1, "moon_data", "radius")
  )
# A tibble: 8 x 4
planet first_moon radius moons
<chr> <chr> <dbl> <list>
1 Mercury NA NA <NULL>
2 Venus NA NA <NULL>
3 Earth Moon 1738. <list [1]>
4 Jupiter Io 1822. <list [67]>
5 Mars Phobos 11.1 <list [2]>
6 Neptune Triton 1353. <list [14]>
7 Saturn Mimas 198. <list [61]>
8 Uranus Ariel 579. <list [27]>
# One row per moon first, then hoist selected fields out of each moon's list.
# Whatever is not hoisted stays behind in the `moons` list-column.
#
# keep_empty = TRUE keeps planets whose `moons` entry is NULL (Mercury, Venus)
# as a single NA row. Recent tidyr releases (1.2.0 and later, per the
# changelog) drop such rows by default.
planet_df %>%
  unnest_longer(moons, keep_empty = TRUE) %>%
  hoist(
    moons,
    moon_name = "moon_name",
    radius = list("moon_data", "radius")
  )
# A tibble: 174 x 4
planet moon_name radius moons
<chr> <chr> <dbl> <list>
1 Mercury NA NA <NULL>
2 Venus NA NA <NULL>
3 Earth Moon 1738. <named list [1]>
4 Jupiter Io 1822. <named list [1]>
5 Jupiter Europa 1561. <named list [1]>
6 Jupiter Ganymede 2631. <named list [1]>
7 Jupiter Callisto 2410. <named list [1]>
8 Jupiter Amalthea 83.4 <named list [1]>
9 Jupiter Himalia 85 <named list [1]>
10 Jupiter Elara 43 <named list [1]>
# ... with 164 more rows
# Inspect the raw data: one row per city, with the nested data held in the
# list-column `json`.
city_df
# A tibble: 5 x 2
city json
<chr> <list>
1 Beijing <named list [2]>
2 Buenos Aires <named list [2]>
3 New Delhi <named list [2]>
4 New York <named list [2]>
5 Paris <named list [2]>
# Spread the top-level fields of each `json` element into their own columns
# (here: `results` and `status`).
city_df %>%
  unnest_wider(col = json)
# A tibble: 5 x 3
city results status
<chr> <list> <chr>
1 Beijing <list [1]> OK
2 Buenos Aires <list [1]> OK
3 New Delhi <list [1]> OK
4 New York <list [1]> OK
5 Paris <list [1]> OK
# Go one level deeper: `results` is an unnamed list, so lengthen it to one row
# per result before widening that result's named fields into columns.
city_df %>%
  unnest_wider(col = json) %>%
  unnest_longer(col = results) %>%
  unnest_wider(col = results)
# A tibble: 5 x 8
city address_components formatted_address geometry
<chr> <list> <chr> <list>
1 Beijing <list [3]> Beijing, China <named list [4]>
2 Buenos Aires <list [3]> Buenos Aires, Argentina <named list [4]>
3 New Delhi <list [3]> New Delhi, Delhi, India <named list [4]>
4 New York <list [3]> New York, NY, USA <named list [4]>
5 Paris <list [4]> Paris, France <named list [4]>
# ... with 4 more variables: place_id <chr>, types <list>, partial_match <lgl>, status <chr>
# Unnest level by level until the coordinates are reached:
# json -> results -> geometry -> location, then keep only the columns of
# interest.
city_df %>%
  unnest_wider(col = json) %>%
  unnest_longer(col = results) %>%
  unnest_wider(col = results) %>%
  unnest_wider(col = geometry) %>%
  unnest_wider(col = location) %>%
  select(city, lat, lng)
# A tibble: 5 x 3
city lat lng
<chr> <dbl> <dbl>
1 Beijing 39.9 116.
2 Buenos Aires -34.6 -58.4
3 New Delhi 28.6 77.2
4 New York 40.7 -74.0
5 Paris 48.9 2.35
# Same coordinates in a single step: hoist() follows each path through the
# nested list (names select fields, 1 selects the first result). The `json`
# list-column is kept in the output.
city_df %>%
  hoist(
    .col = json,
    lat = list("results", 1, "geometry", "location", "lat"),
    lng = list("results", 1, "geometry", "location", "lng")
  )
# A tibble: 5 x 4
city lat lng json
<chr> <dbl> <dbl> <list>
1 Beijing 39.9 116. <named list [2]>
2 Buenos Aires -34.6 -58.4 <named list [2]>
3 New Delhi 28.6 77.2 <named list [2]>
4 New York 40.7 -74.0 <named list [2]>
5 Paris 48.9 2.35 <named list [2]>
Reshaping Data with tidyr