Reshaping Data with tidyr
Jeroen Boeye
Head of Machine Learning, Faktion
tibble(character = star_wars_list) %>%
unnest_wider(character)
# A tibble: 2 x 2
name films
<chr> <list>
1 Darth Vader <chr [4]>
2 Jar Jar Binks <chr [2]>
tibble(character = star_wars_list) %>%
unnest_wider(character) %>%
unnest_wider(films)
# A tibble: 2 x 5
  name          ...1                 ...2               ...3                    ...4
  <chr>         <chr>                <chr>              <chr>                   <chr>
1 Darth Vader   Revenge of the Sith  Return of the Jedi The Empire Strikes Back A New Hope
2 Jar Jar Binks Attack of the Clones The Phantom Menace NA                      NA
tibble(character = star_wars_list) %>%
unnest_wider(character) %>%
unnest_longer(films)
# A tibble: 45 x 2
name films
<chr> <chr>
1 Chewbacca Revenge of the Sith
2 Chewbacca Return of the Jedi
3 Chewbacca The Empire Strikes Back
4 Chewbacca A New Hope
5 Chewbacca The Force Awakens
6 Darth Vader Revenge of the Sith
7 Darth Vader Return of the Jedi
8 Darth Vader The Empire Strikes Back
# ... with 37 more rows
course_df
# A tibble: 4 x 2
ch_id metadata
<chr> <list>
1 CH1 <named list [3]>
2 CH2 <named list [3]>
3 CH3 <named list [3]>
4 CH4 <named list [3]>
course_df %>%
unnest_wider(metadata)
# A tibble: 4 x 4
  ch_id chapter_title              status      lessons
  <chr> <chr>                      <chr>       <list>
1 CH1   Tidy Data                  Complete    <list [3]>
2 CH2   From Wide to Long and Back Complete    <list [4]>
3 CH3   Expanding Data             Complete    <list [3]>
4 CH4   Rectangling Data           In progress <list [4]>
course_df %>%
unnest_wider(metadata) %>%
unnest_longer(lessons)
# A tibble: 14 x 4
  ch_id chapter_title              status   lessons
  <chr> <chr>                      <chr>    <list>
1 CH1   Tidy Data                  Complete <named list [3]>
2 CH1   Tidy Data                  Complete <named list [3]>
3 CH1   Tidy Data                  Complete <named list [3]>
4 CH2   From Wide to Long and Back Complete <named list [3]>
# ... with 10 more rows
course_df %>%
unnest_wider(metadata) %>%
unnest_longer(lessons) %>%
unnest_wider(lessons)
# A tibble: 14 x 6
  ch_id chapter_title              status   l_id  lesson_title                 exercises
  <chr> <chr>                      <chr>    <chr> <chr>                        <list>
1 CH1   Tidy Data                  Complete L1    What is tidy data?           <list [2]>
2 CH1   Tidy Data                  Complete L2    Columns with multiple values <list [3]>
3 CH1   Tidy Data                  Complete L3    Missing values               <list [3]>
4 CH2   From Wide to Long and Back Complete L1    From wide to long data       <list [3]>
# ... with 10 more rows
course_df %>%
unnest_wider(metadata) %>%
unnest_longer(lessons) %>%
unnest_wider(lessons) %>%
select(ch_id, l_id, exercises) %>%
unnest_longer(exercises)
# A tibble: 41 x 3
ch_id l_id exercises
<chr> <chr> <list>
1 CH1 L1 <named list [2]>
2 CH1 L1 <named list [2]>
3 CH1 L2 <named list [2]>
4 CH1 L2 <named list [2]>
5 CH1 L2 <named list [2]>
6 CH1 L3 <named list [2]>
7 CH1 L3 <named list [2]>
8 CH1 L3 <named list [2]>
# ... with 33 more rows
course_df %>%
unnest_wider(metadata) %>%
unnest_longer(lessons) %>%
unnest_wider(lessons) %>%
select(ch_id, l_id, exercises) %>%
unnest_longer(exercises) %>%
unnest_wider(exercises)
# A tibble: 41 x 4
ch_id l_id ex_id complete
<chr> <chr> <chr> <lgl>
1 CH1 L1 E1 TRUE
2 CH1 L1 E2 TRUE
3 CH1 L2 E1 TRUE
4 CH1 L2 E2 TRUE
5 CH1 L2 E3 TRUE
6 CH1 L3 E1 TRUE
7 CH1 L3 E2 TRUE
8 CH1 L3 E3 TRUE
# ... with 33 more rows
course_df %>%
unnest_wider(metadata) %>%
unnest_longer(lessons) %>%
unnest_wider(lessons) %>%
select(ch_id, l_id, exercises) %>%
unnest_longer(exercises) %>%
unnest_wider(exercises) %>%
summarize(pct_complete = mean(complete))
# A tibble: 1 x 1
pct_complete
<dbl>
1 0.780
Reshaping Data with tidyr