Reshaping Data with tidyr
Jeroen Boeye
Head of Machine Learning, Faktion
space_dogs_df
# A tibble: 42 x 4
date name_1 name_2 result
<date> <chr> <chr> <chr>
1 1951-06-26 Lisa-2 Ryzhik-2 recovered safely
2 1951-07-22 Dezik Tsygan recovered safely
3 1951-07-29 Dezik Lisa parachute failed, both dogs died
4 1951-08-15 Chizhik Mishka recovered safely
5 1951-08-19 Ryzhik Smeliy recovered safely
# ... with 37 more rows
dog_df %>%
pivot_longer(
c(name_1, name_2),
names_to = "id",
values_to = "name",
values_drop_na = TRUE
) %>%
select(-result)
# A tibble: 81 x 3
date id name
<date> <chr> <chr>
1 1951-06-26 name_1 Lisa-2
2 1951-06-26 name_2 Ryzhik-2
3 1951-07-22 name_1 Dezik
4 1951-07-22 name_2 Tsygan
5 1951-07-29 name_1 Dezik
6 1951-07-29 name_2 Lisa
7 1951-08-15 name_1 Chizhik
8 1951-08-15 name_2 Mishka
9 1951-08-19 name_1 Ryzhik
# ... with 72 more rows
dog_df %>%
pivot_longer(
c(name_1, name_2),
names_to = "id",
values_to = "name",
values_drop_na = TRUE,
names_prefix = "name_"
) %>%
select(-result)
# A tibble: 81 x 3
date id name
<date> <chr> <chr>
1 1951-06-26 1 Lisa-2
2 1951-06-26 2 Ryzhik-2
3 1951-07-22 1 Dezik
4 1951-07-22 2 Tsygan
5 1951-07-29 1 Dezik
6 1951-07-29 2 Lisa
7 1951-08-15 1 Chizhik
8 1951-08-15 2 Mishka
9 1951-08-19 1 Ryzhik
# ... with 72 more rows
dog_df %>%
pivot_longer(
c(name_1, name_2),
names_to = "id",
values_to = "name",
values_drop_na = TRUE,
names_prefix = "name_",
names_transform = list(id = as.integer)
) %>%
select(-result)
# A tibble: 81 x 3
date id name
<date> <int> <chr>
1 1951-06-26 1 Lisa-2
2 1951-06-26 2 Ryzhik-2
3 1951-07-22 1 Dezik
4 1951-07-22 2 Tsygan
5 1951-07-29 1 Dezik
6 1951-07-29 2 Lisa
7 1951-08-15 1 Chizhik
8 1951-08-15 2 Mishka
9 1951-08-19 1 Ryzhik
# ... with 72 more rows
dog_df %>%
pivot_longer(
starts_with("name_"),
names_to = "id",
values_to = "name",
values_drop_na = TRUE,
names_prefix = "name_",
names_transform = list(id = as.integer)
) %>%
select(-result)
# A tibble: 81 x 3
date id name
<date> <int> <chr>
1 1951-06-26 1 Lisa-2
2 1951-06-26 2 Ryzhik-2
3 1951-07-22 1 Dezik
4 1951-07-22 2 Tsygan
5 1951-07-29 1 Dezik
6 1951-07-29 2 Lisa
7 1951-08-15 1 Chizhik
8 1951-08-15 2 Mishka
9 1951-08-19 1 Ryzhik
# ... with 72 more rows
apple_revenue_df
# A tibble: 4 x 5
segment `2019_Q1` `2019_Q2` `2019_Q3` `2019_Q4`
<chr> <dbl> <dbl> <dbl> <dbl>
1 iPhone 52.0 31.0 26.0 33.4
2 Mac 7.42 5.51 5.82 6.99
3 iPad 6.73 4.87 5.02 4.66
4 Other 18.2 16.6 17.0 19.0
apple_df %>%
pivot_longer(
-segment,
names_to = c("year", "quarter"),
values_to = "revenue",
names_sep = "_Q",
names_transform = list(
year = as.integer,
quarter = as.integer
)
)
# A tibble: 16 x 4
segment year quarter revenue
<chr> <int> <int> <dbl>
1 iPhone 2019 1 52.0
2 iPhone 2019 2 31.0
3 iPhone 2019 3 26.0
4 iPhone 2019 4 33.4
5 Mac 2019 1 7.42
6 Mac 2019 2 5.51
7 Mac 2019 3 5.82
8 Mac 2019 4 6.99
# ... with 8 more rows
Reshaping Data with tidyr