Reshaping Data with tidyr
Jeroen Boeye
Head of Machine Learning, Faktion
album_df
# A tibble: 3 x 3
year artist n_albums
<int> <chr> <int>
1 1977 Beatles 2
2 1977 Rolling Stones 1
3 1979 Beatles 1
album_df %>%
complete(year, artist)
# A tibble: 4 x 3
year artist n_albums
<int> <chr> <int>
1 1977 Beatles 2
2 1977 Rolling Stones 1
3 1979 Beatles 1
4 1979 Rolling Stones NA
album_df %>%
complete(year, artist, fill = list(n_albums = 0L))
# A tibble: 4 x 3
year artist n_albums
<int> <chr> <int>
1 1977 Beatles 2
2 1977 Rolling Stones 1
3 1979 Beatles 1
4 1979 Rolling Stones 0
album_df %>%
complete(
year,
artist = c(
"Beatles",
"Rolling Stones",
"ABBA"),
fill = list(n_albums = 0L)
)
# A tibble: 6 x 3
year artist n_albums
<int> <chr> <int>
1 1977 ABBA 0
2 1977 Beatles 2
3 1977 Rolling Stones 1
4 1979 ABBA 0
5 1979 Beatles 1
6 1979 Rolling Stones 0
album_df %>%
complete(
year = 1977:1979,
artist,
fill = list(n_albums = 0L)
)
# A tibble: 6 x 3
year artist n_albums
<int> <chr> <int>
1 1977 Beatles 2
2 1977 Rolling Stones 1
3 1978 Beatles 0
4 1978 Rolling Stones 0
5 1979 Beatles 1
6 1979 Rolling Stones 0
full_seq(c(1977, 1979), period = 1)
[1] 1977 1978 1979
full_seq(c(1977, 1979, 1980, 1980, 1980), period = 1)
[1] 1977 1978 1979 1980
full_seq(album_df$year, period = 1)
[1] 1977 1978 1979
album_df %>%
complete(
year = full_seq(year, period = 1),
artist,
fill = list(n_albums = 0L)
)
# A tibble: 6 x 3
year artist n_albums
<dbl> <chr> <int>
1 1977 Beatles 2
2 1977 Rolling Stones 1
3 1978 Beatles 0
4 1978 Rolling Stones 0
5 1979 Beatles 1
6 1979 Rolling Stones 0
full_seq(c(as.Date("2000-01-01"), as.Date("2000-01-10")), period = 1)
[1] "2000-01-01" "2000-01-02" "2000-01-03" "2000-01-04" "2000-01-05"
[6] "2000-01-06" "2000-01-07" "2000-01-08" "2000-01-09" "2000-01-10"
Reshaping Data with tidyr