Reshaping Data with tidyr
Jeroen Boeye
Head of Machine Learning, Faktion
# A tibble: 5 x 4
drink ingredient quantity unit
<chr> <chr> <int> <chr>
1 Chocolate milk milk 1 L
2 Chocolate milk chocolate 100 g
3 Chocolate milk sugar 20 g
4 Orange juice oranges 3 NA
5 Orange juice sugar 20 g
moon_df
# A tibble: 5 x 2
year people_on_moon
<int> <int>
1 1969 4
2 1970 NA
3 1971 4
4 1972 4
5 1973 NA
moon_df %>%
replace_na(list(people_on_moon = 0L))
# A tibble: 5 x 2
year people_on_moon
<int> <int>
1 1969 4
2 1970 0
3 1971 4
4 1972 4
5 1973 0
typeof(0L)
[1] "integer"
typeof(0)
[1] "double"
cumul_moon_df
# A tibble: 5 x 3
year people_on_moon total_people_on_moon
<int> <int> <int>
1 1969 4 4
2 1970 NA NA
3 1971 4 8
4 1972 4 12
5 1973 NA NA
cumul_moon_df %>%
fill(total_people_on_moon)
# A tibble: 5 x 3
year people_on_moon total_people_on_moon
<int> <int> <int>
1 1969 4 4
2 1970 NA 4
3 1971 4 8
4 1972 4 12
5 1973 NA 12
cumul_moon_df %>%
fill(total_people_on_moon, .direction = "down")
# A tibble: 5 x 3
year people_on_moon total_people_on_moon
<int> <int> <int>
1 1969 4 4
2 1970 NA 4
3 1971 4 8
4 1972 4 12
5 1973 NA 12
cumul_moon_df %>%
fill(total_people_on_moon, .direction = "up")
# A tibble: 5 x 3
year people_on_moon total_people_on_moon
<int> <int> <int>
1 1969 4 4
2 1970 NA 8
3 1971 4 8
4 1972 4 12
5 1973 NA NA
moon_df %>%
drop_na()
# A tibble: 3 x 2
year people_on_moon
<int> <int>
1 1969 4
2 1971 4
3 1972 4
mars_df
# A tibble: 5 x 3
year people_on_moon people_on_mars
<int> <int> <int>
1 1969 4 NA
2 1970 NA NA
3 1971 4 NA
4 1972 4 NA
5 1973 NA NA
mars_df %>%
drop_na()
# A tibble: 0 x 3
# ... with 3 variables: year <int>, people_on_moon <int>, people_on_mars <int>
mars_df %>%
drop_na(people_on_moon)
# A tibble: 3 x 3
year people_on_moon people_on_mars
<int> <int> <int>
1 1969 4 NA
2 1971 4 NA
3 1972 4 NA
Reshaping Data with tidyr