Reshaping Data with tidyr
Jeroen Boeye
Head of Machine Learning, Faktion
# Print nuke_df to inspect its columns and first rows.
# NOTE(review): nuke_df is created outside this excerpt.
nuke_df
# A tibble: 13 x 3
country year n_bombs
<chr> <int> <int>
1 United States 1945 3
2 United States 1946 2
3 United States 1948 3
4 Russian Federation 1949 1
5 Russian Federation 1951 2
6 United States 1951 16
# ... with 7 more rows
# Build the complete grid of year/country combinations: every year from
# 1945 to 1954 paired with each of the three countries (10 x 3 = 30 rows).
full_df <- expand_grid(
  year = 1945:1954,
  country = c("Russian Federation", "United Kingdom", "United States")
)

# Print the grid to check its shape.
full_df
# A tibble: 30 x 2
year country
<int> <chr>
1 1945 Russian Federation
2 1945 United Kingdom
3 1945 United States
4 1946 Russian Federation
5 1946 United Kingdom
6 1946 United States
7 1947 Russian Federation
8 1947 United Kingdom
# ... with 22 more rows
# Right-join the observed counts onto the full grid so that every
# year/country combination in full_df appears in the result. Combinations
# without a match in nuke_df get NA in n_bombs. Rows are ordered by year.
nuke_df %>%
  right_join(full_df, by = c("country", "year")) %>%
  arrange(year)
# A tibble: 30 x 3
country year n_bombs
<chr> <int> <int>
1 United States 1945 3
2 Russian Federation 1945 NA
3 United Kingdom 1945 NA
4 United States 1946 2
5 Russian Federation 1946 NA
6 United Kingdom 1946 NA
7 Russian Federation 1947 NA
8 United Kingdom 1947 NA
# ... with 22 more rows
# Same right join onto the full grid, ordered by year, but the NA values
# introduced for unmatched year/country combinations are replaced with 0.
# The integer literal 0L keeps n_bombs an integer column.
nuke_df %>%
  right_join(full_df, by = c("country", "year")) %>%
  arrange(year) %>%
  replace_na(replace = list(n_bombs = 0L))
# A tibble: 30 x 3
country year n_bombs
<chr> <int> <int>
1 United States 1945 3
2 Russian Federation 1945 0
3 United Kingdom 1945 0
4 United States 1946 2
5 Russian Federation 1946 0
6 United Kingdom 1946 0
7 Russian Federation 1947 0
8 United Kingdom 1947 0
# ... with 22 more rows
# Keep only the year/country combinations of the full grid that have no
# matching row in nuke_df, i.e. the combinations missing from the data.
full_df %>%
  anti_join(nuke_df, by = c("country", "year"))
# A tibble: 17 x 2
year country
<int> <chr>
1 1945 Russian Federation
2 1945 United Kingdom
3 1946 Russian Federation
4 1946 United Kingdom
5 1947 Russian Federation
6 1947 United Kingdom
7 1947 United States
8 1948 Russian Federation
# ... with 9 more rows
Reshaping Data with tidyr