Reshaping Data with tidyr
Jeroen Boeye
Head of Machine Learning, Faktion
separate()
separate_rows()
nuke_df
# A tibble: 2 x 6
country `1945` `1946` `1948` `1949` `1951`
<chr> <int> <int> <int> <int> <int>
1 United States 3 2 3 NA 16
2 Russian Federation NA NA NA 1 2
nuke_df %>%
pivot_longer(`1945`:`1951`)
# A tibble: 10 x 3
country name value
<chr> <chr> <int>
1 United States 1945 3
2 United States 1946 2
3 United States 1948 3
4 United States 1949 NA
5 United States 1951 16
6 Russian Federation 1945 NA
# ... with 4 more rows
nuke_df %>%
pivot_longer(c(`1945`, `1946`, `1948`, `1949`, `1951`))
# A tibble: 10 x 3
country name value
<chr> <chr> <int>
1 United States 1945 3
2 United States 1946 2
3 United States 1948 3
4 United States 1949 NA
5 United States 1951 16
6 Russian Federation 1945 NA
# ... with 4 more rows
nuke_df %>%
pivot_longer(-country)
# A tibble: 10 x 3
country name value
<chr> <chr> <int>
1 United States 1945 3
2 United States 1946 2
3 United States 1948 3
4 United States 1949 NA
5 United States 1951 16
6 Russian Federation 1945 NA
# ... with 4 more rows
nuke_df %>%
pivot_longer(-country, names_to = "year", values_to = "n_bombs")
# A tibble: 10 x 3
country year n_bombs
<chr> <chr> <int>
1 United States 1945 3
2 United States 1946 2
3 United States 1948 3
4 United States 1949 NA
5 United States 1951 16
6 Russian Federation 1945 NA
# ... with 4 more rows
nuke_df %>%
pivot_longer(
-country,
names_to = "year",
values_to = "n_bombs",
values_drop_na = TRUE
)
# A tibble: 6 x 3
country year n_bombs
<chr> <chr> <int>
1 United States 1945 3
2 United States 1946 2
3 United States 1948 3
4 United States 1951 16
5 Russian Federation 1949 1
6 Russian Federation 1951 2
nuke_df %>%
pivot_longer(
-country,
names_to = "year",
values_to = "n_bombs",
values_drop_na = TRUE,
names_transform = list(year = as.integer)
)
# A tibble: 6 x 3
country year n_bombs
<chr> <int> <int>
1 United States 1945 3
2 United States 1946 2
3 United States 1948 3
4 United States 1951 16
5 Russian Federation 1949 1
6 Russian Federation 1951 2
Reshaping Data with tidyr