Categorical Data in the Tidyverse
Emily Robinson
Data Scientist
# Merge the shortest height levels into "under_5_3" and the tallest into
# "over_6_1", then list the levels left on the collapsed factor.
flying_etiquette %>%
  pull(height) %>%
  fct_collapse(
    under_5_3 = c("Under 5 ft.", "5'0\"", "5'1\"", "5'2\""),
    over_6_1 = c(
      "6'1\"", "6'2\"", "6'3\"", "6'4\"", "6'5\"", "6'6\" and above"
    )
  ) %>%
  levels()
[1] "under_5_3" "5'10\"" "5'11\"" "5'3\""
[5] "5'4\"" "5'5\"" "5'6\"" "5'7\""
[9] "5'8\"" "5'9\"" "6'0\"" "over_6_1"
# Keep only the 6'4" and 5'1" height levels, fold every other level into
# "Other", and tally the rows per resulting level.
flying_etiquette %>%
  count(new_height = fct_other(height, keep = c("6'4\"", "5'1\"")))
# A tibble: 4 x 2
new_height n
<fct> <int>
1 "5'1\"" 19
2 "6'4\"" 11
3 Other 828
4 NA 182
# Fold the listed shorter height levels into "Other", leave the remaining
# levels untouched, then list the levels of the result.
flying_etiquette %>%
  pull(height) %>%
  fct_other(
    drop = c("Under 5 ft.", "5'0\"", "5'1\"", "5'2\"", "5'3\"")
  ) %>%
  levels()
[1] "5'4\"" "5'5\"" "5'6\""
[4] "5'7\"" "5'8\"" "5'9\""
[7] "5'10\"" "5'11\"" "6'0\""
[10] "6'1\"" "6'2\"" "6'3\""
[13] "6'4\"" "6'5\"" "6'6\" and above"
[16] "Other"
# Lump height levels whose share of responses falls below the 8% threshold
# into "Other", then tally the rows per resulting level.
flying_etiquette %>%
  count(new_height = fct_lump_prop(height, prop = .08))
new_height n
<fct> <int>
1 "5'4\"" 79
2 "5'6\"" 75
3 "5'7\"" 76
4 "5'8\"" 76
5 Other 552
6 NA 182
# Keep the three most frequent height levels, lump all others into "Other",
# then tally the rows per resulting level.
flying_etiquette %>%
  count(new_height = fct_lump_n(height, n = 3))
new_height n
<fct> <int>
1 "5'4\"" 79
2 "5'7\"" 76
3 "5'8\"" 76
4 Other 627
5 NA 182
Categorical Data in the Tidyverse