Collapsing factor levels

Categorical Data in the Tidyverse

Emily Robinson

Data Scientist

A bar chart titled “Frequency of heights in 538 flying etiquette survey”, with height on the x-axis and count on the y-axis. The x-axis runs from “Under 5 ft.” to “6'6" and above”, with one bar for each inch in between.

Categorical Data in the Tidyverse

fct_collapse()

# Collapse the listed height levels into two new levels, `under_5_3` and
# `over_6_1`, then print the levels of the resulting factor.
# Note: `over_6_1` also contains the 6'1" level itself.
flying_etiquette %>%
  mutate(
    height = fct_collapse(
      height,
      under_5_3 = c("Under 5 ft.", "5'0\"", "5'1\"", "5'2\""),
      over_6_1 = c(
        "6'1\"", "6'2\"", "6'3\"", "6'4\"", "6'5\"", "6'6\" and above"
      )
    )
  ) %>%
  pull(height) %>%
  levels()
 [1] "under_5_3" "5'10\""    "5'11\""    "5'3\""    
 [5] "5'4\""     "5'5\""     "5'6\""     "5'7\""    
 [9] "5'8\""     "5'9\""     "6'0\""     "over_6_1"
Categorical Data in the Tidyverse

fct_other(): keep

 

# Keep only the 6'4" and 5'1" levels; all other non-missing heights are
# recoded to "Other". Then tally each resulting level.
flying_etiquette %>%
  mutate(
    new_height = fct_other(
      height,
      keep = c("6'4\"", "5'1\"")
    )
  ) %>%
  count(new_height)
# A tibble: 4 x 2
  new_height     n
  <fct>      <int>
1 "5'1\""       19
2 "6'4\""       11
3 Other        828
4 NA           182
Categorical Data in the Tidyverse

fct_other(): drop

# Recode the listed height levels to "Other", leaving every other level
# as it is, then print the levels of the new factor.
flying_etiquette %>%
  mutate(
    new_height = fct_other(
      height,
      drop = c("Under 5 ft.", "5'0\"", "5'1\"", "5'2\"", "5'3\"")
    )
  ) %>%
  pull(new_height) %>%
  levels()
 [1] "5'4\""           "5'5\""           "5'6\""          
 [4] "5'7\""           "5'8\""           "5'9\""          
 [7] "5'10\""          "5'11\""          "6'0\""          
[10] "6'1\""           "6'2\""           "6'3\""          
[13] "6'4\""           "6'5\""           "6'6\" and above"
[16] "Other"
Categorical Data in the Tidyverse

fct_lump_prop()

# Lump every height level whose share of responses does not exceed the
# `prop` threshold (8%) into "Other", then tally each resulting level.
flying_etiquette %>%
  mutate(
    new_height = fct_lump_prop(height, prop = 0.08)
  ) %>%
  count(new_height)
  new_height     n
  <fct>      <int>
1 "5'4\""       79
2 "5'6\""       75
3 "5'7\""       76
4 "5'8\""       76
5 Other        552
6 NA           182
Categorical Data in the Tidyverse

fct_lump_n()

# Keep the 3 most frequent height levels and lump the rest into "Other",
# then tally each resulting level.
flying_etiquette %>%
  mutate(
    new_height = fct_lump_n(height, n = 3)
  ) %>%
  count(new_height)
  new_height     n
  <fct>      <int>
1 "5'4\""       79
2 "5'7\""       76
3 "5'8\""       76
4 Other        627
5 NA           182
Categorical Data in the Tidyverse

Let's practice!

Categorical Data in the Tidyverse

Preparing Video For Download...