Categorical Data in the Tidyverse
Emily Robinson
Data Scientist
# A tibble: 1,040 x 27
RespondentID travel_amount do_recline height
<dbl> <fct> <fct> <fct>
1 3436139758 Once a year or le... NA NA
2 3434278696 Once a year or le... About half th... "6'3\""
3 3434275578 Once a year or le... Usually "5'8\""
4 3434268208 Once a year or le... Always "5'11\""
5 3434250245 Once a month or l... About half th... "5'7\""
# ... with 1,035 more rows, and 23 more variables:
# children_sub_18 <fct>, middle_arm_rest_three <fct>,
# middle_arm_rest_two <fct>, window_shade_control <fct>,
# rude_move_seats <fct>, rude_talk <fct>,
# times_get_up <fct>, recliner_obligation <fct>,
# rude_recline <fct>, eliminate_recline <fct>,
# rude_switch_seats_friend <fct>,
# rude_switch_seats_family <fct>, rude_bathroom <fct>,
# rude_walking <fct>, rude_baby <fct>,
# rude_unruly_children <fct>, personal_electronics <fct>,
# smoking <fct>, gender <fct>, age <fct>, income <fct>,
# education <fct>, location <fct>
# Show the factor levels of the middle_arm_rest_three column.
flying_etiquette[["middle_arm_rest_three"]] %>%
  levels()
[1] "Other (please specify)"
[2] "The arm rests should be shared"
[3] "The people in the aisle and window seats get both arm rests"
[4] "The person in the middle seat gets both arm rests"
[5] "Whoever puts their arm on the arm rest first"
# Bar chart of middle_arm_rest_three; fct_infreq() orders the bars from the
# most to the least frequent level.
flying_etiquette %>%
  ggplot(aes(x = fct_infreq(middle_arm_rest_three))) +
  geom_bar() +
  xlab("Arm rest opinions")
# Bar chart of middle_arm_rest_three; fct_infreq() orders the bars from the
# most to the least frequent level.
flying_etiquette %>%
  ggplot(aes(x = fct_infreq(middle_arm_rest_three))) +
  geom_bar() +
  xlab("Arm rest opinions")
# Same frequency-ordered bar chart, with the axes swapped by coord_flip() so
# the long level labels run along the vertical axis.
flying_etiquette %>%
  ggplot(aes(x = fct_infreq(middle_arm_rest_three))) +
  geom_bar() +
  coord_flip() +
  xlab("Arm rest opinions")
# Shorten every answer label with fct_recode() ("new name" = "old level") and
# tabulate the result.
# fct_recode() keeps the original level order, so the recoded factor is
# wrapped in fct_infreq() to order the levels from most to least common.
# That is the order of the printed counts; NA is not a level and is listed
# last by count().
flying_etiquette %>%
  mutate(
    middle_arm_rest_three = fct_infreq(fct_recode(
      middle_arm_rest_three,
      "Other" = "Other (please specify)",
      "Everyone should share" = "The arm rests should be shared",
      "Aisle and window people" =
        "The people in the aisle and window seats get both arm rests",
      "Middle person" = "The person in the middle seat gets both arm rests",
      "Fastest person" = "Whoever puts their arm on the arm rest first"
    ))
  ) %>%
  count(middle_arm_rest_three)
# A tibble: 6 x 2
middle_arm_rest_three n
<fct> <int>
1 Everyone should share 587
2 Middle person 119
3 Fastest person 87
4 Other 45
5 Aisle and window people 18
6 NA 184
# Rename a single level; levels that are not mentioned keep their names, and
# the level order is unchanged.
flying_etiquette %>%
  mutate(
    middle_arm_rest_three = fct_recode(
      middle_arm_rest_three,
      "Everyone should share" = "The arm rests should be shared"
    )
  ) %>%
  count(middle_arm_rest_three)
# A tibble: 6 x 2
middle_arm_rest_three n
<fct> <int>
1 Other (please specify) 45
2 Everyone should share 587
3 The people in the aisle and window seats get both … 18
4 The person in the middle seat gets both arm rests 119
5 Whoever puts their arm on the arm rest first 87
6 NA 184
# The old level given here does not match any existing level, so the factor
# comes back unchanged and fct_recode() raises an "Unknown levels" warning.
flying_etiquette %>%
  mutate(
    middle_arm_rest_three = fct_recode(
      middle_arm_rest_three,
      "Everyone should share" = "arm rests should be share"
    )
  ) %>%
  count(middle_arm_rest_three)
# A tibble: 6 x 2
middle_arm_rest_three n
<fct> <int>
1 Other (please specify) 45
2 The arm rests should be shared 587
3 The people in the aisle and window seats get both … 18
4 The person in the middle seat gets both arm rests 119
5 Whoever puts their arm on the arm rest first 87
6 NA 184
Warning message:
Unknown levels in `f`: arm rests should be share
Categorical Data in the Tidyverse