Inference for Categorical Data in R
Andrew Bray
Assistant Professor of Statistics at Reed College
# Bar chart of the observed party counts in gss2016. The horizontal line is
# drawn at 149 / 3, i.e. the height every bar would have if 149 responses
# were split evenly across three categories.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
# lines; `size` is kept here so the call behaves as originally written.
ggplot(data = gss2016, mapping = aes(x = party)) +
  geom_bar() +
  geom_hline(yintercept = 149 / 3, color = "goldenrod", size = 2)
# Keep only the party column and tabulate it into a table of counts.
tab <- gss2016 %>%
  select(party) %>%
  table()

# Show the resulting counts.
tab
Dem Ind Rep
43 72 34
# Null distribution: each of the three party labels is equally likely.
p_uniform <- c(Dem = 1 / 3, Ind = 1 / 3, Rep = 1 / 3)

# Chi-squared goodness-of-fit statistic of the observed counts in `tab`
# against the uniform null. The component name is spelled out in full:
# `$stat` only resolved through partial matching of `statistic`.
chisq.test(tab, p = p_uniform)$statistic
X-squared
15.87919
# Draw a single simulated data set under the point null `p_uniform`
# and print it: declare the response, state the null, then generate.
gss2016 %>%
  specify(response = party) %>%
  hypothesize(null = "point", p = p_uniform) %>%
  generate(reps = 1, type = "simulate")
# A tibble: 149 x 2
# Groups: replicate [1]
party replicate
<fct> <fct>
1 I 1
2 D 1
3 I 1
4 I 1
5 D 1
6 R 1
7 I 1
8 R 1
9 D 1
10 I 1
# ... with 139 more rows
# Store one simulated data set generated under the point null `p_uniform`.
# The null type is written with straight double quotes; typographic quotes
# are not valid string delimiters in R and make the call fail to parse.
sim_1 <- gss2016 %>%
  specify(response = party) %>%
  hypothesize(null = "point", p = p_uniform) %>%
  generate(reps = 1, type = "simulate")

# Bar chart of the party counts in the simulated data set.
ggplot(sim_1, aes(x = party)) +
  geom_bar()
Inference for Categorical Data in R