Inference for Categorical Data in R
Andrew Bray
Assistant Professor of Statistics at Reed College
# Bar chart with one bar per distinct value of `cappun` in `gss2016`.
ggplot(gss2016, aes(x = cappun)) +
  geom_bar()
# Observed statistic: the share of rows in `gss2016` whose `cappun` value
# equals "FAVOR". The summary column is named so it can be pulled explicitly.
p_hat <- gss2016 %>%
  summarize(prop_favor = mean(cappun == "FAVOR")) %>%
  pull(prop_favor)

# Print the observed proportion.
p_hat
0.5666667
# Null distribution of the sample proportion of "FAVOR":
#   1. declare `cappun` as the response, with "FAVOR" counted as a success;
#   2. state the point null hypothesis p = 0.5;
#   3. simulate 500 replicate data sets under that null;
#   4. compute the proportion of successes in each replicate.
null <- gss2016 %>%
  specify(response = cappun, success = "FAVOR") %>%
  hypothesize(null = "point", p = 0.5) %>%
  generate(reps = 500, type = "simulate") %>%
  calculate(stat = "prop")
# A tibble: 500 x 2
replicate stat
<fct> <dbl>
1 1 0.48
2 2 0.447
3 3 0.48
4 4 0.44
5 5 0.407
6 6 0.52
7 7 0.413
8 8 0.553
9 9 0.52
10 10 0.467
# … with 490 more rows
# Density of the simulated null proportions, with a red vertical line
# marking the observed proportion `p_hat`.
null %>%
  ggplot(aes(x = stat)) +
  geom_density() +
  geom_vline(xintercept = p_hat, color = "red")
# Two-sided p-value: twice the share of simulated proportions that are at
# least as large as the observed `p_hat`.
#   * `>=` (not `>`) so replicates that tie the observed value count as
#     "as or more extreme" -- ties are common because the simulated
#     proportions take only a limited set of discrete values.
#   * `min(..., 1)` because doubling a tail proportion can exceed 1, which
#     is not a valid probability.
# NOTE(review): doubling the upper tail assumes `p_hat` lies above the null
# value of 0.5; if it were below, the lower tail (`stat <= p_hat`) should be
# doubled instead.
null %>%
  summarize(p_value = min(2 * mean(stat >= p_hat), 1)) %>%
  pull(p_value)
Inference for Categorical Data in R