Inferentie voor categorische gegevens in R
Andrew Bray
Assistant Professor of Statistics at Reed College
Conclusie: het echte aandeel Amerikanen dat gelukkig is, ligt tussen 0,705 en 0,841.
Wat bedoelen we met confident?
# Approximate 95% bootstrap confidence interval for the proportion of
# rows in `gss` with year == 2016 whose `happy` value is "HAPPY".
ds1 <- filter(gss, year == 2016)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds1 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds1 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval: point estimate plus or minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7073114 0.8393553











# Approximate 95% bootstrap confidence interval for the proportion of
# rows in `gss` with year == 2014 whose `happy` value is "HAPPY".
ds2 <- filter(gss, year == 2014)

# Point estimate: sample proportion of "HAPPY" responses.
# Computed from ds2 (the 2014 subset), not from ds1.
p_hat <- ds2 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates drawn from ds2.
SE <- ds2 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval: point estimate plus or minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.8348831 0.9384503

# Approximate 95% bootstrap confidence interval for the proportion of
# rows in `gss` with year == 2012 whose `happy` value is "HAPPY".
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
# Computed from ds3 (the 2012 subset), not from ds1.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates drawn from ds3.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval: point estimate plus or minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# rows in `gss` with year == 2012 whose `happy` value is "HAPPY".
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval: point estimate plus or minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# rows in `gss` with year == 2012 whose `happy` value is "HAPPY".
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval: point estimate plus or minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# rows in `gss` with year == 2012 whose `happy` value is "HAPPY".
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval: point estimate plus or minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# rows in `gss` with year == 2012 whose `happy` value is "HAPPY".
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval: point estimate plus or minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# rows in `gss` with year == 2012 whose `happy` value is "HAPPY".
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval: point estimate plus or minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

Interpretatie: “We zijn 95% zeker dat het echte aandeel Amerikanen dat gelukkig is tussen 0,705 en 0,841 ligt.”
Breedte van het interval hangt af van:
- n
- p
Inferentie voor categorische gegevens in R