Inferenza per dati categorici in R
Andrew Bray
Assistant Professor of Statistics at Reed College
Conclusione: la vera proporzione di americani felici è tra 0,705 e 0,841.
Cosa intendiamo per “sicuri al 95%”?
# Approximate 95% bootstrap confidence interval for the proportion of
# respondents with happy == "HAPPY" in the 2016 rows of `gss`.

# Keep only the 2016 observations.
ds1 <- filter(gss, year == 2016)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds1 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates (specify/generate/calculate are presumably from
# the infer package -- confirm against the loaded libraries).
SE <- ds1 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval bounds: point estimate plus/minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7073114 0.8393553











# Approximate 95% bootstrap confidence interval for the proportion of
# respondents with happy == "HAPPY" in the 2014 rows of `gss`.

# Keep only the 2014 observations.
ds2 <- filter(gss, year == 2014)

# Point estimate: sample proportion of "HAPPY" responses.
# Computed from ds2 (the 2014 subset), not from the earlier ds1.
p_hat <- ds2 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates drawn from ds2.
SE <- ds2 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval bounds: point estimate plus/minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.8348831 0.9384503

# Approximate 95% bootstrap confidence interval for the proportion of
# respondents with happy == "HAPPY" in the 2012 rows of `gss`.

# Keep only the 2012 observations.
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
# Computed from ds3 (the 2012 subset), not from the earlier ds1.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates drawn from ds3.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval bounds: point estimate plus/minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# respondents with happy == "HAPPY" in the 2012 rows of `gss`.

# Keep only the 2012 observations.
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval bounds: point estimate plus/minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# respondents with happy == "HAPPY" in the 2012 rows of `gss`.

# Keep only the 2012 observations.
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval bounds: point estimate plus/minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# respondents with happy == "HAPPY" in the 2012 rows of `gss`.

# Keep only the 2012 observations.
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval bounds: point estimate plus/minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# respondents with happy == "HAPPY" in the 2012 rows of `gss`.

# Keep only the 2012 observations.
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval bounds: point estimate plus/minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

# Approximate 95% bootstrap confidence interval for the proportion of
# respondents with happy == "HAPPY" in the 2012 rows of `gss`.

# Keep only the 2012 observations.
ds3 <- filter(gss, year == 2012)

# Point estimate: sample proportion of "HAPPY" responses.
p_hat <- ds3 %>%
  summarize(mean(happy == "HAPPY")) %>%
  pull()

# Standard error: standard deviation of the proportion across 500
# bootstrap replicates.
SE <- ds3 %>%
  specify(response = happy, success = "HAPPY") %>%
  generate(reps = 500, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  summarize(sd(stat)) %>%
  pull()

# Interval bounds: point estimate plus/minus two standard errors.
c(p_hat - 2 * SE, p_hat + 2 * SE)
0.7626359 0.8906974

Interpretazione: “Siamo al 95% sicuri che la vera proporzione di americani felici sia tra 0,705 e 0,841.”
L’ampiezza dell’intervallo dipende da:
- n (la dimensione del campione)
- p (la proporzione)

Inferenza per dati categorici in R