Analyzing Survey Data in R
Kelly McConville
Assistant Professor of Statistics
# Estimate the mean of DaysPhysHlthBad within each level of SmokeNow,
# using the NHANES_design survey design object. Missing values are
# dropped (na.rm = TRUE); keep.names = FALSE leaves default row names.
out <- svyby(
  ~DaysPhysHlthBad,
  ~SmokeNow,
  design = NHANES_design,
  FUN = svymean,
  keep.names = FALSE,
  na.rm = TRUE
)
out
  SmokeNow DaysPhysHlthBad        se
1       No        3.908984 0.1996290
2      Yes        4.951750 0.2346189
# Bar chart of the estimated mean number of bad health days for each
# SmokeNow group, one bar per row of `out`.
ggplot(out, aes(x = SmokeNow, y = DaysPhysHlthBad)) +
  geom_col() +
  xlab("Smoker?") +
  ylab("Monthly Average Number\n of Bad Health Days")
# Add the bounds of a one-standard-error interval around each group mean
# (estimate minus/plus se), then print the augmented table.
out <- out %>%
  mutate(
    lower = DaysPhysHlthBad - se,
    upper = DaysPhysHlthBad + se
  )
out
  SmokeNow DaysPhysHlthBad        se    lower    upper
1       No        3.908984 0.1996290 3.709355 4.108613
2      Yes        4.951750 0.2346189 4.717131 5.186369
# Bar chart of the group means with error bars spanning the `lower` and
# `upper` columns of `out` (mean minus/plus one standard error).
ggplot(out, aes(x = SmokeNow, y = DaysPhysHlthBad)) +
  geom_col(fill = "lightblue") +
  # The interval aesthetics are only needed by the error-bar layer.
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.5) +
  xlab("Smoker?") +
  ylab("Monthly Average Number\n of Bad Health Days")
# Histogram of DaysPhysHlthBad in which each row of NHANESraw contributes
# its WTMEC4YR weight to its bin instead of a count of one.
ggplot(NHANESraw, aes(x = DaysPhysHlthBad, weight = WTMEC4YR)) +
  geom_histogram(colour = "white", binwidth = 1) +
  xlab("Number of Bad Health Days in a Month")
# Weighted density of DaysPhysHlthBad. Rows with a missing response are
# removed first so that the rescaled weights (WTMEC4YR_std) sum to one
# over exactly the rows that are plotted.
NHANESraw %>%
  filter(!is.na(DaysPhysHlthBad)) %>%
  mutate(WTMEC4YR_std = WTMEC4YR / sum(WTMEC4YR)) %>%
  ggplot(aes(x = DaysPhysHlthBad, weight = WTMEC4YR_std)) +
  geom_density(fill = "lightblue", bw = 0.6) +
  xlab("Number of Bad Health Days in a Month")
# Weighted density of DaysPhysHlthBad, one panel per SmokeNow level.
# Rows missing either variable are removed, then the weights are rescaled
# within each SmokeNow group so that they sum to one per panel.
NHANESraw %>%
  filter(!is.na(DaysPhysHlthBad), !is.na(SmokeNow)) %>%
  group_by(SmokeNow) %>%
  mutate(WTMEC4YR_std = WTMEC4YR / sum(WTMEC4YR)) %>%
  # Grouping is only needed for the per-group sum above; drop it so a
  # plain (ungrouped) data frame is handed to ggplot().
  ungroup() %>%
  ggplot(mapping = aes(x = DaysPhysHlthBad, weight = WTMEC4YR_std)) +
  geom_density(bw = 0.6, fill = "lightblue") +
  labs(x = "Number of Bad Health Days in a Month") +
  facet_wrap(~SmokeNow, labeller = "label_both")
Analyzing Survey Data in R