Inference for Linear Regression in R
Jo Hardin
Professor, Pomona College
library(broom)
# 95% confidence intervals for the mean Calories at selected Fat values.
alpha <- 0.05
# Two-sided t critical value; the simple regression Calories ~ Fat estimates
# an intercept and a slope, hence n - 2 degrees of freedom.
crit_val <- qt(1 - alpha / 2, df = nrow(starbucks) - 2)
newfood <- data.frame(Fat = c(0, 10, 20, 30))
# se_fit = TRUE asks augment() for the .se.fit column explicitly. Current
# broom releases leave it out by default, which would make the mutate()
# below fail with "object '.se.fit' not found".
augment(lm(Calories ~ Fat, data = starbucks),
        newdata = newfood, se_fit = TRUE) %>%
  mutate(
    lowMean = .fitted - crit_val * .se.fit,
    upMean = .fitted + crit_val * .se.fit
  )
Fat .fitted .se.fit lowMean upMean
1 0 147.9833 14.971985 118.3153 177.6513
2 10 275.5693 8.516206 258.6938 292.4447
3 20 403.1552 7.378555 388.5341 417.7763
4 30 530.7412 13.035040 504.9114 556.5710
# Confidence bounds for the mean response at every observed Fat value.
# Uses crit_val, the t critical value computed earlier in the script.
# se_fit = TRUE is required on current broom releases, where augment() no
# longer returns .se.fit by default and select() would otherwise error.
predMeans <- augment(lm(Calories ~ Fat, data = starbucks), se_fit = TRUE) %>%
  select(Calories, Fat, .fitted, .se.fit) %>%
  mutate(
    lowMean = .fitted - crit_val * .se.fit,
    upMean = .fitted + crit_val * .se.fit
  )
head(predMeans)
Calories Fat .fitted .se.fit lowMean upMean
1 300 5 211.7763 11.473843 189.0401 234.5125
2 380 6 224.5349 10.823741 203.0869 245.9828
3 410 22 428.6724 8.176354 412.4704 444.8744
4 460 23 441.4310 8.663769 424.2632 458.5989
5 420 22 428.6724 8.176354 412.4704 444.8744
6 380 16 352.1209 6.756473 338.7324 365.5093
# Scatterplot of Calories against Fat with the fitted least-squares line
# (its built-in band switched off via se = FALSE) and a translucent ribbon
# spanning lowMean to upMean.
ggplot(data = predMeans, mapping = aes(x = Fat, y = Calories)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  geom_ribbon(mapping = aes(ymin = lowMean, ymax = upMean), alpha = 0.2)
# 95% prediction intervals for an individual Calories value at each
# observed Fat value.
alpha <- 0.05
# The model below is fit to starbucks, so the degrees of freedom (n - 2)
# must be taken from starbucks too; the earlier nrow(twins) referred to a
# data set that this model is not fit to.
crit_val <- qt(1 - alpha / 2, df = nrow(starbucks) - 2)
FatCal_lm <- lm(Calories ~ Fat, data = starbucks)
FatCal_gl <- glance(FatCal_lm)
# sigma from glance(): the model's estimated residual standard error.
FatCal_sig <- pull(FatCal_gl, sigma)
# se_fit = TRUE requests .se.fit explicitly (not returned by default in
# current broom). The prediction standard error combines the residual
# variance with the variance of the fitted mean.
FatCal_pred <- augment(FatCal_lm, se_fit = TRUE) %>%
  mutate(.se.pred = sqrt(FatCal_sig^2 + .se.fit^2))
predResp <- FatCal_pred %>%
  mutate(
    lowResp = .fitted - crit_val * .se.pred,
    upResp = .fitted + crit_val * .se.pred
  )
predResp
A tibble: 113 x 12
Calories Fat .fitted .se.fit .resid .hat .sigma
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 300 5 211.7763 11.473843 88.223722 0.025567957 71.57640
2 380 6 224.5349 10.823741 155.465125 0.022752704 70.50502
3 410 22 428.6724 8.176354 -18.672436 0.012983674 72.05959
... with 103 more rows, and 5 more variables: .cooksd <dbl>, .std.resid <dbl>, .se.pred <dbl>,
lowResp <dbl>, upResp <dbl>
# Scatterplot of Calories against Fat with the fitted least-squares line
# (its built-in band switched off via se = FALSE) and a translucent ribbon
# spanning lowResp to upResp.
ggplot(data = predResp, mapping = aes(x = Fat, y = Calories)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  geom_ribbon(mapping = aes(ymin = lowResp, ymax = upResp), alpha = 0.2)
Inference for Linear Regression in R