Modeling with Data in the Tidyverse
Albert Y. Kim
Assistant Professor of Statistical and Data Sciences
# Model 1: regress log10 price on log10 size and year built
model_price_1 <- lm(
  formula = log10_price ~ log10_size + yr_built,
  data = house_prices
)
# Sum of squared residuals for model 1: square every residual and add them up
get_regression_points(model_price_1) %>%
  summarize(sum_sq_residuals = sum(residual^2))
# A tibble: 1 x 1
sum_sq_residuals
<dbl>
1 585.
# Mean squared error: same computation, averaging with mean() instead of sum()
get_regression_points(model_price_1) %>%
  summarize(mse = mean(residual^2))
# A tibble: 1 x 1
mse
<dbl>
1 0.0271
# Root mean squared error: the square root of the MSE, which puts the error
# back on the scale of the outcome variable (log10_price)
get_regression_points(model_price_1) %>%
  summarize(
    mse = mean(residual^2),
    rmse = sqrt(mse)
  )
# A tibble: 1 x 2
mse rmse
<dbl> <dbl>
1 0.0271 0.164
# Recreate data frame of "new" houses: two houses described only by their
# predictor values (log10 size and condition as a factor).
# tibble() is used because data_frame() is deprecated in the tibble package.
new_houses <- tibble(
  log10_size = c(2.9, 3.6),
  condition = factor(c(3, 4))
)
new_houses
# A tibble: 2 x 2
log10_size condition
<dbl> <fct>
1 2.9 3
2 3.6 4
# Get predictions: fitted log10 prices from model 3 for the new houses
get_regression_points(model_price_3, newdata = new_houses)
# A tibble: 2 x 4
ID log10_size condition log10_price_hat
<int> <dbl> <fct> <dbl>
1 1 2.9 3 5.34
2 2 3.6 4 5.94
# Compute RMSE
# NOTE: this pipeline stops with the error shown below. new_houses holds only
# predictor columns (log10_size, condition) and no observed log10_price, so
# the regression points carry predictions (log10_price_hat) but no `residual`
# column for mutate() to square.
get_regression_points(model_price_3,
newdata = new_houses) %>%
mutate(sq_residuals = residual^2) %>%
summarize(mse = mean(sq_residuals)) %>%
mutate(rmse = sqrt(mse))
Error in mutate_impl(.data, dots) :
Evaluation error: object 'residual' not found.
Modeling with Data in the Tidyverse