More than 2 explanatory variables

Intermediate Regression in R

Richie Cotton

Data Evangelist at DataCamp

From last time

# Scatter plot of the fish data: length on x, height on y, mass as point color
# (continuous viridis scale, "inferno" palette).
ggplot(fish, aes(x = length_cm, y = height_cm, color = mass_g)) +
  geom_point() +
  scale_color_viridis_c(option = "inferno")

scatter-color-fish-length-height-mass-inferno.png

Intermediate Regression in R

Faceting by species

# Same length/height/mass scatter plot, drawn as one panel per species.
ggplot(fish, aes(x = length_cm, y = height_cm, color = mass_g)) +
  geom_point() +
  scale_color_viridis_c(option = "inferno") +
  facet_wrap(vars(species))

scatter-fish-length-height-mass-species.png

Intermediate Regression in R

Different levels of interaction

No interactions

# Main effects only; `+ 0` drops the global intercept.
lm(
  formula = mass_g ~ length_cm + height_cm + species + 0,
  data = fish
)

2-way interactions between pairs of variables

# Main effects plus every pairwise (`a:b`) interaction, no global intercept.
lm(
  formula = mass_g ~ length_cm + height_cm + species +
    length_cm:height_cm + length_cm:species + height_cm:species +
    0,
  data = fish
)

3-way interaction between all three variables

# Main effects, all pairwise interactions, and the single 3-way interaction;
# no global intercept.
lm(
  formula = mass_g ~ length_cm + height_cm + species +
    length_cm:height_cm + length_cm:species + height_cm:species +
    length_cm:height_cm:species +
    0,
  data = fish
)
Intermediate Regression in R

All the interactions

# Explicit form: every main effect, 2-way and 3-way interaction written out.
lm(
  formula = mass_g ~ length_cm + height_cm + species +
    length_cm:height_cm + length_cm:species + height_cm:species +
    length_cm:height_cm:species +
    0,
  data = fish
)

# Shorthand: `*` between the variables requests the main effects together
# with all of their interactions.
lm(formula = mass_g ~ length_cm * height_cm * species + 0, data = fish)
Intermediate Regression in R

Only 2-way interactions

# Explicit form: main effects plus every pairwise interaction.
lm(
  formula = mass_g ~ length_cm + height_cm + species +
    length_cm:height_cm + length_cm:species + height_cm:species +
    0,
  data = fish
)

# Shorthand: inside a formula, `( ... ) ^ 2` means "all terms up to 2-way
# interactions", not arithmetic squaring.
lm(
  formula = mass_g ~ (length_cm + height_cm + species) ^ 2 + 0,
  data = fish
)

# Contrast: to actually square a variable, the power must sit INSIDE I() so
# that `^` is evaluated arithmetically. With the power outside I(), `^` is
# still the formula crossing operator and the term stays plain length_cm.
lm(
  formula = mass_g ~ I(length_cm ^ 2) + height_cm + species + 0,
  data = fish
)
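Note: inside a model formula, `^` controls interaction depth rather than arithmetic power. To square explanatory variables, wrap the arithmetic in I(); see "Introduction to Regression in R", Chapter 2, "Transforming variables".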
Intermediate Regression in R

The prediction flow

# Step 1: fit the model with all interactions. The intercept is removed with
# `+ 0` (a `* 0` here would be a formula typo, not an intercept removal).
mdl_mass_vs_all <- lm(
  formula = mass_g ~ length_cm * height_cm * species + 0,
  data = fish
)

# Step 2: build every combination of the chosen lengths, heights and the
# species present in the data.
explanatory_data <- expand_grid(
  length_cm = seq(5, 60, 6),
  height_cm = seq(2, 20, 2),
  species = unique(fish$species)
)

# Step 3: add the model's predicted mass as a new column.
prediction_data <- explanatory_data %>%
  mutate(mass_g = predict(mdl_mass_vs_all, explanatory_data))
Intermediate Regression in R

Visualizing predictions

# Faceted scatter plot of the observed fish, with the rows of prediction_data
# drawn on top as size-3 points using shape 15. The second layer inherits the
# same x/y/color mapping, so predicted mass uses the same color scale.
ggplot(fish, aes(x = length_cm, y = height_cm, color = mass_g)) +
  geom_point() +
  scale_color_viridis_c(option = "inferno") +
  facet_wrap(vars(species)) +
  geom_point(data = prediction_data, size = 3, shape = 15)

scatter-fish-length-height-mass-species-pred.png

Intermediate Regression in R

Let's practice!

Intermediate Regression in R

Preparing Video For Download...