Two numeric explanatory variables

Intermediate Regression in R

Richie Cotton

Data Evangelist at DataCamp

Visualizing 3 numeric variables

  • 3D scatter plot
  • 2D scatter plot with response as color
Intermediate Regression in R

Another column for the fish dataset

species mass_g length_cm height_cm
Bream 1000 33.5 18.96
Bream 925 36.2 18.75
Roach 290 24.0 8.88
Roach 390 29.5 9.48
Perch 1100 39.0 12.80
Perch 1000 40.2 12.60
Pike 1250 52.0 10.69
Pike 1650 59.0 10.81
Intermediate Regression in R

3D scatter plot

# plot3D provides scatter3D().
library(plot3D)

# 3D scatter plot of the fish data: length on x, height on y, mass on z.
scatter3D(
  x = fish$length_cm,
  y = fish$height_cm,
  z = fish$mass_g
)
# plot3D provides scatter3D(); magrittr provides the %$% pipe.
library(plot3D)
library(magrittr)

# %$% makes the columns of fish available by name on the right-hand side,
# so the repeated fish$ prefix is no longer needed.
fish %$%
  scatter3D(x = length_cm, y = height_cm, z = mass_g)
Intermediate Regression in R

3D scatter plot

# plot3D provides scatter3D(); magrittr provides the %$% pipe.
library(plot3D)
library(magrittr)

# Plot the three numeric fish columns together:
# length on x, height on y, mass on z.
fish %$%
  scatter3D(x = length_cm, y = height_cm, z = mass_g)

scatter3d-fish-length-height-mass.png

Intermediate Regression in R

2D scatter plot, color for response

# 2D alternative to the 3D plot: both explanatory variables go on the axes
# and the response (mass_g) is mapped to point color.
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = height_cm, color = mass_g)
) +
  geom_point()

scatter-color-fish-length-height-mass.png

Intermediate Regression in R

Viridis color scales

# Same length-vs-height scatter plot colored by mass, but with the
# "inferno" viridis palette replacing the default continuous color scale.
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = height_cm, color = mass_g)
) +
  geom_point() +
  scale_color_viridis_c(option = "inferno")

scatter-color-fish-length-height-mass-inferno.png

Intermediate Regression in R

Modeling with 2 numeric explanatory variables

# Linear regression of mass on length and height.
# `+` adds each explanatory variable as its own term, with no interaction.
mdl_mass_vs_both <- lm(
  formula = mass_g ~ length_cm + height_cm,
  data = fish
)
Call:
lm(formula = mass_g ~ length_cm + height_cm, data = fish)

Coefficients:
(Intercept)    length_cm    height_cm  
    -622.16        28.97        26.34
Intermediate Regression in R

The prediction flow

# Step 1: build a grid containing every combination of the chosen
# length and height values.
explanatory_data <- expand_grid(
  length_cm = seq(from = 5, to = 60, by = 5),
  height_cm = seq(from = 2, to = 20, by = 2)
)

# Step 2: add a mass_g column holding the model's prediction for each grid row.
prediction_data <- explanatory_data %>%
  mutate(
    mass_g = predict(mdl_mass_vs_both, newdata = explanatory_data)
  )
Intermediate Regression in R

Plotting the predictions

# Observed fish, colored by mass with the "inferno" palette.
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = height_cm, color = mass_g)
) +
  geom_point() +
  scale_color_viridis_c(option = "inferno") +
  # Second point layer: the predictions, drawn from prediction_data with
  # their own shape and size so they can be told apart from the observations.
  geom_point(
    data = prediction_data,
    shape = 15,
    size = 3
  )

scatter-color-fish-length-height-mass-inferno-pred.png

Intermediate Regression in R

Including an interaction

# Linear regression of mass on length and height, including their interaction.
# `*` expands to both main effects plus the length_cm:height_cm term.
mdl_mass_vs_both_inter <- lm(
  formula = mass_g ~ length_cm * height_cm,
  data = fish
)
Call:
lm(formula = mass_g ~ length_cm * height_cm, data = fish)

Coefficients:
        (Intercept)            length_cm            height_cm  length_cm:height_cm  
           159.1144               0.3001             -78.1234               3.5455
Intermediate Regression in R

The prediction flow again

# Step 1: build a grid containing every combination of the chosen
# length and height values.
explanatory_data <- expand_grid(
  length_cm = seq(from = 5, to = 60, by = 5),
  height_cm = seq(from = 2, to = 20, by = 2)
)

# Step 2: add a mass_g column holding the interaction model's prediction
# for each grid row.
prediction_data <- explanatory_data %>%
  mutate(
    mass_g = predict(mdl_mass_vs_both_inter, newdata = explanatory_data)
  )
Intermediate Regression in R

Plotting the predictions

# Observed fish, colored by mass with the "inferno" palette.
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = height_cm, color = mass_g)
) +
  geom_point() +
  scale_color_viridis_c(option = "inferno") +
  # Second point layer: predictions taken from prediction_data, drawn with
  # their own shape and size so they can be told apart from the observations.
  geom_point(
    data = prediction_data,
    shape = 15,
    size = 3
  )

scatter-color-fish-length-height-mass-inferno-pred-inter.png

Intermediate Regression in R

Let's practice!

Intermediate Regression in R

Preparing Video For Download...