Two numeric explanatory variables

Regressione intermedia in R

Richie Cotton

Data Evangelist at DataCamp

Visualizing 3 numeric variables

  • 3D scatter plot
  • 2D scatter plot with response as color
Regressione intermedia in R

Another column for the fish dataset

species mass_g length_cm height_cm
Bream 1000 33.5 18.96
Bream 925 36.2 18.75
Roach 290 24.0 8.88
Roach 390 29.5 9.48
Perch 1100 39.0 12.80
Perch 1000 40.2 12.60
Pike 1250 52.0 10.69
Pike 1650 59.0 10.81
Regressione intermedia in R

3D scatter plot

library(plot3D)

# 3D scatter plot of mass (z) against length (x) and height (y); each column
# is pulled out of `fish` explicitly with `$`.
scatter3D(
  x = fish$length_cm,
  y = fish$height_cm,
  z = fish$mass_g
)
library(plot3D)
library(magrittr)

# The exposition pipe `%$%` exposes the columns of `fish` to the call on its
# right-hand side, so they can be referred to by bare name.
fish %$%
  scatter3D(x = length_cm, y = height_cm, z = mass_g)
Regressione intermedia in R

3D scatter plot

library(plot3D)
library(magrittr)

# The exposition pipe `%$%` exposes the columns of `fish` to the call on its
# right-hand side, so they can be referred to by bare name.
fish %$%
  scatter3D(x = length_cm, y = height_cm, z = mass_g)

scatter3d-fish-length-height-mass.png

Regressione intermedia in R

2D scatter plot, color for response

# 2D alternative to the 3D plot: length on x, height on y, and the response
# (mass) encoded as point color.
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = height_cm, color = mass_g)
) +
  geom_point()

scatter-color-fish-length-height-mass.png

Regressione intermedia in R

Viridis color scales

# Length vs. height with mass as point color, drawn with the continuous
# viridis color scale using its "inferno" option.
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = height_cm, color = mass_g)
) +
  geom_point() +
  scale_color_viridis_c(option = "inferno")

scatter-color-fish-length-height-mass-inferno.png

Regressione intermedia in R

Modeling with 2 numeric explanatory variables

# Linear regression of mass on both numeric explanatory variables; `+` gives
# each variable its own slope and adds no interaction term.
mdl_mass_vs_both <- lm(
  formula = mass_g ~ length_cm + height_cm,
  data = fish
)
Call:
lm(formula = mass_g ~ length_cm + height_cm, data = fish)

Coefficients:
(Intercept)    length_cm    height_cm  
    -622.16        28.97        26.34
Regressione intermedia in R

The prediction flow

# Build a grid containing every combination of the chosen lengths and heights.
explanatory_data <- expand_grid(
  length_cm = seq(from = 5, to = 60, by = 5),
  height_cm = seq(from = 2, to = 20, by = 2)
)

# Add the model's predicted mass for each row of the grid.
prediction_data <- explanatory_data %>%
  mutate(mass_g = predict(mdl_mass_vs_both, newdata = explanatory_data))
Regressione intermedia in R

Plotting the predictions

# Observed fish as the default points, with the predictions drawn on top.
# The second layer inherits the x/y/color mappings, so predicted mass shares
# the same color scale; shape 15 (filled square) and the larger size set the
# predictions apart from the data.
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = height_cm, color = mass_g)
) +
  geom_point() +
  scale_color_viridis_c(option = "inferno") +
  geom_point(data = prediction_data, shape = 15, size = 3)

scatter-color-fish-length-height-mass-inferno-pred.png

Regressione intermedia in R

Including an interaction

# Same response and explanatory variables, but `*` expands to both main
# effects plus the length_cm:height_cm interaction term.
mdl_mass_vs_both_inter <- lm(
  formula = mass_g ~ length_cm * height_cm,
  data = fish
)
Call:
lm(formula = mass_g ~ length_cm * height_cm, data = fish)

Coefficients:
        (Intercept)            length_cm            height_cm  length_cm:height_cm  
           159.1144               0.3001             -78.1234               3.5455
Regressione intermedia in R

The prediction flow again

# Build a grid containing every combination of the chosen lengths and heights.
explanatory_data <- expand_grid(
  length_cm = seq(from = 5, to = 60, by = 5),
  height_cm = seq(from = 2, to = 20, by = 2)
)

# Add the interaction model's predicted mass for each row of the grid.
prediction_data <- explanatory_data %>%
  mutate(mass_g = predict(mdl_mass_vs_both_inter, newdata = explanatory_data))
Regressione intermedia in R

Plotting the predictions

# Observed fish as the default points, with the predictions drawn on top.
# The second layer inherits the x/y/color mappings, so predicted mass shares
# the same color scale; shape 15 (filled square) and the larger size set the
# predictions apart from the data.
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = height_cm, color = mass_g)
) +
  geom_point() +
  scale_color_viridis_c(option = "inferno") +
  geom_point(data = prediction_data, shape = 15, size = 3)

scatter-color-fish-length-height-mass-inferno-pred-inter.png

Regressione intermedia in R

Let's practice!

Regressione intermedia in R

Preparing Video For Download...