Modellen per categorie

Intermediate Regression in R

Richie Cotton

Data Evangelist at DataCamp

4 categorieën

# List the distinct values found in the species column of fish
unique(fish[["species"]])
"Bream" "Roach" "Perch" "Pike"
Intermediate Regression in R

De dataset splitsen

De slimme manier

  • base-R: split() + lapply()
  • dplyr: nest_by() + mutate()

De eenvoudige manier

# Build one data frame per species by keeping only that species' rows
bream <- filter(fish, species == "Bream")
perch <- filter(fish, species == "Perch")
pike <- filter(fish, species == "Pike")
roach <- filter(fish, species == "Roach")
Intermediate Regression in R

4 modellen

# Linear regression of mass on length, fitted to the bream rows only
mdl_bream <- lm(formula = mass_g ~ length_cm, data = bream)
Call:
lm(formula = mass_g ~ length_cm, data = bream)

Coefficients:
(Intercept)    length_cm  
   -1035.35        54.55
# Linear regression of mass on length, fitted to the pike rows only
mdl_pike <- lm(formula = mass_g ~ length_cm, data = pike)
Call:
lm(formula = mass_g ~ length_cm, data = pike)

Coefficients:
(Intercept)    length_cm  
   -1540.82        53.19
# Linear regression of mass on length, fitted to the perch rows only
mdl_perch <- lm(formula = mass_g ~ length_cm, data = perch)
Call:
lm(formula = mass_g ~ length_cm, data = perch)

Coefficients:
(Intercept)    length_cm  
    -619.18        38.91
# Linear regression of mass on length, fitted to the roach rows only
mdl_roach <- lm(formula = mass_g ~ length_cm, data = roach)
Call:
lm(formula = mass_g ~ length_cm, data = roach)

Coefficients:
(Intercept)    length_cm  
    -329.38        23.32
Intermediate Regression in R

Verklarende data

# Lengths from 5 to 60 in steps of 5, used as inputs for the predictions
explanatory_data <- tibble(
  length_cm = seq(from = 5, to = 60, by = 5)
)
Intermediate Regression in R

Voorspellingen maken

# For each species, predict mass at every length in explanatory_data using
# that species' own model, and tag the rows with the species name so the
# predictions can be colored by species when plotted.
prediction_data_bream <- explanatory_data %>%
  mutate(
    mass_g = predict(mdl_bream, explanatory_data),
    species = "Bream"
  )
# The object name, the model and the species label must all refer to the
# same species: perch predictions come from mdl_perch ...
prediction_data_perch <- explanatory_data %>%
  mutate(
    mass_g = predict(mdl_perch, explanatory_data),
    species = "Perch"
  )
# ... and pike predictions come from mdl_pike.
prediction_data_pike <- explanatory_data %>%
  mutate(
    mass_g = predict(mdl_pike, explanatory_data),
    species = "Pike"
  )
prediction_data_roach <- explanatory_data %>%
  mutate(
    mass_g = predict(mdl_roach, explanatory_data),
    species = "Roach"
  )
Intermediate Regression in R

Voorspellingen visualiseren

# Scatter plot of mass against length, colored by species, with one
# linear trend line per species and no standard-error ribbon
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = mass_g, color = species)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

spreiding-massa-vs-lengte-per-soort-inter.png

Intermediate Regression in R

Je voorspellingen toevoegen

# Same scatter plot and per-species trend lines as before, with the
# predicted points for each species overlaid as larger markers (shape 15).
# The prediction layers inherit the x, y and color mappings from ggplot().
ggplot(
  data = fish,
  mapping = aes(x = length_cm, y = mass_g, color = species)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(data = prediction_data_bream, size = 3, shape = 15) +
  geom_point(data = prediction_data_perch, size = 3, shape = 15) +
  geom_point(data = prediction_data_pike, size = 3, shape = 15) +
  geom_point(data = prediction_data_roach, size = 3, shape = 15)

spreiding-massa-vs-lengte-per-soort-inter-met-voorsp.png

Intermediate Regression in R

Determinatiecoëfficiënt

# Model fitted to the whole dataset, with species as an extra explanatory
# variable alongside length
mdl_fish <- lm(formula = mass_g ~ length_cm + species, data = fish)

# Adjusted R-squared of the whole-dataset model
pull(glance(mdl_fish), adj.r.squared)
0.917
# Adjusted R-squared of each single-species model
pull(glance(mdl_bream), adj.r.squared)
0.874
pull(glance(mdl_perch), adj.r.squared)
0.917
pull(glance(mdl_pike), adj.r.squared)
0.941
pull(glance(mdl_roach), adj.r.squared)
0.815
Intermediate Regression in R

Standaardfout van residu’s

# Residual standard error (sigma) of the whole-dataset model
pull(glance(mdl_fish), sigma)
103
# Residual standard error (sigma) of each single-species model
pull(glance(mdl_bream), sigma)
74.2
pull(glance(mdl_perch), sigma)
100
pull(glance(mdl_pike), sigma)
120
pull(glance(mdl_roach), sigma)
38.2
Intermediate Regression in R

Laten we oefenen!

Intermediate Regression in R

Preparing Video For Download...