Fitting multiple models

Case Study: Exploratory Data Analysis in R

Dave Robinson

Chief Data Scientist, DataCamp

nest() turns data into one row per country

library(tidyr)
by_year_country %>%
  nest(-country)
# A tibble: 200 × 2
                           country              data
                             <chr>            <list>
1                      Afghanistan <tibble [34 × 3]>
2                        Argentina <tibble [34 × 3]>
3                        Australia <tibble [34 × 3]>
4                          Belarus <tibble [34 × 3]>
5                          Belgium <tibble [34 × 3]>
6  Bolivia, Plurinational State of <tibble [34 × 3]>
7                           Brazil <tibble [34 × 3]>
8                           Canada <tibble [34 × 3]>
9                            Chile <tibble [34 × 3]>
10                        Colombia <tibble [34 × 3]>
# ... with 190 more rows
# A tibble: 34 × 3
     year total percent_yes
    <dbl> <int>       <dbl>
1    1947    34   0.3823529
2    1949    51   0.6078431
3    1951    25   0.7600000
4    1953    26   0.7692308
5    1955    37   0.7297297
6    1957    34   0.5294118
7    1959    54   0.6111111
8    1961    76   0.6052632
9    1963    32   0.7812500
10   1965    40   0.8500000
# ... with 24 more rows
Case Study: Exploratory Data Analysis in R

map() applies an operation to each item in a list

v <- list(1, 2, 3)

map(v, ~ . * 10)
[[1]]
[1] 10

[[2]]
[1] 20

[[3]]
[1] 30
Case Study: Exploratory Data Analysis in R

map() fits a model to each dataset

library(purrr)
by_year_country %>%
  nest(-country) %>%
  mutate(models = map(data, ~ lm(percent_yes ~ year, .)))
# A tibble: 200 × 3
                           country              data   models
                             <chr>            <list>   <list>
1                      Afghanistan <tibble [34 × 3]> <S3: lm>
2                        Argentina <tibble [34 × 3]> <S3: lm>
3                        Australia <tibble [34 × 3]> <S3: lm>
4                          Belarus <tibble [34 × 3]> <S3: lm>
5                          Belgium <tibble [34 × 3]> <S3: lm>
6  Bolivia, Plurinational State of <tibble [34 × 3]> <S3: lm>
7                           Brazil <tibble [34 × 3]> <S3: lm>
8                           Canada <tibble [34 × 3]> <S3: lm>
9                            Chile <tibble [34 × 3]> <S3: lm>
10                        Colombia <tibble [34 × 3]> <S3: lm>
# ... with 190 more rows
Case Study: Exploratory Data Analysis in R

tidy() turns each model into a data frame

by_year_country %>%
  nest(-country) %>%
  mutate(models = map(data, ~ lm(percent_yes ~ year, .))) %>%
  mutate(tidied = map(models, tidy))
# A tibble: 200 × 4
                           country              data   models               tidied
                             <chr>            <list>   <list>               <list>
1                      Afghanistan <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
2                        Argentina <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
3                        Australia <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
4                          Belarus <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
5                          Belgium <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
6  Bolivia, Plurinational State of <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
7                           Brazil <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
8                           Canada <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
9                            Chile <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
10                        Colombia <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
# ... with 190 more rows
tidy(model1)
         term       estimate    std.error statistic         p.value
1 (Intercept) -11.063084650 1.4705189228 -7.523252 1.444892e-08
2        year   0.006009299 0.0007426499  8.091698 3.064797e-09
Case Study: Exploratory Data Analysis in R

unnest() combines the tidied models

by_year_country %>%
  nest(-country) %>%
  mutate(models = map(data, ~ lm(percent_yes ~ year, .))) %>%
  mutate(tidied = map(models, tidy)) %>%
  unnest(tidied)
# A tibble: 399 × 6
       country        term      estimate    std.error statistic      p.value
         <chr>       <chr>         <dbl>        <dbl>     <dbl>        <dbl>
1  Afghanistan (Intercept) -11.063084650 1.4705189228 -7.523252 1.444892e-08
2  Afghanistan        year   0.006009299 0.0007426499  8.091698 3.064797e-09
3    Argentina (Intercept)  -9.464512565 2.1008982371 -4.504984 8.322481e-05
4    Argentina        year   0.005148829 0.0010610076  4.852773 3.047078e-05
5    Australia (Intercept)  -4.545492536 2.1479916283 -2.116159 4.220387e-02
6    Australia        year   0.002567161 0.0010847910  2.366503 2.417617e-02
7      Belarus (Intercept)  -7.000692717 1.5024232546 -4.659601 5.329950e-05
8      Belarus        year   0.003907557 0.0007587624  5.149908 1.284924e-05
9      Belgium (Intercept)  -5.845534016 1.5153390521 -3.857575 5.216573e-04
10     Belgium        year   0.003203234 0.0007652852  4.185673 2.072981e-04
# ... with 389 more rows
Case Study: Exploratory Data Analysis in R

Let's practice!

Case Study: Exploratory Data Analysis in R

Preparing Video For Download...