Case Study: Exploratory Data Analysis in R
Dave Robinson
Chief Data Scientist, DataCamp
library(tidyr)
by_year_country %>%
nest(-country)
# A tibble: 200 × 2
country data
<chr> <list>
1 Afghanistan <tibble [34 × 3]>
2 Argentina <tibble [34 × 3]>
3 Australia <tibble [34 × 3]>
4 Belarus <tibble [34 × 3]>
5 Belgium <tibble [34 × 3]>
6 Bolivia, Plurinational State of <tibble [34 × 3]>
7 Brazil <tibble [34 × 3]>
8 Canada <tibble [34 × 3]>
9 Chile <tibble [34 × 3]>
10 Colombia <tibble [34 × 3]>
# ... with 190 more rows
# A tibble: 34 × 3
year total percent_yes
<dbl> <int> <dbl>
1 1947 34 0.3823529
2 1949 51 0.6078431
3 1951 25 0.7600000
4 1953 26 0.7692308
5 1955 37 0.7297297
6 1957 34 0.5294118
7 1959 54 0.6111111
8 1961 76 0.6052632
9 1963 32 0.7812500
10 1965 40 0.8500000
# ... with 24 more rows
v <- list(1, 2, 3)
map(v, ~ . * 10)
[[1]]
[1] 10
[[2]]
[1] 20
[[3]]
[1] 30
library(purrr)
by_year_country %>%
nest(-country) %>%
mutate(models = map(data, ~ lm(percent_yes ~ year, .)))
# A tibble: 200 × 3
country data models
<chr> <list> <list>
1 Afghanistan <tibble [34 × 3]> <S3: lm>
2 Argentina <tibble [34 × 3]> <S3: lm>
3 Australia <tibble [34 × 3]> <S3: lm>
4 Belarus <tibble [34 × 3]> <S3: lm>
5 Belgium <tibble [34 × 3]> <S3: lm>
6 Bolivia, Plurinational State of <tibble [34 × 3]> <S3: lm>
7 Brazil <tibble [34 × 3]> <S3: lm>
8 Canada <tibble [34 × 3]> <S3: lm>
9 Chile <tibble [34 × 3]> <S3: lm>
10 Colombia <tibble [34 × 3]> <S3: lm>
# ... with 190 more rows
by_year_country %>%
nest(-country) %>%
mutate(models = map(data, ~ lm(percent_yes ~ year, .))) %>%
mutate(tidied = map(models, tidy))
# A tibble: 200 × 4
country data models tidied
<chr> <list> <list> <list>
1 Afghanistan <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
2 Argentina <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
3 Australia <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
4 Belarus <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
5 Belgium <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
6 Bolivia, Plurinational State of <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
7 Brazil <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
8 Canada <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
9 Chile <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
10 Colombia <tibble [34 × 3]> <S3: lm> <data.frame [2 × 5]>
# ... with 190 more rows
tidy(model1)
term estimate std.error statistic p.value
1 (Intercept) -11.063084650 1.4705189228 -7.523252 1.444892e-08
2 year 0.006009299 0.0007426499 8.091698 3.064797e-09
by_year_country %>% nest(-country) %>% mutate(models = map(data, ~ lm(percent_yes ~ year, .))) %>%
mutate(tidied = map(models, tidy)) %>% unnest(tidied)
# A tibble: 399 × 6
country term estimate std.error statistic p.value
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan (Intercept) -11.063084650 1.4705189228 -7.523252 1.444892e-08
2 Afghanistan year 0.006009299 0.0007426499 8.091698 3.064797e-09
3 Argentina (Intercept) -9.464512565 2.1008982371 -4.504984 8.322481e-05
4 Argentina year 0.005148829 0.0010610076 4.852773 3.047078e-05
5 Australia (Intercept) -4.545492536 2.1479916283 -2.116159 4.220387e-02
6 Australia year 0.002567161 0.0010847910 2.366503 2.417617e-02
7 Belarus (Intercept) -7.000692717 1.5024232546 -4.659601 5.329950e-05
8 Belarus year 0.003907557 0.0007587624 5.149908 1.284924e-05
9 Belgium (Intercept) -5.845534016 1.5153390521 -3.857575 5.216573e-04
10 Belgium year 0.003203234 0.0007652852 4.185673 2.072981e-04
# ... with 389 more rows
Case Study: Exploratory Data Analysis in R