Introduction to the Tidyverse
David Robinson
Chief Data Scientist, DataCamp
# Collapse gapminder to one row per year, computing the summed population
# (totalPop) and the average life expectancy (meanLifeExp) across all rows
# recorded for that year.
by_year <- gapminder %>%
  group_by(year) %>%
  summarize(
    totalPop = sum(pop),
    meanLifeExp = mean(lifeExp)
  )

# Print the summarized table.
by_year
# A tibble: 12 x 3
year totalPop meanLifeExp
<int> <dbl> <dbl>
1 1952 2406957150 49.05762
2 1957 2664404580 51.50740
3 1962 2899782974 53.60925
4 1967 3217478384 55.67829
5 1972 3576977158 57.64739
6 1977 3930045807 59.57016
7 1982 4289436840 61.53320
8 1987 4691477418 63.21261
9 1992 5110710260 64.16034
10 1997 5515204472 65.01468
11 2002 5886977579 65.69492
12 2007 6251013179 67.00742
# Scatter plot of the summarized data: year on the x-axis, total population
# on the y-axis.
ggplot(by_year, aes(x = year, y = totalPop)) +
  geom_point()
# Same scatter plot of total population by year, with the y-axis widened so
# that it includes zero.
ggplot(by_year, aes(x = year, y = totalPop)) +
  geom_point() +
  expand_limits(y = 0)  # force 0 into the y-axis range
# Collapse gapminder to one row per (year, continent) pair, computing the
# summed population and the average life expectancy within each pair.
# Note: as the printed output shows, the result is still grouped by year.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(
    totalPop = sum(pop),
    meanLifeExp = mean(lifeExp)
  )

# Print the summarized table.
by_year_continent
# A tibble: 60 x 4
# Groups: year [?]
year continent totalPop meanLifeExp
<int> <fct> <dbl> <dbl>
1 1952 Africa 237640501 39.13550
2 1952 Americas 345152446 53.27984
3 1952 Asia 1395357351 46.31439
4 1952 Europe 418120846 64.40850
5 1952 Oceania 10686006 69.25500
6 1957 Africa 264837738 41.26635
7 1957 Americas 386953916 55.96028
8 1957 Asia 1562780599 49.31854
9 1957 Europe 437890351 66.70307
10 1957 Oceania 11941976 70.29500
# ... with 50 more rows
# Scatter plot of total population by year, one colour per continent, with
# the y-axis widened so that it includes zero.
ggplot(
  by_year_continent,
  aes(x = year, y = totalPop, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)  # force 0 into the y-axis range
Introduction to the Tidyverse