Data Manipulation with dplyr
James Chapman
Curriculum Manager, DataCamp
# Collapse the data to one row per year, holding the total of `number`
# across every name recorded in that year.
babynames %>%
  group_by(year) %>%
  summarise(
    year_total = sum(number)
  )
# A tibble: 28 x 2
year year_total
<dbl> <int>
1 1880 201478
2 1885 240822
3 1890 301352
4 1895 350934
5 1900 450148
6 1905 423875
7 1910 590607
8 1915 1830351
9 1920 2259494
10 1925 2330750
# … with 18 more rows
# Attach each year's total to every row of that year. Unlike a summary,
# mutate() keeps all rows, and the result is still grouped by year.
babynames %>%
  group_by(year) %>%
  mutate(
    year_total = sum(number)
  )
# A tibble: 332,595 x 4
# Groups: year [28]
year name number year_total
<dbl> <chr> <int> <int>
1 1880 Aaron 102 201478
2 1880 Ab 5 201478
3 1880 Abbie 71 201478
4 1880 Abbott 5 201478
5 1880 Abby 6 201478
6 1880 Abe 50 201478
7 1880 Abel 9 201478
8 1880 Abigail 12 201478
9 1880 Abner 27 201478
10 1880 Abraham 81 201478
# … with 332,585 more rows
# Same per-year total as a new column, then drop the grouping so that
# later verbs operate on the whole table rather than within each year.
babynames %>%
  group_by(year) %>%
  mutate(
    year_total = sum(number)
  ) %>%
  ungroup()
# A tibble: 332,595 x 4
year name number year_total
<dbl> <chr> <int> <int>
1 1880 Aaron 102 201478
2 1880 Ab 5 201478
3 1880 Abbie 71 201478
4 1880 Abbott 5 201478
5 1880 Abby 6 201478
6 1880 Abe 50 201478
7 1880 Abel 9 201478
8 1880 Abigail 12 201478
9 1880 Abner 27 201478
10 1880 Abraham 81 201478
# … with 332,585 more rows
# Compute each name's share of its year:
#   1. add the per-year total as `year_total` (grouped mutate),
#   2. remove the grouping,
#   3. divide each row's `number` by its `year_total`.
babynames %>%
  group_by(year) %>%
  mutate(
    year_total = sum(number)
  ) %>%
  ungroup() %>%
  mutate(
    fraction = number / year_total
  )
# A tibble: 332,595 x 5
year name number year_total fraction
<dbl> <chr> <int> <int> <dbl>
1 1880 Aaron 102 201478 0.000506
2 1880 Ab 5 201478 0.0000248
3 1880 Abbie 71 201478 0.000352
4 1880 Abbott 5 201478 0.0000248
5 1880 Abby 6 201478 0.0000298
6 1880 Abe 50 201478 0.000248
7 1880 Abel 9 201478 0.0000447
8 1880 Abigail 12 201478 0.0000596
9 1880 Abner 27 201478 0.000134
10 1880 Abraham 81 201478 0.000402
# … with 332,585 more rows
Data Manipulation with dplyr