Data Manipulation with dplyr
James Chapman
Curriculum Manager, DataCamp
babynames
# A tibble: 332,595 x 3
year name number
<dbl> <chr> <int>
1 1880 Aaron 102
2 1880 Ab 5
3 1880 Abbie 71
4 1880 Abbott 5
5 1880 Abby 6
6 1880 Abe 50
7 1880 Abel 9
8 1880 Abigail 12
9 1880 Abner 27
10 1880 Abraham 81
# … with 332,585 more rows
babynames %>%
filter(name == "Amy")
# A tibble: 28 x 3
year name number
<dbl> <chr> <int>
1 1880 Amy 167
2 1885 Amy 240
3 1890 Amy 275
4 1895 Amy 303
5 1900 Amy 335
6 1905 Amy 269
7 1910 Amy 287
8 1915 Amy 624
9 1920 Amy 624
10 1925 Amy 560
# … with 18 more rows
babynames_filtered <- babynames %>%
filter(name == "Amy")
library(ggplot2)
ggplot(babynames_filtered, aes(x = year, y = number))
babynames_filtered <- babynames %>%
filter(name == "Amy")
library(ggplot2)
ggplot(babynames_filtered, aes(x = year, y = number)) +
geom_line()
%in%: used to filter for multiple values
babynames_multiple <- babynames %>%
filter(name %in% c("Amy", "Christopher"))
babynames %>%
group_by(name) %>%
slice_max(number, n = 1)
# A tibble: 54,881 x 3
# Groups: name [48,040]
year name number
<dbl> <chr> <int>
1 1880 Arch 61
2 1880 Bird 17
3 1880 Ednah 6
4 1880 Erasmus 5
5 1880 Garfield 122
6 1880 Harve 17
7 1880 Lidie 7
8 1880 Loula 13
9 1880 Lovisa 5
10 1880 Lulie 8
# … with 54,871 more rows
Data Manipulation with dplyr