Data Manipulation with dplyr
James Chapman
Curriculum Manager, DataCamp
# Count every row of `counties`; the result is a one-row tibble with column `n`.
counties %>%
  count()
# A tibble: 1 x 1
      n
  <int>
1  3138
# Count rows per distinct value of `state`; `n` holds each group's row count.
counties %>%
  count(state)
# A tibble: 50 x 2
   state           n
   <chr>       <int>
 1 Alabama        67
 2 Alaska         28
 3 Arizona        15
 4 Arkansas       75
 5 California     58
 6 Colorado       64
 7 Connecticut     8
 8 Delaware        3
 9 Florida        67
10 Georgia       159
# … with 40 more rows
# Count rows per `state`; `sort = TRUE` lists the largest counts first.
counties %>%
  count(state, sort = TRUE)
# A tibble: 50 x 2
   state              n
   <chr>          <int>
 1 Texas            253
 2 Georgia          159
 3 Virginia         133
 4 Kentucky         120
 5 Missouri         115
 6 Kansas           105
 7 Illinois         102
 8 North Carolina   100
 9 Iowa              99
10 Tennessee         95
# … with 40 more rows
# Keep only the `state`, `county`, and `population` columns.
counties %>%
  select(state, county, population)
# A tibble: 3,138 x 3
   state   county   population
   <chr>   <chr>         <dbl>
 1 Alabama Autauga       55221
 2 Alabama Baldwin      195121
 3 Alabama Barbour       26932
 4 Alabama Bibb          22604
 5 Alabama Blount        57710
 6 Alabama Bullock       10678
 7 Alabama Butler        20354
 8 Alabama Calhoun      116648
 9 Alabama Chambers      34079
10 Alabama Cherokee      26008
# … with 3,128 more rows
# Weighted count: with `wt = population`, `n` is the sum of `population`
# within each `state` rather than the number of rows; largest totals first.
counties %>%
  count(state, wt = population, sort = TRUE)
# A tibble: 50 x 2
   state                 n
   <chr>             <dbl>
 1 California     38421464
 2 Texas          26538497
 3 New York       19673174
 4 Florida        19645772
 5 Illinois       12873761
 6 Pennsylvania   12779559
 7 Ohio           11575977
 8 Georgia        10006693
 9 Michigan        9900571
10 North Carolina  9845333
# … with 40 more rows
Data Manipulation with dplyr