Data Manipulation with dplyr
James Chapman
Curriculum Manager, DataCamp
# Keep only the four columns used in the rest of the example.
counties_selected <- counties %>%
  select(
    state,
    county,
    population,
    unemployment
  )

# Print the reduced tibble.
counties_selected
# A tibble: 3,138 x 4
state county population unemployment
<chr> <chr> <dbl> <dbl>
1 Alabama Autauga 55221 7.6
2 Alabama Baldwin 195121 7.5
3 Alabama Barbour 26932 17.6
4 Alabama Bibb 22604 8.3
5 Alabama Blount 57710 7.7
6 Alabama Bullock 10678 18
7 Alabama Butler 20354 10.9
8 Alabama Calhoun 116648 12.3
9 Alabama Chambers 34079 8.9
10 Alabama Cherokee 26008 7.9
# … with 3,128 more rows
unemployed_population = population * unemployment / 100
# Add a column with the number of unemployed people per county.
# unemployment is divided by 100, i.e. it is treated as a percentage.
counties_selected %>%
  mutate(
    unemployed_population = population * unemployment / 100
  )
# A tibble: 3,138 x 5
state county population unemployment unemployed_population
<chr> <chr> <dbl> <dbl> <dbl>
1 Alabama Autauga 55221 7.6 4197.
2 Alabama Baldwin 195121 7.5 14634.
3 Alabama Barbour 26932 17.6 4740.
4 Alabama Bibb 22604 8.3 1876.
5 Alabama Blount 57710 7.7 4444.
6 Alabama Bullock 10678 18 1922.
7 Alabama Butler 20354 10.9 2219.
8 Alabama Calhoun 116648 12.3 14348.
9 Alabama Chambers 34079 8.9 3033.
10 Alabama Cherokee 26008 7.9 2055.
# … with 3,128 more rows
# Compute the unemployed population, then sort so that the counties with
# the largest unemployed population come first (descending order).
counties_selected %>%
  mutate(
    unemployed_population = population * unemployment / 100
  ) %>%
  arrange(
    desc(unemployed_population)
  )
# A tibble: 3,138 x 5
state county population unemployment unemployed_population
<chr> <chr> <dbl> <dbl> <dbl>
1 California Los Angeles 10038388 10 1003839.
2 Illinois Cook 5236393 10.7 560294.
3 Texas Harris 4356362 7.5 326727.
4 Arizona Maricopa 4018143 7.7 309397.
5 California Riverside 2298032 12.9 296446.
6 California San Diego 3223096 8.7 280409.
7 Michigan Wayne 1778969 14.9 265066.
8 California San Bernardino 2094769 12.6 263941.
9 Florida Miami-Dade 2639042 10 263904.
10 New York Kings 2595259 10 259526.
# … with 3,128 more rows
# Start from the full counties table and do the column selection inside
# mutate(): with .keep = "none", only the columns listed in the call
# (plus any grouping variables) are retained in the result.
counties %>%
  mutate(
    state,
    county,
    population,
    unemployment,
    unemployed_population = population * unemployment / 100,
    .keep = "none"
  ) %>%
  # Largest unemployed population first.
  arrange(desc(unemployed_population))
# A tibble: 3,138 x 5
state county population unemployment unemployed_population
<chr> <chr> <dbl> <dbl> <dbl>
1 California Los Angeles 10038388 10 1003839.
2 Illinois Cook 5236393 10.7 560294.
3 Texas Harris 4356362 7.5 326727.
4 Arizona Maricopa 4018143 7.7 309397.
5 California Riverside 2298032 12.9 296446.
6 California San Diego 3223096 8.7 280409.
7 Michigan Wayne 1778969 14.9 265066.
8 California San Bernardino 2094769 12.6 263941.
9 Florida Miami-Dade 2639042 10 263904.
10 New York Kings 2595259 10 259526.
# … with 3,128 more rows
Data Manipulation with dplyr