Data Manipulation with dplyr
James Chapman
Curriculum Manager, DataCamp
# Keep only the state, county, population, and unemployment columns of
# `counties`, store the result, and print it.
counties_selected <- counties %>%
  select(state, county, population, unemployment)

counties_selected
# A tibble: 3,138 x 4
state county population unemployment
<chr> <chr> <dbl> <dbl>
1 Alabama Autauga 55221 7.6
2 Alabama Baldwin 195121 7.5
3 Alabama Barbour 26932 17.6
4 Alabama Bibb 22604 8.3
5 Alabama Blount 57710 7.7
6 Alabama Bullock 10678 18
7 Alabama Butler 20354 10.9
8 Alabama Calhoun 116648 12.3
9 Alabama Chambers 34079 8.9
10 Alabama Cherokee 26008 7.9
# … with 3,128 more rows
# Sort rows by population; arrange() orders ascending by default.
counties_selected %>%
  arrange(population)
# A tibble: 3,138 x 4
state county population unemployment
<chr> <chr> <dbl> <dbl>
1 Hawaii Kalawao 85 0
2 Texas King 267 5.1
3 Nebraska McPherson 433 0.9
4 Montana Petroleum 443 6.6
5 Nebraska Arthur 448 4
6 Nebraska Loup 548 0.7
7 Nebraska Blaine 551 0.7
8 New Mexico Harding 565 6
9 Texas Kenedy 565 0
10 Colorado San Juan 606 13.8
# … with 3,128 more rows
# Wrap the column in desc() to sort from largest to smallest population.
counties_selected %>%
  arrange(desc(population))
# A tibble: 3,138 x 4
state county population unemployment
<chr> <chr> <dbl> <dbl>
1 California Los Angeles 10038388 10
2 Illinois Cook 5236393 10.7
3 Texas Harris 4356362 7.5
4 Arizona Maricopa 4018143 7.7
5 California San Diego 3223096 8.7
6 California Orange 3116069 7.6
7 Florida Miami-Dade 2639042 10
8 New York Kings 2595259 10
9 Texas Dallas 2485003 7.6
10 New York Queens 2301139 8.6
# … with 3,128 more rows
# Sort by descending population, then keep only the rows whose state is
# "New York". One pipeline step per line.
counties_selected %>%
  arrange(desc(population)) %>%
  filter(state == "New York")
# A tibble: 62 x 4
state county population unemployment
<chr> <chr> <dbl> <dbl>
1 New York Kings 2595259 10
2 New York Queens 2301139 8.6
3 New York New York 1629507 7.5
4 New York Suffolk 1501373 6.4
5 New York Bronx 1428357 14
6 New York Nassau 1354612 6.4
7 New York Westchester 967315 7.6
8 New York Erie 921584 7
9 New York Monroe 749356 7.7
10 New York Richmond 472481 6.9
# … with 52 more rows
# Sort by descending population, then keep only the rows with an
# unemployment value below 6.
counties_selected %>%
  arrange(desc(population)) %>%
  filter(unemployment < 6)
# A tibble: 949 x 4
state county population unemployment
<chr> <chr> <dbl> <dbl>
1 Virginia Fairfax 1128722 4.9
2 Utah Salt Lake 1078958 5.8
3 Hawaii Honolulu 984178 5.6
4 Texas Collin 862215 4.9
5 Texas Denton 731851 5.7
6 Texas Fort Bend 658331 5.1
7 Kansas Johnson 566814 4.5
8 Maryland Anne Arundel 555280 5.9
9 Colorado Jefferson 552344 5.9
10 Utah Utah 551957 5.5
# … with 939 more rows
# Sort by descending population, then keep rows that satisfy BOTH conditions:
# comma-separated arguments to filter() are combined with a logical AND.
counties_selected %>%
  arrange(desc(population)) %>%
  filter(
    state == "New York",
    unemployment < 6
  )
# A tibble: 5 x 4
state county population unemployment
<chr> <chr> <dbl> <dbl>
1 New York Tompkins 103855 5.9
2 New York Chemung 88267 5.4
3 New York Madison 72427 5.1
4 New York Livingston 64801 5.4
5 New York Seneca 35144 5.5
Data Manipulation with dplyr