The mutate() verb

Data Manipulation with dplyr

James Chapman

Curriculum Manager, DataCamp

counties_selected <- counties %>%
  select(state, county, population, unemployment)

counties_selected
# A tibble: 3,138 x 4
   state   county   population unemployment
   <chr>   <chr>         <dbl>        <dbl>
 1 Alabama Autauga       55221          7.6
 2 Alabama Baldwin      195121          7.5
 3 Alabama Barbour       26932         17.6
 4 Alabama Bibb          22604          8.3
 5 Alabama Blount        57710          7.7
 6 Alabama Bullock       10678         18  
 7 Alabama Butler        20354         10.9
 8 Alabama Calhoun      116648         12.3
 9 Alabama Chambers      34079          8.9
10 Alabama Cherokee      26008          7.9
# … with 3,128 more rows
Data Manipulation with dplyr

Total number of unemployed people

unemployed_population = population * unemployment / 100
Data Manipulation with dplyr

mutate()

counties_selected %>%
  mutate(unemployed_population = population * unemployment / 100)
# A tibble: 3,138 x 5
   state   county   population unemployment unemployed_population
   <chr>   <chr>         <dbl>        <dbl>                 <dbl>
 1 Alabama Autauga       55221          7.6                 4197.
 2 Alabama Baldwin      195121          7.5                14634.
 3 Alabama Barbour       26932         17.6                 4740.
 4 Alabama Bibb          22604          8.3                 1876.
 5 Alabama Blount        57710          7.7                 4444.
 6 Alabama Bullock       10678         18                   1922.
 7 Alabama Butler        20354         10.9                 2219.
 8 Alabama Calhoun      116648         12.3                14348.
 9 Alabama Chambers      34079          8.9                 3033.
10 Alabama Cherokee      26008          7.9                 2055.
# … with 3,128 more rows
Data Manipulation with dplyr

counties_selected %>%
  mutate(unemployed_population = population * unemployment / 100) %>%
  arrange(desc(unemployed_population))
# A tibble: 3,138 x 5
   state      county         population unemployment unemployed_population
   <chr>      <chr>               <dbl>        <dbl>                 <dbl>
 1 California Los Angeles      10038388         10                1003839.
 2 Illinois   Cook              5236393         10.7               560294.
 3 Texas      Harris            4356362          7.5               326727.
 4 Arizona    Maricopa          4018143          7.7               309397.
 5 California Riverside         2298032         12.9               296446.
 6 California San Diego         3223096          8.7               280409.
 7 Michigan   Wayne             1778969         14.9               265066.
 8 California San Bernardino    2094769         12.6               263941.
 9 Florida    Miami-Dade        2639042         10                 263904.
10 New York   Kings             2595259         10                 259526.
# … with 3,128 more rows
Data Manipulation with dplyr

Selecting and transforming with mutate()

counties %>%
  mutate(state, county, population, unemployment,
         unemployed_population = population * unemployment / 100,
         .keep = "none") %>%
  arrange(desc(unemployed_population))
# A tibble: 3,138 x 5
   state      county         population unemployment unemployed_population
   <chr>      <chr>               <dbl>        <dbl>                 <dbl>
 1 California Los Angeles      10038388         10                1003839.
 2 Illinois   Cook              5236393         10.7               560294.
 3 Texas      Harris            4356362          7.5               326727.
 4 Arizona    Maricopa          4018143          7.7               309397.
 5 California Riverside         2298032         12.9               296446.
 6 California San Diego         3223096          8.7               280409.
 7 Michigan   Wayne             1778969         14.9               265066.
 8 California San Bernardino    2094769         12.6               263941.
 9 Florida    Miami-Dade        2639042         10                 263904.
10 New York   Kings             2595259         10                 259526.
# … with 3,128 more rows
Data Manipulation with dplyr

Let's practice!

Data Manipulation with dplyr

Preparing Video For Download...