The left_join verb

Joining Data with dplyr

Chris Cardillo

Data Scientist

Batmobile vs. Batwing

LEGO-batmobile

Joining Data with dplyr

Recall: inner join

inventory_parts_joined <- inventories %>%
  inner_join(inventory_parts, by = c("id" = "inventory_id")) %>%
  select(-id, -version) %>%
  arrange(desc(quantity))

inventory_parts_joined
# A tibble: 258,958 x 4
   set_num  part_num color_id quantity
   <chr>    <chr>       <dbl>    <dbl>
 1 40179-1  3024           72      900
 2 40179-1  3024           15      900
 3 40179-1  3024            0      900
 4 40179-1  3024           71      900
 5 40179-1  3024           14      900
 6 k34434-1 3024           15      810
 7 21010-1  3023          320      771
 8 k34431-1 3024            0      720
 9 42083-1  2780            0      684
10 k34434-1 3024            0      540
# … with 258,948 more rows
Joining Data with dplyr

Filter for LEGO sets

batmobile <- inventory_parts_joined %>%
  filter(set_num == "7784-1") %>%
  select(-set_num)
batwing <- inventory_parts_joined %>%
  filter(set_num == "70916-1") %>%
  select(-set_num)
Joining Data with dplyr

Comparing tables

batmobile
# A tibble: 173 x 3
   part_num color_id quantity
   <chr>       <dbl>    <dbl>
 1 3023           72       62
 2 2780            0       28
 3 50950           0       28
 4 3004           71       26
 5 43093           1       25
 6 3004            0       23
 7 3010            0       21
 8 30363           0       21
 9 32123b         14       19
10 3622            0       18
# … with 163 more rows
batwing
# A tibble: 309 x 3
   part_num color_id quantity
   <chr>       <dbl>    <dbl>
 1 3023            0       22
 2 3024            0       22
 3 3623            0       20
 4 11477           0       18
 5 99207          71       18
 6 2780            0       17
 7 3666            0       16
 8 22385           0       14
 9 3710            0       14
10 99563           0       13
# … with 299 more rows
Joining Data with dplyr

Parts and pieces

LEGO-parts-and-pieces.png

Joining Data with dplyr

Joining with multiple columns

batmobile %>%
  inner_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 45 x 4
   part_num color_id quantity_batmobile quantity_batwing
   <chr>       <dbl>              <dbl>            <dbl>
 1 2780            0                 28               17
 2 50950           0                 28                2
 3 3004           71                 26                2
 4 43093           1                 25                6
 5 3004            0                 23                4
 6 3622            0                 18                2
 7 4286            0                 16                1
 8 3039            0                 12                2
 9 4274           71                 12                7
10 3001            0                 11                4
# … with 35 more rows
Joining Data with dplyr

The left join

batmobile %>%
  left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 173 x 4
   part_num color_id quantity_batmobile quantity_batwing
   <chr>       <dbl>              <dbl>            <dbl>
 1 3023           72                 62               NA
 2 2780            0                 28               17
 3 50950           0                 28                2
 4 3004           71                 26                2
 5 43093           1                 25                6
 6 3004            0                 23                4
 7 3010            0                 21               NA
 8 30363           0                 21               NA
 9 32123b         14                 19               NA
10 3622            0                 18                2
# … with 163 more rows
Joining Data with dplyr

Join review

Inner join inner-join

Left join left join

Joining Data with dplyr

Let's practice!

Joining Data with dplyr

Preparing Video For Download...