The full_join verb

Joining Data with dplyr

Chris Cardillo

Data Scientist

Left and right joins

batwing %>%
  left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 309 x 4
   part_num color_id quantity_batmobile quantity_batwing
   <chr>       <dbl>              <dbl>            <dbl>
 1 3023            0                 22               22
 2 3024            0                 22               22
 3 3623            0                 20               20
 4 11477           0                 18               18
 5 99207          71                 18               18
 6 2780            0                 17               17
 7 3666            0                 16               16
 8 22385           0                 14               14
 9 3710            0                 14               14
10 99563           0                 13               13
# … with 299 more rows
Joining Data with dplyr

The full join

full-join

Joining Data with dplyr

Joining and filtering

inventory_parts_joined <- inventories %>%
  inner_join(inventory_parts, by = c("id" = "inventory_id")) %>%
  arrange(desc(quantity)) %>%
  select(-id, -version)

batmobile <- inventory_parts_joined %>%
  filter(set_num == "7784-1") %>%
  select(-set_num)

batwing <- inventory_parts_joined %>%
  filter(set_num == "70916-1") %>%
  select(-set_num)
Joining Data with dplyr

Batmobile vs. Batwing

batmobile
# A tibble: 173 x 3
   part_num color_id quantity
   <chr>       <dbl>    <dbl>
 1 3023           72       62
 2 2780            0       28
 3 50950           0       28
 4 3004           71       26
 5 43093           1       25
 6 3004            0       23
 7 3010            0       21
 8 30363           0       21
 9 32123b         14       19
10 3622            0       18
# … with 163 more rows
batwing
# A tibble: 309 x 3
   part_num color_id quantity
   <chr>       <dbl>    <dbl>
 1 3023            0       22
 2 3024            0       22
 3 3623            0       20
 4 11477           0       18
 5 99207          71       18
 6 2780            0       17
 7 3666            0       16
 8 22385           0       14
 9 3710            0       14
10 99563           0       13
# … with 299 more rows
Joining Data with dplyr

Joining it all together

Left join: keep all batmobile

batmobile %>%
  left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))

Right join: keep all batwing

batmobile %>%
  right_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))

Full join: keep all both

batmobile %>%
  full_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
Joining Data with dplyr

Full join result

batmobile %>%
  full_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 440 x 4
   part_num color_id quantity_batmobile quantity_batwing
   <chr>       <dbl>              <dbl>            <dbl>
 1 3023           72                 62               NA
 2 2780            0                 28               17
 3 50950           0                 28                2
 4 3004           71                 26                2
 5 43093           1                 25                6
 6 3004            0                 23                4
 7 3010            0                 21               NA
 8 30363           0                 21               NA
 9 32123b         14                 19               NA
10 3622            0                 18                2
# … with 430 more rows
Joining Data with dplyr

Replace NA: multiple variables

library(tidyr)

batmobile %>%
  full_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing")) %>%
  replace_na(list(quantity_batmobile = 0,
                  quantity_batwing = 0))
Joining Data with dplyr

Let's practice!

Joining Data with dplyr

Preparing Video For Download...