The right_join verb

Joining Data with dplyr

Chris Cardillo

Data Scientist

The right join

batmobile %>%
  left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 173 x 4
   part_num color_id quantity_batmobile quantity_batwing
   <chr>       <dbl>              <dbl>            <dbl>
 1 3023           72                 62               NA
 2 2780            0                 28               17
 3 50950           0                 28                2
 4 3004           71                 26                2
 5 43093           1                 25                6
 6 3004            0                 23                4
 7 3010            0                 21               NA
 8 30363           0                 21               NA
 9 32123b         14                 19               NA
10 3622            0                 18                2
# … with 163 more rows
Joining Data with dplyr

The left and right join

Left join left join

Right join right join

Joining Data with dplyr

Mirror images

batmobile %>%
  right_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 312 x 4
   part_num color_id quantity_batmobile quantity_batwing
   <chr>       <dbl>              <dbl>            <dbl>
 1 3023            0                 NA               22
 2 3024            0                  2               22
 3 3623            0                 10               20
 4 11477           0                 NA               18
 5 99207          71                 NA               18
 6 2780            0                 28               17
 7 2780            0                  1               17
 8 3666            0                 NA               16
 9 22385           0                 NA               14
10 3710            0                 NA               14
# … with 302 more rows
Joining Data with dplyr

Count and sort

sets %>%
  count(theme_id, sort = TRUE)
# A tibble: 569 x 2
   theme_id     n
      <dbl> <int>
 1      501   122
 2      494   111
 3      435    94
 4      505    94
 5      632    93
 6      371    89
 7      497    86
 8      503    82
 9      516    78
10      220    72
# … with 559 more rows
Joining Data with dplyr

Inner join

sets %>%
  count(theme_id, sort = TRUE) %>%
  inner_join(themes, by = c("theme_id" = "id"))
# A tibble: 569 x 4
   theme_id     n name              parent_id
      <dbl> <int> <chr>                 <dbl>
 1      501   122 Gear                     NA
 2      494   111 Friends                  NA
 3      435    94 Ninjago                  NA
 4      505    94 Basic Set               504
 5      632    93 Town                    504
 6      371    89 Supplemental            365
 7      497    86 Books                    NA
 8      503    82 Key Chain               501
 9      516    78 Duplo and Explore       507
10      220    72 City                    217
# … with 559 more rows
Joining Data with dplyr

Right join

sets %>%
  count(theme_id, sort = TRUE) %>%
  right_join(themes, by = c("theme_id" = "id"))
# A tibble: 665 x 4
   theme_id     n name           parent_id
      <dbl> <int> <chr>              <dbl>
 1        1    58 Technic               NA
 2        2     1 Arctic Technic         1
 3        3     4 Competition            1
 4        4    13 Expert Builder         1
 5        5     6 Model                  1
 6        6     7 Airport                5
 7        7    20 Construction           5
 8        8    NA Farm                   5
 9        9     2 Fire                   5
10       10     3 Harbor                 5
# … with 655 more rows
Joining Data with dplyr

Replace NAs

library(tidyr)

sets %>%
  count(theme_id, sort = TRUE) %>%
  right_join(themes, by = c("theme_id" = "id")) %>%
  replace_na(list(n = 0))
# A tibble: 665 x 4
   theme_id     n name           parent_id
      <dbl> <dbl> <chr>              <dbl>
 1        1    58 Technic               NA
 2        2     1 Arctic Technic         1
 3        3     4 Competition            1
 4        4    13 Expert Builder         1
 5        5     6 Model                  1
 6        6     7 Airport                5
 7        7    20 Construction           5
 8        8     0 Farm                   5
 9        9     2 Fire                   5
10       10     3 Harbor                 5
# … with 655 more rows
Joining Data with dplyr

Let's practice!

Joining Data with dplyr

Preparing Video For Download...