Joining Data with dplyr
Chris Cardillo
Data Scientist
inner_join
left_join
right_join
full_join
batmobile %>%
left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 173 x 4
part_num color_id quantity_batmobile quantity_batwing
<chr> <dbl> <dbl> <dbl>
1 3023 72 62 NA
2 2780 0 28 17
3 50950 0 28 2
4 3004 71 26 2
5 43093 1 25 6
6 3004 0 23 4
7 3010 0 21 NA
8 30363 0 21 NA
9 32123b 14 19 NA
10 3622 0 18 2
# … with 163 more rows
semi_join()
anti_join()
Semi join
Semi join
Anti join
batmobile %>%
semi_join(batwing, by = c("color_id", "part_num"))
# A tibble: 45 x 3
part_num color_id quantity
<chr> <dbl> <dbl>
1 2780 0 28
2 50950 0 28
3 3004 71 26
4 43093 1 25
5 3004 0 23
6 3622 0 18
7 4286 0 16
8 3039 0 12
9 4274 71 12
10 3001 0 11
# … with 35 more rows
batmobile %>%
anti_join(batwing, by = c("color_id", "part_num"))
# A tibble: 128 x 3
part_num color_id quantity
<chr> <dbl> <dbl>
1 3023 72 62
2 3010 0 21
3 30363 0 21
4 32123b 14 19
5 50950 320 18
6 6541 0 18
7 3040b 0 14
8 3298 0 14
9 3660 0 14
10 42022 0 14
# … with 118 more rows
themes %>%
semi_join(sets, by = c("id" = "theme_id"))
# A tibble: 569 x 3
id name parent_id
<dbl> <chr> <dbl>
1 1 Technic NA
2 2 Arctic Technic 1
3 3 Competition 1
4 4 Expert Builder 1
5 5 Model 1
6 6 Airport 5
7 7 Construction 5
8 9 Fire 5
9 10 Harbor 5
10 11 Off-Road 5
# … with 559 more rows
themes %>%
anti_join(sets, by = c("id" = "theme_id"))
# A tibble: 96 x 3
id name parent_id
<dbl> <chr> <dbl>
1 8 Farm 5
2 24 Airport 23
3 25 Castle 23
4 26 Construction 23
5 27 Race 23
6 28 Harbor 23
7 29 Train 23
8 32 Robot 23
9 34 Building 23
10 35 Cargo 23
# … with 86 more rows
inner_join
left_join
right_join
full_join
semi_join
anti_join
Joining Data with dplyr