Joining Data with dplyr
Chris Cardillo
Data Scientist
inventory_parts_joined <- inventories %>%
inner_join(inventory_parts, by = c("id" = "inventory_id")) %>%
select(-id, -version) %>%
arrange(desc(quantity))
inventory_parts_joined
# A tibble: 258,958 x 4
set_num part_num color_id quantity
<chr> <chr> <dbl> <dbl>
1 40179-1 3024 72 900
2 40179-1 3024 15 900
3 40179-1 3024 0 900
4 40179-1 3024 71 900
5 40179-1 3024 14 900
6 k34434-1 3024 15 810
7 21010-1 3023 320 771
8 k34431-1 3024 0 720
9 42083-1 2780 0 684
10 k34434-1 3024 0 540
# … with 258,948 more rows
batmobile <- inventory_parts_joined %>%
filter(set_num == "7784-1") %>%
select(-set_num)
batwing <- inventory_parts_joined %>%
filter(set_num == "70916-1") %>%
select(-set_num)
batmobile
# A tibble: 173 x 3
part_num color_id quantity
<chr> <dbl> <dbl>
1 3023 72 62
2 2780 0 28
3 50950 0 28
4 3004 71 26
5 43093 1 25
6 3004 0 23
7 3010 0 21
8 30363 0 21
9 32123b 14 19
10 3622 0 18
# … with 163 more rows
batwing
# A tibble: 309 x 3
part_num color_id quantity
<chr> <dbl> <dbl>
1 3023 0 22
2 3024 0 22
3 3623 0 20
4 11477 0 18
5 99207 71 18
6 2780 0 17
7 3666 0 16
8 22385 0 14
9 3710 0 14
10 99563 0 13
# … with 299 more rows
batmobile %>%
inner_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 45 x 4
part_num color_id quantity_batmobile quantity_batwing
<chr> <dbl> <dbl> <dbl>
1 2780 0 28 17
2 50950 0 28 2
3 3004 71 26 2
4 43093 1 25 6
5 3004 0 23 4
6 3622 0 18 2
7 4286 0 16 1
8 3039 0 12 2
9 4274 71 12 7
10 3001 0 11 4
# … with 35 more rows
batmobile %>%
left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 173 x 4
part_num color_id quantity_batmobile quantity_batwing
<chr> <dbl> <dbl> <dbl>
1 3023 72 62 NA
2 2780 0 28 17
3 50950 0 28 2
4 3004 71 26 2
5 43093 1 25 6
6 3004 0 23 4
7 3010 0 21 NA
8 30363 0 21 NA
9 32123b 14 19 NA
10 3622 0 18 2
# … with 163 more rows
Inner join
Left join
Joining Data with dplyr