Joining Data with dplyr
Chris Cardillo
Data Scientist
batwing %>%
left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 309 x 4
part_num color_id quantity_batmobile quantity_batwing
<chr> <dbl> <dbl> <dbl>
1 3023 0 22 22
2 3024 0 22 22
3 3623 0 20 20
4 11477 0 18 18
5 99207 71 18 18
6 2780 0 17 17
7 3666 0 16 16
8 22385 0 14 14
9 3710 0 14 14
10 99563 0 13 13
# … with 299 more rows
inventory_parts_joined <- inventories %>%
inner_join(inventory_parts, by = c("id" = "inventory_id")) %>%
arrange(desc(quantity)) %>%
select(-id, -version)
batmobile <- inventory_parts_joined %>%
filter(set_num == "7784-1") %>%
select(-set_num)
batwing <- inventory_parts_joined %>%
filter(set_num == "70916-1") %>%
select(-set_num)
batmobile
# A tibble: 173 x 3
part_num color_id quantity
<chr> <dbl> <dbl>
1 3023 72 62
2 2780 0 28
3 50950 0 28
4 3004 71 26
5 43093 1 25
6 3004 0 23
7 3010 0 21
8 30363 0 21
9 32123b 14 19
10 3622 0 18
# … with 163 more rows
batwing
# A tibble: 309 x 3
part_num color_id quantity
<chr> <dbl> <dbl>
1 3023 0 22
2 3024 0 22
3 3623 0 20
4 11477 0 18
5 99207 71 18
6 2780 0 17
7 3666 0 16
8 22385 0 14
9 3710 0 14
10 99563 0 13
# … with 299 more rows
Left join: keep all batmobile
batmobile %>%
left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
Right join: keep all batwing
batmobile %>%
right_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
Full join: keep all both
batmobile %>%
full_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
batmobile %>%
full_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 440 x 4
part_num color_id quantity_batmobile quantity_batwing
<chr> <dbl> <dbl> <dbl>
1 3023 72 62 NA
2 2780 0 28 17
3 50950 0 28 2
4 3004 71 26 2
5 43093 1 25 6
6 3004 0 23 4
7 3010 0 21 NA
8 30363 0 21 NA
9 32123b 14 19 NA
10 3622 0 18 2
# … with 430 more rows
library(tidyr)
batmobile %>%
full_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing")) %>%
replace_na(list(quantity_batmobile = 0,
quantity_batwing = 0))
Joining Data with dplyr