Joining Data with dplyr
Chris Cardillo
Data Scientist
batmobile %>%
left_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 173 x 4
part_num color_id quantity_batmobile quantity_batwing
<chr> <dbl> <dbl> <dbl>
1 3023 72 62 NA
2 2780 0 28 17
3 50950 0 28 2
4 3004 71 26 2
5 43093 1 25 6
6 3004 0 23 4
7 3010 0 21 NA
8 30363 0 21 NA
9 32123b 14 19 NA
10 3622 0 18 2
# … with 163 more rows
Left join
Right join
batmobile %>%
right_join(batwing, by = c("part_num", "color_id"), suffix = c("_batmobile", "_batwing"))
# A tibble: 312 x 4
part_num color_id quantity_batmobile quantity_batwing
<chr> <dbl> <dbl> <dbl>
1 3023 0 NA 22
2 3024 0 2 22
3 3623 0 10 20
4 11477 0 NA 18
5 99207 71 NA 18
6 2780 0 28 17
7 2780 0 1 17
8 3666 0 NA 16
9 22385 0 NA 14
10 3710 0 NA 14
# … with 302 more rows
sets %>%
count(theme_id, sort = TRUE)
# A tibble: 569 x 2
theme_id n
<dbl> <int>
1 501 122
2 494 111
3 435 94
4 505 94
5 632 93
6 371 89
7 497 86
8 503 82
9 516 78
10 220 72
# … with 559 more rows
sets %>%
count(theme_id, sort = TRUE) %>%
inner_join(themes, by = c("theme_id" = "id"))
# A tibble: 569 x 4
theme_id n name parent_id
<dbl> <int> <chr> <dbl>
1 501 122 Gear NA
2 494 111 Friends NA
3 435 94 Ninjago NA
4 505 94 Basic Set 504
5 632 93 Town 504
6 371 89 Supplemental 365
7 497 86 Books NA
8 503 82 Key Chain 501
9 516 78 Duplo and Explore 507
10 220 72 City 217
# … with 559 more rows
sets %>%
count(theme_id, sort = TRUE) %>%
right_join(themes, by = c("theme_id" = "id"))
# A tibble: 665 x 4
theme_id n name parent_id
<dbl> <int> <chr> <dbl>
1 1 58 Technic NA
2 2 1 Arctic Technic 1
3 3 4 Competition 1
4 4 13 Expert Builder 1
5 5 6 Model 1
6 6 7 Airport 5
7 7 20 Construction 5
8 8 NA Farm 5
9 9 2 Fire 5
10 10 3 Harbor 5
# … with 655 more rows
library(tidyr)
sets %>%
count(theme_id, sort = TRUE) %>%
right_join(themes, by = c("theme_id" = "id")) %>%
replace_na(list(n = 0))
# A tibble: 665 x 4
theme_id n name parent_id
<dbl> <dbl> <chr> <dbl>
1 1 58 Technic NA
2 2 1 Arctic Technic 1
3 3 4 Competition 1
4 4 13 Expert Builder 1
5 5 6 Model 1
6 6 7 Airport 5
7 7 20 Construction 5
8 8 0 Farm 5
9 9 2 Fire 5
10 10 3 Harbor 5
# … with 655 more rows
Joining Data with dplyr