Joining Data with dplyr
Chris Cardillo
Data Scientist
sets %>%
inner_join(themes, by = c("theme_id" = "id"), suffix = c("_set", "_theme"))
# A tibble: 4,977 x 6
set_num name_set year theme_id name_theme parent_id
<chr> <chr> <dbl> <dbl> <chr> <dbl>
1 700.3-1 Medium Gift Set (ABB) 1949 365 System NA
2 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 Supplemental 365
3 700.B.2-1 Single 1 x 2 x 3 Window without Glass (ABB) 1950 371 Supplemental 365
4 700.1-2 Extra-Large Gift Set (Mursten) 1953 366 Basic Set 365
5 700.F-1 Automatic Binding Bricks - Small Brick Set (Lego Mursten) 1953 371 Supplemental 365
6 700.24-1 Individual 2 x 12 Bricks 1954 371 Supplemental 365
7 700.C.1-1 Individual 1 x 6 x 4 Panorama Window (with glass) 1954 371 Supplemental 365
8 700.C.4-1 Individual 1 x 4 x 3 Window (with glass) 1954 371 Supplemental 365
9 700.H-1 Individual 4 x 4 Corner Bricks 1954 371 Supplemental 365
10 1200-1 LEGO Town Plan Board, Large Plastic 1955 372 Town Plan 365
# … with 4,967 more rows
inventories
# A tibble: 15,174 x 3
id version set_num
<dbl> <dbl> <chr>
1 1 1 7922-1
2 3 1 3931-1
3 4 1 6942-1
4 15 1 5158-1
5 16 1 903-1
6 17 1 850950-1
7 19 1 4444-1
8 21 1 3474-1
9 22 1 30277-1
10 25 1 71012-11
# … with 15,164 more rows
sets %>%
inner_join(inventories, by = "set_num")
# A tibble: 5,056 x 6
set_num name year theme_id id version
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 700.3-1 Medium Gift Set (ABB) 1949 365 24197 1
2 700.3-1 Medium Gift Set (ABB) 1949 365 24214 2
3 700.3-1 Medium Gift Set (ABB) 1949 365 24215 3
4 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 11831 1
5 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 24230 2
6 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 24231 3
7 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 24232 4
8 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 24233 5
9 700.B.2-1 Single 1 x 2 x 3 Window without Glass (ABB) 1950 371 537 1
10 700.B.2-1 Single 1 x 2 x 3 Window without Glass (ABB) 1950 371 24240 2
# … with 5,046 more rows
sets %>%
inner_join(inventories, by = "set_num") %>%
filter(version == 1)
# A tibble: 4,976 x 6
set_num name year theme_id id version
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 700.3-1 Medium Gift Set (ABB) 1949 365 24197 1
2 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 11831 1
3 700.B.2-1 Single 1 x 2 x 3 Window without Glass (ABB) 1950 371 537 1
4 700.1-2 Extra-Large Gift Set (Mursten) 1953 366 12985 1
5 700.F-1 Automatic Binding Bricks - Small Brick Set (Lego Mursten) 1953 371 11265 1
6 700.24-1 Individual 2 x 12 Bricks 1954 371 7645 1
7 700.C.1-1 Individual 1 x 6 x 4 Panorama Window (with glass) 1954 371 3896 1
8 700.C.4-1 Individual 1 x 4 x 3 Window (with glass) 1954 371 3663 1
9 700.H-1 Individual 4 x 4 Corner Bricks 1954 371 15503 1
10 1200-1 LEGO Town Plan Board, Large Plastic 1955 372 10761 1
# … with 4,966 more rows
inventory_parts
# A tibble: 258,958 x 4
inventory_id part_num color_id quantity
<dbl> <chr> <dbl> <dbl>
1 21 3009 7 50
2 25 21019c00pat004pr1033 15 1
3 25 24629pr0002 78 1
4 25 24634pr0001 5 1
5 25 24782pr0001 5 1
6 25 88646 0 1
7 25 973pr3314c01 5 1
8 26 14226c11 0 3
9 26 2340px2 15 1
10 26 2340px3 15 1
# … with 258,948 more rows
Joining Data with dplyr