Joining Data with dplyr
Chris Cardillo
Data Scientist
sets
# A tibble: 4,977 x 4
set_num name year theme_id
<chr> <chr> <dbl> <dbl>
1 700.3-1 Medium Gift Set (ABB) 1949 365
2 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371
3 700.B.2-1 Single 1 x 2 x 3 Window without Glass (ABB) 1950 371
4 700.1-2 Extra-Large Gift Set (Mursten) 1953 366
5 700.F-1 Automatic Binding Bricks - Small Brick Set (Lego Mursten) 1953 371
6 700.24-1 Individual 2 x 12 Bricks 1954 371
7 700.C.1-1 Individual 1 x 6 x 4 Panorama Window (with glass) 1954 371
8 700.C.4-1 Individual 1 x 4 x 3 Window (with glass) 1954 371
9 700.H-1 Individual 4 x 4 Corner Bricks 1954 371
10 1200-1 LEGO Town Plan Board, Large Plastic 1955 372
# … with 4,967 more rows
sets %>% head(3)
# A tibble: 4,977 x 4
set_num name year theme_id
<chr> <chr> <dbl> <dbl>
1 700.3-1 Medium Gift Set (ABB) 1949 365
2 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371
3 700.B.2-1 Single 1 x 2 x 3 Window without Glass (ABB) 1950 371
themes %>% head(3)
# A tibble: 665 x 3
id name parent_id
<dbl> <chr> <dbl>
1 1 Technic NA
2 2 Arctic Technic 1
3 3 Competition 1
sets %>%
inner_join(themes, by = c("theme_id" = "id"))
# A tibble: 4,977 x 6
set_num name.x year theme_id name.y parent_id
<chr> <chr> <dbl> <dbl> <chr> <dbl>
1 700.3-1 Medium Gift Set (ABB) 1949 365 System NA
2 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 Supplemental 365
3 700.B.2-1 Single 1 x 2 x 3 Window without Glass (ABB) 1950 371 Supplemental 365
4 700.1-2 Extra-Large Gift Set (Mursten) 1953 366 Basic Set 365
5 700.F-1 Automatic Binding Bricks - Small Brick Set (Lego Mursten) 1953 371 Supplemental 365
6 700.24-1 Individual 2 x 12 Bricks 1954 371 Supplemental 365
7 700.C.1-1 Individual 1 x 6 x 4 Panorama Window (with glass) 1954 371 Supplemental 365
8 700.C.4-1 Individual 1 x 4 x 3 Window (with glass) 1954 371 Supplemental 365
9 700.H-1 Individual 4 x 4 Corner Bricks 1954 371 Supplemental 365
10 1200-1 LEGO Town Plan Board, Large Plastic 1955 372 Town Plan 365
# … with 4,967 more rows
sets %>%
inner_join(themes, by = c("theme_id" = "id"), suffix = c("_set", "_theme"))
# A tibble: 4,977 x 6
set_num name_set year theme_id name_theme parent_id
<chr> <chr> <dbl> <dbl> <chr> <dbl>
1 700.3-1 Medium Gift Set (ABB) 1949 365 System NA
2 700.1.1-1 Single 2 x 4 Brick (ABB) 1950 371 Supplemental 365
3 700.B.2-1 Single 1 x 2 x 3 Window without Glass (ABB) 1950 371 Supplemental 365
4 700.1-2 Extra-Large Gift Set (Mursten) 1953 366 Basic Set 365
5 700.F-1 Automatic Binding Bricks - Small Brick Set (Lego Mursten) 1953 371 Supplemental 365
6 700.24-1 Individual 2 x 12 Bricks 1954 371 Supplemental 365
7 700.C.1-1 Individual 1 x 6 x 4 Panorama Window (with glass) 1954 371 Supplemental 365
8 700.C.4-1 Individual 1 x 4 x 3 Window (with glass) 1954 371 Supplemental 365
9 700.H-1 Individual 4 x 4 Corner Bricks 1954 371 Supplemental 365
10 1200-1 LEGO Town Plan Board, Large Plastic 1955 372 Town Plan 365
# … with 4,967 more rows
sets %>%
inner_join(themes, by = c("theme_id" = "id"), suffix = c("_set", "_theme")) %>%
count(name_theme, sort = TRUE)
# A tibble: 419 x 2
name_theme n
<chr> <int>
1 Supplemental 180
2 Basic Set 171
3 Technic 144
4 Friends 133
5 Gear 122
6 City 120
7 Town 117
8 Ninjago 95
9 Service Packs 94
10 Star Wars 94
# … with 409 more rows
parts %>% head(3)
# A tibble: 17,501 x 3
part_num name part_cat_id
<chr> <chr> <dbl>
1 0901 Baseplate 16 x 30 with Set 080 Yellow House Print 1
2 0902 Baseplate 16 x 24 with Set 080 Small White House Print 1
3 0903 Baseplate 16 x 24 with Set 080 Red House Print 1
part_categories %>% head(3)
# A tibble: 64 x 2
id name
<dbl> <chr>
1 1 Baseplates
2 3 Bricks Sloped
3 4 Duplo, Quatro and Primo
Joining Data with dplyr