Zoom into Array Fields with $unwind

Introduction to MongoDB in Python

Donny Winston

Instructor

Sizing and summing

# Report each prize's year and category alongside the length of its
# "laureates" array; "_id" is switched off in the projection.
laureate_count = {"$size": "$laureates"}
projection = {"n_laureates": laureate_count,
              "year": 1, "category": 1, "_id": 0}
list(db.prizes.aggregate([{"$project": projection}]))
[{'year': '2018', 'category': 'physics', 'n_laureates': 3},
 {'year': '2018', 'category': 'chemistry', 'n_laureates': 3},
 {'year': '2018', 'category': 'medicine', 'n_laureates': 2},
 ...]
# Laureates per category: take the size of each prize's "laureates" array,
# sum those sizes within each category, then sort the totals descending.
count_per_prize = {"$project": {"n_laureates": {"$size": "$laureates"},
                                "category": 1}}
total_per_category = {"$group": {"_id": "$category",
                                 "n_laureates": {"$sum": "$n_laureates"}}}
largest_first = {"$sort": {"n_laureates": -1}}
list(db.prizes.aggregate([
    count_per_prize,
    total_per_category,
    largest_first,
]))
[{'_id': 'medicine', 'n_laureates': 216},
 {'_id': 'physics', 'n_laureates': 210},
 {'_id': 'chemistry', 'n_laureates': 181},
 {'_id': 'peace', 'n_laureates': 133},
 {'_id': 'literature', 'n_laureates': 114},
 {'_id': 'economics', 'n_laureates': 81}]
Introduction to MongoDB in Python

How to $unwind

# $unwind emits one document per element of "laureates", so each result
# carries a single laureate sub-document rather than the whole array.
one_per_laureate = {"$unwind": "$laureates"}
kept_fields = {"$project": {
    "_id": 0, "year": 1, "category": 1,
    "laureates.surname": 1, "laureates.share": 1,
}}
first_three = {"$limit": 3}
list(db.prizes.aggregate([one_per_laureate, kept_fields, first_three]))
[{'year': '2018',
  'category': 'physics',
  'laureates': {'surname': 'Ashkin', 'share': '2'}},
 {'year': '2018',
  'category': 'physics',
  'laureates': {'surname': 'Mourou', 'share': '4'}},
 {'year': '2018',
  'category': 'physics',
  'laureates': {'surname': 'Strickland', 'share': '4'}}]
Introduction to MongoDB in Python

Renormalization, anyone?

# Unwind the laureates, then regroup them under a "category:year" key,
# gathering each group's laureate ids into a set (no duplicates).
one_per_laureate = {"$unwind": "$laureates"}
kept_fields = {"$project": {"year": 1, "category": 1, "laureates.id": 1}}
prize_key = {"$concat": ["$category", ":", "$year"]}
ids_per_prize = {"$group": {"_id": prize_key,
                            "laureate_ids": {"$addToSet": "$laureates.id"}}}
first_five = {"$limit": 5}
list(db.prizes.aggregate([
    one_per_laureate,
    kept_fields,
    ids_per_prize,
    first_five,
]))
[{'_id': 'medicine:1901', 'laureate_ids': ['293']},
 {'_id': 'peace:1902', 'laureate_ids': ['465', '464']},
 {'_id': 'physics:1902', 'laureate_ids': ['3', '2']},
 {'_id': 'peace:1903', 'laureate_ids': ['466']},
 {'_id': 'medicine:1903', 'laureate_ids': ['295']}]
Introduction to MongoDB in Python

$unwind and count 'em, one by one

# Approach 1: measure each prize's "laureates" array with $size, then sum
# those sizes per category and sort the totals descending.
size_stage = {"$project": {"n_laureates": {"$size": "$laureates"},
                           "category": 1}}
sum_stage = {"$group": {"_id": "$category",
                        "n_laureates": {"$sum": "$n_laureates"}}}
sort_stage = {"$sort": {"n_laureates": -1}}
list(db.prizes.aggregate([size_stage, sum_stage, sort_stage]))
# Approach 2: unwind to one document per laureate, then add 1 per document
# within each category and sort the totals descending.
unwind_stage = {"$unwind": "$laureates"}
tally_stage = {"$group": {"_id": "$category", "n_laureates": {"$sum": 1}}}
sort_stage = {"$sort": {"n_laureates": -1}}
list(db.prizes.aggregate([unwind_stage, tally_stage, sort_stage]))
[{'_id': 'medicine', 'n_laureates': 216},
 {'_id': 'physics', 'n_laureates': 210},
 {'_id': 'chemistry', 'n_laureates': 181},
 {'_id': 'peace', 'n_laureates': 133},
 {'_id': 'literature', 'n_laureates': 114},
 {'_id': 'economics', 'n_laureates': 81}]
Introduction to MongoDB in Python

$lookup

# Collect the distinct birth countries of economics laureates by joining
# each prize's laureate entries against the "laureates" collection.
# The result is bound to `agg` because the cross-check that follows reads
# agg[0]['bornCountries']; without the binding that check raises NameError.
agg = list(db.prizes.aggregate([
    {"$match": {"category": "economics"}},
    # One document per laureate entry, so "laureates.id" is a single value.
    {"$unwind": "$laureates"},
    # Join on laureates.id == id; matches land in the "laureate_bios" array.
    {"$lookup": {"from": "laureates", "foreignField": "id",
                 "localField": "laureates.id", "as": "laureate_bios"}},
    # Flatten the joined array to one document per matched bio.
    {"$unwind": "$laureate_bios"},
    # "_id": None puts every document in one group, giving a single result.
    {"$group": {"_id": None,
                "bornCountries":
                {"$addToSet": "$laureate_bios.bornCountry"}
    }},
]))
agg  # echo the result in an interactive session
[{'_id': None,
  'bornCountries': [
    'the Netherlands', 'British West Indies (now Saint Lucia)', 'Italy',
    'Germany (now Poland)', 'Hungary', 'Austria', 'India', 'USA', 
    'Canada', 'British Mandate of Palestine (now Israel)', 'Norway',
    'Russian Empire (now Russia)', 'Russia', 'Finland', 'Scotland',
    'France', 'Sweden', 'Germany', 'Russian Empire (now Belarus)',
    'United Kingdom', 'Cyprus'
  ]}]
# Cross-check: distinct() over the laureates collection should give the same
# set of birth countries as the aggregation result.
# NOTE(review): `agg` must already hold the list returned by the $lookup
# aggregation on db.prizes — confirm it is assigned before this runs.
economics_filter = {"prizes.category": "economics"}
bornCountries = db.laureates.distinct("bornCountry", economics_filter)
assert set(agg[0]['bornCountries']) == set(bornCountries)
Introduction to MongoDB in Python

Time to unwind... with exercises!

Introduction to MongoDB in Python

Preparing Video For Download...