Data Transformation with Polars
Liam Brannigan
Data Scientist & Polars Contributor




restaurants = pl.read_csv("restuarants.csv")
shape: (4, 5)
| business | location | review | price | type |
| --- | --- | --- | --- | --- |
| str | str | f64 | i64 | str |
|-----------------|-------------|--------|-------|------------|
| 7burgers | Wakey Wakey | 4.2 | 15 | restaurant |
| Costa Coffee | City Point | 4.5 | 8 | café |
| Costa Coffee | Waterloo | 4.1 | 8 | café |
| The Queens Head | Denman St. | 4.7 | 25 | bar |
users = pl.read_csv("app_users.csv")
shape: (3, 3)
| user_name | budget | type |
| --- | --- | --- |
| str | i64 | str |
|-----------|--------|------------|
| Bob | 12 | café |
| Alice | 15 | restaurant |
| Charlie | 22 | bar |
restaurants.join_where(
)
restaurants.join_where(
users,
)
restaurants.join_where(
users,
pl.col("type") == pl.col("type_right")
)
restaurants.join_where(
users,
pl.col("type") == pl.col("type_right")
)
shape: (4, 8)
| business | type | ... | user_name | budget |
| --- | --- | ... | --- | --- |
| str | str | ... | str | i64 |
|-----------------|------------|-----|-----------|--------|
| 7burgers | restaurant | ... | Alice | 15 |
| Costa Coffee | café | ... | Bob | 12 |
| Costa Coffee | café | ... | Bob | 12 |
| The Queens Head | bar | ... | Charlie | 22 |
restaurants.join_where(
users,
pl.col("type") == pl.col("type_right"),
pl.col("price") <= pl.col("budget")
)
shape: (3, 8)
| user_name | budget | business | price | type | ... |
| --- | --- | --- | --- | --- | ... |
| str | i64 | str | i64 | str | ... |
|-----------|--------|--------------|-------|------------|-----|
| Alice | 15 | 7burgers | 15 | restaurant | ... |
| Bob | 12 | Costa Coffee | 8 | café | ... |
| Bob | 12 | Costa Coffee | 8 | café | ... |
shape: (4, 3)
| business | date | rating |
| --- | --- | --- |
| str | date | i64 |
|----------|-------------|--------|
| 7burgers | 2025-03-15 | 5 |
| 7burgers | 2025-06-20 | 3 |
| 7burgers | 2025-09-10 | 4 |
| 7burgers | 2025-11-25 | 5 |
$$
shape: (3, 4)
| user | business | date | score |
| --- | --- | --- | --- |
| str | str | date | f64 |
|--------|----------|------------|-------|
| Charlie | 7burgers | 2025-05-01 | 3.8 |
| Bob | 7burgers | 2025-08-01 | 4.2 |
| Alice | 7burgers | 2025-12-01 | 4.5 |
reviews.sort("date").join_asof(
)
$$
$$
$$
$$
join_asof - join as of a certain datereviews.sort("date").join_asof(
inspections.sort("date"),
)
reviews.sort("date").join_asof(
inspections.sort("date"),
on="date",
)
reviews.sort("date").join_asof(
inspections.sort("date"),
on="date",
strategy="backward"
)
| user | date | score | ... | rating |
| --- | --- | --- | ... | --- |
| str | date | f64 | ... | i64 |
|---------|------------|-------|-----|--------|
| Charlie | 2025-05-01 | 3.8 | ... | 5 |
| Bob | 2025-08-01 | 4.2 | ... | 3 |
| Alice | 2025-12-01 | 4.5 | ... | 5 |
reviews.sort("date").join_asof(
inspections.sort("date"),
on="date",
strategy="nearest"
)
| user | date | score | ... | rating |
| --- | --- | --- | ... | --- |
| str | date | f64 | ... | i64 |
|---------|------------|-------|-----|--------|
| Charlie | 2025-05-01 | 3.8 | ... | 5 |
| Bob | 2025-08-01 | 4.2 | ... | 4 |
| Alice | 2025-12-01 | 4.5 | ... | 5 |
$$

$$

$$

$$

reviews.sort("date").join_asof( restaurants.sort("date"), on="date",by=["business", "location"])
| user | business | location | date | score | ... | rating |
| --- | --- | --- | --- | --- | ... | --- |
| str | str | str | date | f64 | ... | i64 |
|---------|-----------------|-------------|------------|-------|-----|--------|
| Charlie | 7burgers | Wakey Wakey | 2025-05-01 | 3.8 | ... | null |
| Bob | Costa Coffee | City Point | 2025-05-01 | 4.0 | ... | 5 |
| Bob | 7burgers | Wakey Wakey | 2025-08-01 | 4.2 | ... | null |
| Alice | 7burgers | Wakey Wakey | 2025-12-01 | 4.5 | ... | 4 |
| Alice | The Queens Head | Denman St. | 2025-12-01 | 4.7 | ... | 5 |
Data Transformation with Polars