Scaling and Optimizing Data Pipelines with Polars
Liam Brannigan
Data Scientist & Polars Contributor



SELECT *
FROM building_permits
LIMIT 5
shape: (5, 5)
| issue_date | permit_type | review_type | street_name | work_description |
| --- | --- | --- | --- | --- |
| date | str | str | str | str |
|------------|-------------|-------------|--------------|------------------------------|
| 2026-04-30 | Signs | SIGN PERMIT | CLARK ST | Wall sign install |
| 2026-04-28 | Renovation | SELF CERT | OAKLEY AVE | Add dormer to 8-unit roof |
| 2026-03-21 | Easy Permit | EASY PERMIT | SEMINARY AVE | Parapet masonry repair |
| 2026-03-11 | Easy Permit | EASY PERMIT | RHODES AVE | Rebuild south chimney side |
| 2026-03-03 | Easy Permit | EASY PERMIT | GIDDINGS ST | Porch lintel + tuckpointing |
import polars as pl
uri = "postgresql://analyst:[email protected]:5432/buildings"
postgresql:// protocolanalyst:secret username and passworddata.chicago.org:5432 host and portbuildings database namequery = """
SELECT issue_date, permit_type
FROM building_permits
WHERE issue_date >= DATE '2026-01-01'
"""
query = """
SELECT issue_date, permit_type
FROM building_permits
WHERE issue_date >= DATE '2026-01-01'
"""
permit_counts = (
pl.read_database_uri(query=query, uri=uri)
)
query = """
SELECT issue_date, permit_type
FROM building_permits
WHERE issue_date >= DATE '2026-01-01'
"""
permit_counts = (
pl.read_database_uri(query=query, uri=uri)
.group_by("permit_type")
.len()
.sort("len", descending=True)
)
permit_counts
shape: (3, 2)
| permit_type | len |
| --- | --- |
| str | u32 |
|---------------------------------|-----|
| Easy Permit | 58 |
| New Construction | 14 |
| Renovation | 11 |
pl.read_database_uri(
query=query,
uri=uri,
engine="connectorx",
)
pl.read_database_uri(
query=query,
uri=uri,
engine="adbc",
)
permit_counts.write_database(
)
permit_counts.write_database(
table_name="permit_type_summary",
connection=uri,
)
permit_counts.write_database(
table_name="permit_type_summary",
connection=uri,
if_table_exists="replace",
)
appendfailimport duckdb
db = duckdb.connect("permits.duckdb")
$$
import duckdb
db = duckdb.connect("permits.duckdb")
result = db.sql("SELECT * FROM permit_type_summary").pl()
shape: (3, 2)
| permit_type | len |
| --- | --- |
| str | u32 |
|---------------------------------|-----|
| Easy Permit | 58 |
| New Construction | 14 |
| Renovation | 11 |
Scaling and Optimizing Data Pipelines with Polars