Building Data Pipelines with Airflow
Volker Janz
Senior Developer Advocate at Astronomer

from airflow.models import DagBag

# Parse the project's Dag files once and share the result across the tests;
# include_examples=False keeps Airflow's bundled example Dags out of the bag.
dag_bag = DagBag(include_examples=False)


def test_no_import_errors():
    """Fail if any Dag file could not be imported."""
    # Show the collected errors in the failure message so the broken file
    # is visible without re-running anything.
    assert not dag_bag.import_errors, f"Dag import errors: {dag_bag.import_errors}"


def test_dag_loaded():
    """Check that the "daily_etl" Dag was found and loaded."""
    assert "daily_etl" in dag_bag.dags

ModuleNotFoundError: missing provider package or wrong import path
NameError: renamed variable, function, or typo
ImportError: circular imports between Dag files

In the Dag file:
def clean_record(record: dict) -> dict:
    """Return a normalized copy of ``record`` with only its name and email.

    Args:
        record: Mapping with string values under the ``"name"`` and
            ``"email"`` keys. Any other keys are ignored.

    Returns:
        A new dict whose ``"name"`` has surrounding whitespace stripped and
        whose ``"email"`` is lower-cased.

    Raises:
        KeyError: If ``"name"`` or ``"email"`` is missing from ``record``.
    """
    return {
        "name": record["name"].strip(),
        "email": record["email"].lower(),
    }
@task
def transform(records):
    """Apply ``clean_record`` to every item in ``records``.

    The cleaning logic lives in the plain ``clean_record`` function; this
    task only maps it over the input.

    Args:
        records: Iterable of records accepted by ``clean_record``.

    Returns:
        A list with one cleaned record per input record, in input order.
    """
    return [clean_record(r) for r in records]
In the test file:
from dags.data_cleaning import (
clean_record,
)
def test_strips_whitespace():
    """``clean_record`` removes surrounding whitespace from the name."""
    result = clean_record(
        {
            "name": " Alice ",
            # Example address on a reserved domain; this test asserts only
            # on the name field.
            "email": "Alice@Example.com",
        }
    )
    assert result["name"] == "Alice"
import json
from pathlib import Path

import pytest
from airflow.models import DagBag
from pendulum import datetime

# Parse the project's Dag files once; include_examples=False keeps Airflow's
# bundled example Dags out of the bag.
dag_bag = DagBag(include_examples=False)


def test_etl_pipeline():
    """Run the "etl_output" Dag and check the JSON file it produces.

    The test runs the Dag for a fixed logical date, then expects
    ``/tmp/etl_results.json`` to exist and to contain exactly two results.
    """
    dag = dag_bag.get_dag("etl_output")
    assert dag is not None

    output = Path("/tmp/etl_results.json")
    # Remove any file left by an earlier run so the checks below can only
    # pass on output written by this run.
    # NOTE(review): assumes the Dag recreates this file from scratch - confirm.
    output.unlink(missing_ok=True)

    dag.test(logical_date=datetime(2026, 1, 15))

    assert output.exists()
    results = json.loads(output.read_text())
    assert len(results) == 2

Building Data Pipelines with Airflow