Introduction to Data Engineering
Vincent Vankrunkelsven
Data Engineer @ DataCamp
extract_course_data()
and extract_rating_data()
transform_fill_programming_language()
transform_avg_rating()
transform_courses_to_recommend()
transform_recommendations()
# Write the recommendations to the "recommendations" table through db_engine;
# if_exists="append" adds rows to an existing table instead of replacing it.
# NOTE(review): presumably `recommendations` is a pandas DataFrame — confirm.
recommendations.to_sql(
    name="recommendations",
    con=db_engine,
    if_exists="append",
)
def etl(db_engines):
    """Run the recommendations pipeline: extract, transform, then load.

    Args:
        db_engines: Passed unchanged to both extract helpers.
            NOTE(review): presumably a collection of database engines
            keyed by name — confirm against the extract helpers.
    """
    # Extract the data
    courses = extract_course_data(db_engines)
    rating = extract_rating_data(db_engines)

    # Clean up courses data
    courses = transform_fill_programming_language(courses)

    # Get the average course ratings
    avg_course_rating = transform_avg_rating(rating)

    # Get eligible user and course id pairs
    courses_to_recommend = transform_courses_to_recommend(
        rating,
        courses,
    )

    # Calculate the recommendations
    recommendations = transform_recommendations(
        avg_course_rating,
        courses_to_recommend,
    )

    # Load the recommendations into the database.
    # NOTE(review): `db_engine` is neither a parameter nor a local of this
    # function, so it must resolve to a module-level name at call time.
    # Confirm it exists, or pass the appropriate engine taken from
    # `db_engines` instead.
    load_to_dwh(recommendations, db_engine)
from airflow.models import DAG
from airflow.operators.python_operator import PythonOperator

# The DAG keyword is `schedule_interval` (not `scheduled_interval`).
# The cron expression "0 0 * * *" triggers a run every day at midnight.
dag = DAG(dag_id="recommendations", schedule_interval="0 0 * * *")

# Wrap the etl() callable in a task and attach it to the DAG; without
# `dag=dag` (or a `with dag:` block) the operator belongs to no DAG and is
# never scheduled.
# NOTE(review): etl() takes a required `db_engines` argument; supply it with
# `op_kwargs={"db_engines": ...}` once the engines object is available here.
task_recommendations = PythonOperator(
    task_id="recommendations_task",
    python_callable=etl,
    dag=dag,
)
Introduction to Data Engineering