Designing Forecasting Pipelines for Production
Rami Krispin
Senior Manager, Data Science and Engineering











from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from mlforecast.utils import PredictionIntervals
from window_ops.expanding import expanding_mean
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from utilsforecast.plotting import plot_series
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
from statistics import mean
# Load the raw series and keep only the columns used for forecasting:
# the series identifier, the timestamp and the target value.
ts = pd.read_csv("data/data.csv")
ts["ds"] = pd.to_datetime(ts["ds"])
ts = ts.sort_values("ds")
ts = ts[["unique_id", "ds", "y"]]

# Restrict the data to a trailing window that ends at the latest timestamp.
# The window length is 25 periods of 31 days, expressed in hours.
end = ts["ds"].max()
start = end - datetime.timedelta(hours=24 * 31 * 25)
ts = ts[ts["ds"] >= start]

# NOTE(review): presumably asks the Nixtla libraries to return the series id
# as a regular column rather than as the index - confirm against their docs.
os.environ['NIXTLA_ID_AS_COL'] = '1'
# Candidate regressors, keyed by the name each one is registered under.
ml_models = {
    "lightGBM": LGBMRegressor(n_estimators=500, verbosity=-1),
    "xgboost": XGBRegressor(),
    "linear_regression": LinearRegression(),
    "lasso": Lasso(),
    "ridge": Ridge(),
}

# Forecasting object shared by all models: hourly frequency, lags 1 through 23
# (the end of range() is exclusive), plus calendar features derived from the
# timestamp.
mlf = MLForecast(
    models=ml_models,
    freq='h',
    lags=list(range(1, 24)),
    date_features=["month", "day", "dayofweek", "week", "hour"],
)
Window Settings
# Backtesting window configuration. These values are passed to
# mlf.cross_validation as n_windows (partitions), step_size and h.
(partitions, step_size, h) = (10, 24, 72)
Prediction Intervals Settings
# Configuration of the prediction intervals requested from the backtest.
levels = [95]
method = "conformal_distribution"
n_windows = 5

# Interval settings object; it reuses the forecast horizon `h` so the
# intervals cover the same number of steps as the forecasts.
pi = PredictionIntervals(h=h, n_windows=n_windows, method=method)
# Backtest every registered model on `ts`, using `partitions` windows spaced
# `step_size` apart with a horizon of `h`, and request prediction intervals
# at the configured levels.
bkt_df = mlf.cross_validation(
    df=ts,
    h=h,
    step_size=step_size,
    n_windows=partitions,
    prediction_intervals=pi,
    level=levels,
)

# Preview the first rows of the backtesting result.
print(bkt_df.head())
     unique_id    ds                     cutoff                 y            lightGBM         
0    1            2024-04-22 00:00:00    2024-04-21 23:00:00    421082.60    421089.155837    
1    1            2024-04-22 01:00:00    2024-04-21 23:00:00    429728.30    425700.453391    
2    1            2024-04-22 02:00:00    2024-04-21 23:00:00    430690.96    424382.613668    
3    1            2024-04-22 03:00:00    2024-04-21 23:00:00    420094.58    409967.877157    
4    1            2024-04-22 04:00:00    2024-04-21 23:00:00    403292.36    393175.446116    

Designing Forecasting Pipelines for Production