Parallel Programming with Dask in Python
James Fulton
Climate Informatics Researcher
import dask_ml
# Scikit-learn's linear regressor trained with stochastic gradient descent
from sklearn.linear_model import SGDRegressor

# Build the estimator and train it in one expression; fit() hands back
# the fitted estimator, so `model` is the same trained object as before
model = SGDRegressor().fit(X, y)

# Generate predictions for the same inputs the model was trained on
y_pred = model.predict(X)
# Import regression model
from sklearn.linear_model import SGDRegressor

# Create instance of model
model = SGDRegressor()

# Import Dask-ML wrapper for model
from dask_ml.wrappers import Incremental

# Wrap model so it can be trained on Dask collections
dask_model = Incremental(model, scoring='neg_mean_squared_error')

# Fit on Dask DataFrames or arrays
dask_model.fit(dask_X, dask_y)  # not lazy
# Loop through data multiple times; each partial_fit call continues
# training the wrapped model instead of starting over
for i in range(10):
    dask_model.partial_fit(dask_X, dask_y)  # not lazy
# Predicting returns a Dask array rather than concrete values
y_pred = dask_model.predict(dask_X)
print(y_pred)
# Output:
# dask.array<_predict, shape=(nan,), dtype=int64, chunksize=(nan,), chunktype=...>

# Call compute() to materialize the predictions
print(y_pred.compute())
# Output:
# array([0.465557, 0.905675, 0.285214, ..., 0.249454, 0.559624, 0.823475])
Parallel Programming with Dask in Python