Machine Learning for Time Series Data in Python
Chris Holdgraf
Fellow, Berkeley Institute for Data Science
# Classification models predict discrete class labels
classification_model.predict(X_test)
array([0, 1, 1, 0])

# Regression models predict continuous values
regression_model.predict(X_test)
array([0.2, 1.4, 3.6, 0.6])
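The models above are assumed to be already fit; a minimal sketch (with hypothetical toy data, not from the course) of how each kind could be created before calling .predict():

import numpy as np
from sklearn.svm import LinearSVC
from sklearn.linear_model import LinearRegression

# Hypothetical toy data: 4 samples, 2 features each
X_train = np.array([[0., 1.], [1., 0.], [2., 3.], [3., 2.]])
X_test = np.array([[0., 0.], [1., 2.], [3., 3.], [2., 0.]])

# Classification: fit against discrete class labels
classification_model = LinearSVC().fit(X_train, [0, 1, 1, 0])

# Regression: fit against continuous values
regression_model = LinearRegression().fit(X_train, [0.1, 1.5, 3.5, 0.5])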
import numpy as np
import matplotlib.pyplot as plt

fig, axs = plt.subplots(1, 2)

# Make a line plot for each timeseries
axs[0].plot(x, c='k', lw=3, alpha=.2)
axs[0].plot(y)
axs[0].set(xlabel='time', title='X values = time')

# Encode time as color in a scatterplot
axs[1].scatter(x_long, y_long, c=np.arange(len(x_long)), cmap='viridis')
axs[1].set(xlabel='x', ylabel='y', title='Color = time')
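The names x, y, x_long, and y_long are assumed above; one hypothetical construction (to be run before the plotting code) so the snippet runs end to end:

import numpy as np

# Hypothetical data: two short, related timeseries plus longer versions
t = np.linspace(0, 2 * np.pi, 100)
x, y = np.sin(t), np.cos(t)
t_long = np.linspace(0, 6 * np.pi, 300)
x_long, y_long = np.sin(t_long), np.cos(t_long)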
from sklearn.linear_model import LinearRegression
model = LinearRegression()
# Fit the model, then generate predictions from the fit model
model.fit(X, y)
model.predict(X)
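scikit-learn expects X to be two-dimensional with shape (n_samples, n_features), so a 1-D timeseries needs reshaping first; a sketch with assumed toy data:

import numpy as np
from sklearn.linear_model import LinearRegression

y = np.array([0.2, 0.5, 0.9, 1.6, 2.1])  # assumed toy timeseries
X = np.arange(len(y)).reshape(-1, 1)     # time as a single column feature

model = LinearRegression().fit(X, y)
print(model.predict(X))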
from sklearn.linear_model import Ridge
import matplotlib.pyplot as plt

alphas = [.1, 1e2, 1e3]
cmap = plt.cm.viridis  # colormap used to distinguish the models

fig, ax = plt.subplots()
ax.plot(y_test, color='k', alpha=.3, lw=3)
# Fit and plot a Ridge model for each regularization strength
for ii, alpha in enumerate(alphas):
    y_predicted = Ridge(alpha=alpha).fit(X_train, y_train).predict(X_test)
    ax.plot(y_predicted, c=cmap(ii / len(alphas)))
ax.legend(['True values', 'Model 1', 'Model 2', 'Model 3'])
ax.set(xlabel="Time")
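X_train, X_test, y_train, and y_test are assumed above; for timeseries, one common sketch splits without shuffling, so the test set stays temporally after the training set:

from sklearn.model_selection import train_test_split

# Assumed setup: preserve temporal order instead of random shuffling
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, shuffle=False)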
$$ R^2 = 1 - \frac{\text{error}(\text{model})}{\text{variance}(\text{testdata})} $$
from sklearn.metrics import r2_score
print(r2_score(y_test, y_predicted))  # signature is r2_score(y_true, y_pred)
0.08
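A sketch of the same score computed by hand from the formula above, on assumed toy arrays:

import numpy as np

y_true = np.array([1.0, 2.0, 3.0, 4.0])  # hypothetical true values
y_pred = np.array([1.1, 1.8, 3.3, 3.9])  # hypothetical predictions

error = np.sum((y_true - y_pred) ** 2)            # residual sum of squares
variance = np.sum((y_true - y_true.mean()) ** 2)  # total sum of squares
print(1 - error / variance)  # same value as r2_score(y_true, y_pred)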