Machine Learning for Time Series Data in Python
Chris Holdgraf
Fellow, Berkeley Institute for Data Science
# Iterating over the "split" method yields train/test indices
for tr, tt in cv.split(X, y):
    model.fit(X[tr], y[tr])
    model.score(X[tt], y[tt])
KFold cross-validation splits your data into multiple "folds" of equal sizeIt is one of the most common cross-validation routines
  from sklearn.model_selection import KFold
  cv = KFold(n_splits=5)
  for tr, tt in cv.split(X, y):
      ...
fig, axs = plt.subplots(2, 1)
# Plot the indices chosen for validation on each loop
axs[0].scatter(tt, [0] * len(tt), marker='_', s=2, lw=40)
axs[0].set(ylim=[-.1, .1], title='Test set indices (color=CV loop)', 
           xlabel='Index of raw data')
# Plot the model predictions on each iteration
axs[1].plot(model.predict(X[tt]))
axs[1].set(title='Test set predictions on each CV loop', 
           xlabel='Prediction index')

You should not shuffle your data when making predictions with timeseries.
  from sklearn.model_selection import ShuffleSplit
  cv = ShuffleSplit(n_splits=3)
  for tr, tt in cv.split(X, y):
      ...

# Import and initialize the cross-validation iterator
from sklearn.model_selection import TimeSeriesSplit
cv = TimeSeriesSplit(n_splits=10)
fig, ax = plt.subplots(figsize=(10, 5))
for ii, (tr, tt) in enumerate(cv.split(X, y)):
    # Plot training and test indices
    l1 = ax.scatter(tr, [ii] * len(tr), c=[plt.cm.coolwarm(.1)], 
                    marker='_', lw=6)
    l2 = ax.scatter(tt, [ii] * len(tt), c=[plt.cm.coolwarm(.9)], 
                    marker='_', lw=6)
    ax.set(ylim=[10, -1], title='TimeSeriesSplit behavior', 
           xlabel='data index', ylabel='CV iteration')
    ax.legend([l1, l2], ['Training', 'Validation'])

def myfunction(estimator, X, y):
    y_pred = estimator.predict(X)
    my_custom_score = my_custom_function(y_pred, y)
    return my_custom_score
def my_pearsonr(est, X, y):
    # Generate predictions and convert to a vector 
    y_pred = est.predict(X).squeeze()
    # Use the numpy "corrcoef" function to calculate a correlation matrix
    my_corrcoef_matrix = np.corrcoef(y_pred, y.squeeze())
    # Return a single correlation value from the matrix
    my_corrcoef = my_corrcoef[1, 0]
    return my_corrcoef
Machine Learning for Time Series Data in Python