Machine Learning for Time Series Data in Python
Chris Holdgraf
Fellow, Berkeley Institute for Data Science
# Build a time axis (in seconds) for the raw audio signal.
# NOTE(review): assumes `audio` is an ndarray with time as its LAST axis and
# `sfreq` is the sampling frequency in Hz — confirm upstream.
ixs = np.arange(audio.shape[-1])
time = ixs / sfreq
# Plot amplitude against time in seconds
fig, ax = plt.subplots()
ax.plot(time, audio)
# Inspect the data layout: one row per audio file, columns are time samples
print(audio.shape)
# (n_files, time)
# NOTE(review): the line below is pasted slide output — at runtime it is a
# bare tuple expression and a no-op.
(20, 7000)
# Collapse the time axis: summarize each audio file with one scalar
# per statistic (mean, max, standard deviation).
means, maxs, stds = (
    stat(audio, axis=-1) for stat in (np.mean, np.max, np.std)
)
# Each summary has one entry per file
print(means.shape)
# (n_files,)
(20,)
# Import a linear classifier
from sklearn.svm import LinearSVC

# Stack the per-file summary statistics into an (n_files, 3) feature matrix
X = np.column_stack([means, maxs, stds])
# scikit-learn classifiers expect y as a 1-D array of shape (n_samples,);
# reshaping to (-1, 1) produces a column vector and triggers a
# DataConversionWarning during fit, so flatten the labels instead.
y = np.ravel(labels)
model = LinearSVC()
model.fit(X, y)
from sklearn.metrics import accuracy_score

# Predict labels for the held-out data
predictions = model.predict(X_test)

# Score our model with % correct
# Manually: the fraction of predictions matching the true labels
percent_score = (predictions == labels_test).mean()
# Using a sklearn scorer
percent_score = accuracy_score(labels_test, predictions)
Machine Learning for Time Series Data in Python