Machine Learning for Finance in Python
Nathan George
Data Science Professor
# split features/targets by position (no shuffling): leading 80% of rows
# become the training set, the remaining rows become the test set
train_size = int(0.8 * features.shape[0])
train_features, test_features = features[:train_size], features[train_size:]
train_targets, test_targets = targets[:train_size], targets[train_size:]
print(features.shape)
(230, 3)
from sklearn.ensemble import RandomForestRegressor

# fit the model and check scores on train and test
rfr = RandomForestRegressor(n_estimators=300, random_state=42)
rfr.fit(train_features, train_targets)

# score() on a regressor reports R^2; comparing the train and test values
# shows how well the fit generalizes to unseen rows
print(rfr.score(train_features, train_targets))
print(rfr.score(test_features, test_targets))
0.8382262317599827
0.09504859048985377
# get predictions from the model on the test set
test_predictions = rfr.predict(test_features)

# calculate and plot returns from our RF predictions and the QQQ returns
# Each test-period row of returns is multiplied element-wise by that row's
# predictions and summed across columns (axis=1).
# NOTE(review): presumably the predictions are per-asset portfolio weights
# and returns_monthly rows line up with the feature rows -- confirm.
test_returns = np.sum(returns_monthly.iloc[train_size:] * test_predictions, axis=1)
plt.plot(test_returns, label='algo')
plt.plot(returns_monthly['QQQ'].iloc[train_size:], label='QQQ')
plt.legend()
plt.show()
# calculate performance for the algorithm: compound a starting balance of
# 1000 through each test-period return, recording the balance after each step
cash = 1000
algo_cash = [cash]
for r in test_returns:
    cash *= 1 + r
    algo_cash.append(cash)

# calculate performance for QQQ
cash = 1000  # reset cash amount
qqq_cash = [cash]
for r in returns_monthly['QQQ'].iloc[train_size:]:
    cash *= 1 + r
    qqq_cash.append(cash)

# total return over the test period = (final balance - initial) / initial
print('algo returns:', (algo_cash[-1] - algo_cash[0]) / algo_cash[0])
print('QQQ returns:', (qqq_cash[-1] - qqq_cash[0]) / qqq_cash[0])
algo returns: 0.5009443507049591
QQQ returns: 0.5186775933696601
# draw both cash-balance curves on one chart: algo first, then QQQ
for balances, name in ((algo_cash, 'algo'), (qqq_cash, 'QQQ')):
    plt.plot(balances, label=name)
plt.ylabel('$')
plt.legend()  # show the legend
plt.show()
Machine Learning for Finance in Python