Machine Learning for Finance in Python
Nathan George
Data Science Professor
Random Forests
from sklearn.ensemble import RandomForestRegressor
# Baseline model: a random forest with scikit-learn's default settings.
random_forest = RandomForestRegressor()

# Fit on the training data, then print the score on that same training data.
random_forest.fit(X=train_features, y=train_targets)
print(random_forest.score(X=train_features, y=train_targets))
# Re-create the model with explicit hyperparameters instead of the defaults.
# A fixed random_state makes repeated runs reproducible.
random_forest = RandomForestRegressor(
    n_estimators=200,
    max_depth=5,
    max_features=4,
    random_state=42,
)
from pprint import pprint

from sklearn.model_selection import ParameterGrid

# Candidate values for each hyperparameter. ParameterGrid expands them into
# every combination (1 x 2 x 2 = 4 settings here).
grid = {'n_estimators': [200], 'max_depth': [3, 5], 'max_features': [4, 8]}

# Show each combination as a dict of keyword arguments.
pprint(list(ParameterGrid(grid)))
[{'max_depth': 3, 'max_features': 4, 'n_estimators': 200},
{'max_depth': 3, 'max_features': 8, 'n_estimators': 200},
{'max_depth': 5, 'max_features': 4, 'n_estimators': 200},
{'max_depth': 5, 'max_features': 8, 'n_estimators': 200}]
test_scores = []

# NOTE(review): `rfr` and `np` are not defined in the snippets visible here
# (the model above is named `random_forest`); presumably `rfr` is a
# RandomForestRegressor and `np` is numpy, both set up elsewhere -- confirm.

# Loop through the parameter grid, set hyperparameters, save the scores.
for g in ParameterGrid(grid):
    rfr.set_params(**g)  # ** is "unpacking" the dictionary
    rfr.fit(train_features, train_targets)
    test_scores.append(rfr.score(test_features, test_targets))

# Find best hyperparameters from the test score and print.
# NOTE: the same test set is used both to pick the hyperparameters and to
# report the score.
best_idx = np.argmax(test_scores)
print(test_scores[best_idx])
# Indexing ParameterGrid gives the combination that produced that score.
print(ParameterGrid(grid)[best_idx])
0.05594252725411142
{'max_depth': 5, 'max_features': 8, 'n_estimators': 200}
Machine Learning for Finance in Python