Dimensionality Reduction in Python
Jeroen Boeye
Head of Machine Learning, Faktion
# Lasso regression: L1 regularization shrinks weak feature coefficients
# to exactly zero, giving built-in feature selection.
from sklearn.linear_model import Lasso

la = Lasso(alpha=0.05)
la.fit(X_train, y_train)

# Actual coefficients = [5 2 0]
print(la.coef_)
# [ 4.91  1.76  0.  ]   <- third feature dropped (coefficient forced to 0)

# R^2 on the test set stays high despite the removed feature.
print(la.score(X_test, y_test))
# 0.974
# LassoCV tunes the alpha regularization strength with cross-validation
# instead of hand-picking it.
from sklearn.linear_model import LassoCV

lcv = LassoCV()
lcv.fit(X_train, y_train)
print(lcv.alpha_)
# 0.09

# Boolean mask of the features whose coefficient survived regularization.
mask = lcv.coef_ != 0
print(mask)
# [ True  True False]

# Keep only the selected feature columns.
reduced_X = X.loc[:, mask]
A random forest is a combination of decision trees.
We can use a combination of models for feature selection too.
# First "voter" in the ensemble feature selector: LassoCV.
from sklearn.linear_model import LassoCV

lcv = LassoCV()
lcv.fit(X_train, y_train)
lcv.score(X_test, y_test)
# 0.99

# Mask of features LassoCV kept (non-zero coefficients).
lcv_mask = lcv.coef_ != 0
sum(lcv_mask)
# 66  <- number of features selected
# Second voter: recursive feature elimination wrapped around a random
# forest, dropping 5 features per iteration until 66 remain.
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor

rfe_rf = RFE(estimator=RandomForestRegressor(),
             n_features_to_select=66, step=5, verbose=1)
rfe_rf.fit(X_train, y_train)

# Boolean mask of the features RFE kept.
rf_mask = rfe_rf.support_
# Third voter: the same RFE setup with a gradient-boosting estimator.
from sklearn.feature_selection import RFE
from sklearn.ensemble import GradientBoostingRegressor

rfe_gb = RFE(estimator=GradientBoostingRegressor(),
             n_features_to_select=66, step=5, verbose=1)
rfe_gb.fit(X_train, y_train)

# Boolean mask of the features RFE kept.
gb_mask = rfe_gb.support_
import numpy as np

# Sum the three boolean masks element-wise: each feature gets 0-3 votes.
votes = np.sum([lcv_mask, rf_mask, gb_mask], axis=0)
print(votes)
# array([3, 2, 2, ..., 3, 0, 1])

# Keep features selected by a majority — at least 2 of the 3 models.
mask = votes >= 2
reduced_X = X.loc[:, mask]
Dimensionality Reduction in Python