Dimensionality Reduction in Python
Jeroen Boeye
Head of Machine Learning, Faktion
# Hold out 30% of the samples as a test set.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Standardize the training features; the scaler is fitted on the training
# split only.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)

# Fit a logistic regression classifier on the standardized training data.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
lr = LogisticRegression()
lr.fit(X_train_std, y_train)

# Reuse the already-fitted scaler on the test features (transform only, no
# refit) so the test set is scaled with the training-set statistics.
X_test_std = scaler.transform(X_test)

# Predict on the scaled test set and report the accuracy.
y_pred = lr.predict(X_test_std)
print(accuracy_score(y_test, y_pred))
0.99
# Show the coefficients of the fitted logistic regression model.
print(lr.coef_)
array([[-3. , 0.14, 7.46, 1.22, 0.87]])
# Pair each column name of X with the absolute value of its coefficient
# (row 0 of lr.coef_).
print(dict(zip(X.columns, abs(lr.coef_[0]))))
{'chestdepth': 3.0,
'handlength': 0.14,
'neckcircumference': 7.46,
'shoulderlength': 1.22,
'earlength': 0.87}
# Remove the 'handlength' column from X in place (it has the smallest
# absolute coefficient in the listing above).
X.drop('handlength', axis=1, inplace=True)

# Re-split the reduced data, refit the scaler and the model on the new
# training split, and report the accuracy on the new test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
lr.fit(scaler.fit_transform(X_train), y_train)
print(accuracy_score(y_test, lr.predict(scaler.transform(X_test))))
0.99
from sklearn.feature_selection import RFE

# Recursive feature elimination around a logistic regression estimator,
# configured to keep 2 features; verbose=1 prints progress while fitting.
rfe = RFE(estimator=LogisticRegression(), n_features_to_select=2, verbose=1)
rfe.fit(X_train_std, y_train)
Fitting estimator with 5 features.
Fitting estimator with 4 features.
Fitting estimator with 3 features.
Dropping a feature will affect other features' coefficients
# Index the column names of X with the fitted RFE's support_ mask to list
# the features that were kept.
X.columns[rfe.support_]
Index(['chestdepth', 'neckcircumference'], dtype='object')
# Pair each column name of X with its value in the fitted RFE's ranking_.
print(dict(zip(X.columns, rfe.ranking_)))
{'chestdepth': 1,
'handlength': 4,
'neckcircumference': 1,
'shoulderlength': 2,
'earlength': 3}
# Accuracy of the fitted RFE's predictions on the standardized test features.
print(accuracy_score(y_test, rfe.predict(X_test_std)))
0.99
Dimensionality Reduction in Python