Fraud Detection in Python
Charlotte Werger
Data Scientist
# Alternative classifier configurations, each passing a class_weight setting.
# Every statement rebinds `model`, so only the last one executed stays bound.

# Built-in 'balanced' mode on a random forest.
model = RandomForestClassifier(
    class_weight='balanced',
)

# Built-in 'balanced_subsample' mode on a random forest.
model = RandomForestClassifier(
    class_weight='balanced_subsample',
)

# Built-in 'balanced' mode on a logistic regression.
model = LogisticRegression(
    class_weight='balanced',
)

# Linear-kernel SVC with 'balanced' weights and probability estimates enabled.
model = SVC(
    kernel='linear',
    class_weight='balanced',
    probability=True,
)

# Explicit per-class weights: label 1 is weighted 4, label 0 is weighted 1.
model = RandomForestClassifier(
    class_weight={0: 1, 1: 4},
    random_state=1,
)
model = LogisticRegression(
    class_weight={0: 1, 1: 4},
    random_state=1,
)
# Random forest with its main tunable hyperparameters spelled out explicitly.
# max_features uses 'sqrt' rather than 'auto': for classifiers 'auto' was an
# alias of 'sqrt', and the alias was deprecated in scikit-learn 1.1 and
# removed in 1.3, so 'sqrt' keeps the same behaviour on every version.
model = RandomForestClassifier(
    n_estimators=10,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    n_jobs=-1,  # -1 asks for all available processors
    class_weight=None,
)
from sklearn.model_selection import GridSearchCV
# Create the parameter grid: every combination of these values is evaluated.
param_grid = {
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000],
}

# Define which model to use. This must be a classifier: the search below is
# scored with 'f1', which is a classification metric, so a regressor's
# continuous predictions cannot be scored with it.
model = RandomForestClassifier()

# Instantiate the grid search model (5-fold cross-validation, all processors).
grid_search_model = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    scoring='f1',
)

# Fit the grid search to the data.
# NOTE(review): X_train / y_train are not defined in the visible code;
# presumably they are the training split created earlier -- confirm.
grid_search_model.fit(X_train, y_train)

# Get the optimal parameters
grid_search_model.best_params_
# Example result displayed for the best_params_ lookup (shown output, not logic).
# NOTE(review): 'bootstrap' is not one of the keys in the param_grid defined in
# this file, so this output presumably came from a search over a different
# grid -- verify before relying on it.
{'bootstrap': True,
'max_depth': 80,
'max_features': 3,
'min_samples_leaf': 5,
'min_samples_split': 12,
'n_estimators': 100}
# Get the best_estimator results.
# The fitted search object in this file is named `grid_search_model`; the
# attributes below are read from it (best_estimator_ is the winning model,
# best_score_ is its score under the search's scoring setting).
grid_search_model.best_estimator_
grid_search_model.best_score_
Fraud Detection in Python