Extreme Gradient Boosting with XGBoost
Sergey Fogelson
Head of Data Science, TelevisaUnivision
# Baseline: cross-validated RMSE of an XGBoost regressor with no tuned
# hyperparameters (only the objective is set).
import pandas as pd
import xgboost as xgb
import numpy as np

housing_data = pd.read_csv("ames_housing_trimmed_processed.csv")

# Every column except the last is a feature; the last column is the label.
# Positional selection avoids building column-name lists and is unaffected
# by duplicate column names.
X, y = housing_data.iloc[:, :-1], housing_data.iloc[:, -1]
housing_dmatrix = xgb.DMatrix(data=X, label=y)

untuned_params = {"objective": "reg:squarederror"}

# num_boost_round is not passed, so xgb.cv falls back to its library default.
untuned_cv_results_rmse = xgb.cv(
    dtrain=housing_dmatrix,
    params=untuned_params,
    nfold=4,
    metrics="rmse",
    as_pandas=True,
    seed=123,
)

# Take the final boosting round's mean test RMSE as a scalar. Feeding the
# one-element Series from .tail(1) to "%f" relies on implicit float(Series),
# which recent pandas releases deprecate.
final_untuned_rmse = untuned_cv_results_rmse["test-rmse-mean"].iloc[-1]
print("Untuned rmse: %f" % final_untuned_rmse)
Untuned rmse: 34624.229980
# Cross-validated RMSE of an XGBoost regressor with hand-picked
# hyperparameters (column subsampling, learning rate, tree depth) and
# 200 boosting rounds.
import pandas as pd
import xgboost as xgb
import numpy as np

housing_data = pd.read_csv("ames_housing_trimmed_processed.csv")

# Every column except the last is a feature; the last column is the label.
# Positional selection avoids building column-name lists and is unaffected
# by duplicate column names.
X, y = housing_data.iloc[:, :-1], housing_data.iloc[:, -1]
housing_dmatrix = xgb.DMatrix(data=X, label=y)

tuned_params = {
    "objective": "reg:squarederror",
    "colsample_bytree": 0.3,
    "learning_rate": 0.1,
    "max_depth": 5,
}

tuned_cv_results_rmse = xgb.cv(
    dtrain=housing_dmatrix,
    params=tuned_params,
    nfold=4,
    num_boost_round=200,
    metrics="rmse",
    as_pandas=True,
    seed=123,
)

# Take the final boosting round's mean test RMSE as a scalar. Feeding the
# one-element Series from .tail(1) to "%f" relies on implicit float(Series),
# which recent pandas releases deprecate.
final_tuned_rmse = tuned_cv_results_rmse["test-rmse-mean"].iloc[-1]
print("Tuned rmse: %f" % final_tuned_rmse)
Tuned rmse: 29812.683594
Extreme Gradient Boosting with XGBoost