Introduction to Predictive Analytics in Python
Nele Verbiest, Ph.D
Data Scientist @PythonPredictions
variable = "max_gift"
number_bins = 3
# Bin the variable into `number_bins` quantile-based intervals with pd.qcut.
basetable["disc_max_gift"] = pd.qcut(basetable[variable], number_bins)
# Count the number of rows that fall in each interval.
basetable.groupby("disc_max_gift").size()
disc_max_gift
[2, 84.25] 33330
(84.25, 96.833] 33330
(96.833, 197] 33330
dtype: int64
variables_model = ["income_average", "mean_gift", "gender_M", "min_gift", "age"]


def check_discretize(basetable, variable, threshold):
    """Tell whether `variable` has enough distinct groups to be discretized.

    Args:
        basetable: DataFrame that contains the column `variable`.
        variable: Name of the column to inspect.
        threshold: Number of groups that must be exceeded.

    Returns:
        True when grouping `basetable` by `variable` yields strictly more
        than `threshold` groups, False otherwise.
    """
    return len(basetable.groupby(variable)) > threshold
check_discretize(basetable, "mean_gift",5)
True
check_discretize(basetable, "income_average",5)
False
variables_model = ["income_average", "mean_gift", "gender_M", "min_gift", "age"]


def check_discretize(basetable, variable, threshold):
    """Tell whether `variable` has enough distinct groups to be discretized.

    Args:
        basetable: DataFrame that contains the column `variable`.
        variable: Name of the column to inspect.
        threshold: Number of groups that must be exceeded.

    Returns:
        True when grouping `basetable` by `variable` yields strictly more
        than `threshold` groups, False otherwise.
    """
    return len(basetable.groupby(variable)) > threshold
threshold = 5
number_bins = 5
# Discretize only the model variables for which check_discretize returns True;
# the others are left untouched.
for variable in variables_model:
    if check_discretize(basetable, variable, threshold):
        # The underscore after the prefix keeps the new column names
        # consistent with "disc_age" and "disc_max_gift" used elsewhere.
        new_variable = "disc_" + variable
        basetable[new_variable] = pd.qcut(basetable[variable], number_bins)
# Bin "age" into five quantile-based intervals, then list the distinct intervals.
basetable["disc_age"] = pd.qcut(basetable["age"], q=5)
basetable["disc_age"].unique()
[(38, 49], (68, 110], [19, 38], (49, 59], (59, 68]]
# Bin "age" on explicit, hand-picked edges rather than on quantiles,
# then count the rows in each resulting interval.
basetable["disc_age"] = pd.cut(basetable["age"], bins=[18, 30, 40, 50, 60, 110])
basetable.groupby("disc_age").size()
disc_age
(18, 30] 10017
(30, 40] 14448
(40, 50] 19002
(50, 60] 24684
(60, 110] 31849
Introduction to Predictive Analytics in Python