Machine Learning for Marketing in Python
Karolis Urbonas
Head of Analytics & Science, Amazon
# First option - log transformation
# Take the natural logarithm of every value, then inspect the result with a
# pair plot whose diagonal panels show kernel density estimates.
wholesale_log = np.log(wholesale)
sns.pairplot(data=wholesale_log, diag_kind='kde')
plt.show()
# Second option - Box-Cox transformation
from scipy import stats
def boxcox_df(x):
    """Return the Box-Cox transform of one column of values.

    Written to be passed to ``DataFrame.apply`` so that each column is
    transformed independently with its own fitted lambda.

    Args:
        x: One-dimensional data to transform. ``scipy.stats.boxcox``
            requires the values to be strictly positive.

    Returns:
        The transformed values, in the same order as ``x``.
    """
    # With no explicit lambda, boxcox returns (transformed, fitted_lambda);
    # only the transformed values are needed here.
    x_boxcox, _ = stats.boxcox(x)
    return x_boxcox
# Run the Box-Cox helper down each column, then inspect the transformed data
# with a pair plot whose diagonal panels show kernel density estimates.
wholesale_boxcox = wholesale.apply(boxcox_df, axis='index')
sns.pairplot(data=wholesale_boxcox, diag_kind='kde')
plt.show()
StandardScaler() from the sklearn.preprocessing module
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the Box-Cox-transformed data, then apply the fitted
# scaling to that same data.
scaler = StandardScaler()
scaler.fit(wholesale_boxcox)
wholesale_scaled = scaler.transform(wholesale_boxcox)

# transform() returns a plain array, so rebuild a DataFrame that carries the
# original row index and column names.
wholesale_scaled_df = pd.DataFrame(
    data=wholesale_scaled,
    index=wholesale_boxcox.index,
    columns=wholesale_boxcox.columns,
)

# Sanity check: each column should show mean ~0 and std ~1. pandas reports the
# sample std (ddof=1) while StandardScaler divides by the population std, so
# the values are only approximately 1 before rounding.
wholesale_scaled_df.agg(['mean','std']).round()
      Fresh  Milk  Grocery  Frozen  Detergents_Paper  Delicassen
mean   -0.0   0.0      0.0     0.0              -0.0         0.0
std     1.0   1.0      1.0     1.0               1.0         1.0
Machine Learning for Marketing in Python