Customer Analytics and A/B Testing in Python
Ryan Grossman
Data Scientist, EDO
import pandas as pd
import matplotlib.pyplot as plt
# Remove users who lapsed within the past week: keep only rows whose
# lapse_date is on or before max_lapse_date (defined outside this snippet).
# Take an explicit copy so that adding a column below modifies an independent
# DataFrame instead of a slice of sub_data_demo (avoids SettingWithCopyWarning).
lapsed_in_window = sub_data_demo.lapse_date <= max_lapse_date
conv_sub_data = sub_data_demo[lapsed_in_window].copy()

# Calculate week one conversion rate by lapse date.
# sub_time is the whole number of days between lapse and subscription.
sub_time = (
    conv_sub_data.subscription_date - conv_sub_data.lapse_date
).dt.days
conv_sub_data['sub_time'] = sub_time

# gc7 is an aggregation function defined outside this snippet
# (presumably the 7-day conversion rate -- confirm against its definition).
conversion_data = conv_sub_data.groupby(
    by=['lapse_date'], as_index=False
).agg({'sub_time': [gc7]})

# Plot our conversion rate over time
conversion_data.plot()
plt.show()
# Find the date boundaries to limit our data by
current_date = pd.to_datetime('2018-03-17')
# 6 * 28 days to represent the past 6 months
start_date = current_date - pd.Timedelta(days=6 * 28)

# A mask for our conversion rate data: lapse dates inside
# [start_date, current_date], both ends inclusive
conv_filter = (
    (conversion_data.lapse_date >= start_date)
    & (conversion_data.lapse_date <= current_date)
)

# Filter our conversion rate data
conv_data_filt = conversion_data[conv_filter]

# Plot the filtered conversion rate against lapse date
conv_data_filt.plot(x='lapse_date', y='sub_time')
plt.show()
# After filtering and calculating daily conversion...
# Pivot the results to have one column per country, one row per lapse date;
# combinations with no data are filled with 0.
conv_data_cntry = pd.pivot_table(
    data=conv_data_cntry,
    index=['lapse_date'],
    columns=['country'],
    values=['sub_time'],
    fill_value=0,
)
...
# Pivot the results to have one column per device, one row per lapse date;
# combinations with no data are filled with 0.
conv_data_dev = pd.pivot_table(
    data=conv_data_dev,
    index=['lapse_date'],
    columns=['device'],
    values=['sub_time'],
    fill_value=0,
)
events
: Holidays and events impacting user behavior
# Read events.csv into a DataFrame named events
events = pd.read_csv('events.csv')
releases
: iOS and Android software releases
# Read releases.csv into a DataFrame, then preview its first rows
releases = pd.read_csv('releases.csv')
releases.head()
Date Event
2018-03-14 iOS Release
2018-03-03 Android Release
2018-01-13 iOS Release
2018-01-15 Android Release
plt.axvline()
: Plots vertical line at the x-intercept
color
: Specify the color of the plotted line
linestyle
: The type of line to plot# Plot the conversion rate trend per device conv_data_dev.plot( x=['lapse_date'], y=['iOS', 'and'] )
# Iterate through the events and plot each one events.Date = pd.to_datetime(events.Date) for row in events.iterrows(): tmp = row[1] plt.axvline( x=tmp.Date, color='k', linestyle='--' )
# Iterate through the releases and plot each one as a dashed vertical line
releases.Date = pd.to_datetime(releases.Date)
for _, release in releases.iterrows():
    # iOS releases are drawn in blue; every other row (the Android
    # releases) is drawn in red
    line_color = 'b' if release.Event == 'iOS Release' else 'r'
    plt.axvline(x=release.Date, color=line_color, linestyle='--')
plt.show()
Customer Analytics and A/B Testing in Python