Analyzing Marketing Campaigns with pandas
Jill Rosok
Data Scientist
# Calculate pre-error conversion rate
# Bug arose sometime around '2018-01-11'
house_ads_no_bug = house_ads[house_ads['date_served'] < '2018-01-11']
lang_conv = conversion_rate(house_ads_no_bug,
['language_displayed'])
# Index other language conversion rate against English
spanish_index = lang_conv['Spanish']/lang_conv['English']
arabic_index = lang_conv['Arabic']/lang_conv['English']
german_index = lang_conv['German']/lang_conv['English']
print("Spanish index:", spanish_index)
print("Arabic index:", arabic_index)
print("German index:", german_index)
Spanish index: 1.6819248826291078
Arabic index: 5.045774647887324
German index: 4.485133020344288
# Create actual conversion DataFrame
language_conversion = house_ads.groupby(['date_served', \
'language_preferred'])\
.agg({'user_id':'nunique',\
'converted':'sum'})
expected_conversion = pd.DataFrame(
language_conversion.unstack(level=1)
)
user_id converted
language_preferred Arabic English German Spanish Arabic English German Spanish
date_served
2018-01-01 2.0 171.0 5.0 11.0 2 13 1 0
2018-01-02 3.0 200.0 5.0 10.0 0 14 3 0
2018-01-03 2.0 179.0 3.0 8.0 0 15 1 1
2018-01-04 2.0 149.0 2.0 14.0 0 12 0 3
2018-01-05 NaN 143.0 1.0 14.0 NaN 17 False 3
...
...
# Create English conversion rate column for affected period
language_conversion['actual_english_conversions'] = \
language_conversion.loc\
['2018-01-11':'2018-01-31']\
[('converted','English')]
language_conversion['actual_english_rate'] = \
language_conversion.loc['2018-01-11':'2018-01-31']\
[('converted','English')]/language_conversion.loc\
['2018-01-11':'2018-01-31'][('user_id','English')]
# Create expected conversion rates for each language
language_conversion['expected_spanish_rate'] = \
language_conversion['actual_english_rate']*spanish_index
language_conversion['expected_arabic_rate'] = \
language_conversion['actual_english_rate']*arabic_index
language_conversion['expected_german_rate'] = \
language_conversion['actual_english_rate']*german_index
# Multiply total ads served by expected conversion rate
language_conversion['expected_spanish_conversions'] = \
language_conversion['expected_spanish_rate']/100
*language_conversion[('user_id','Spanish')]
language_conversion['expected_arabic_conversions'] = \
language_conversion['expected_arabic_rate']/100
*language_conversion[('user_id','Arabic')]
language_conversion['expected_german_conversions'] = \
language_conversion['expected_german_rate']/100
*language_conversion[('user_id','German')]
bug_period = language_conversion.loc['2018-01-11':'2018-01-31']
# Sum expected subscribers for each language
expected_subs = bug_period['expected_spanish_conv_rate'].agg('sum') + \
bug_period['expected_arabic_conv_rate'].agg('sum') + \
bug_period['expected_german_conv_rate'].agg('sum')
# Calculate how many subscribers we actually got
actual_subs = bug_period[('converted','Spanish')].sum() + \
bug_period[('converted','Arabic')].agg('sum') + \
bug_period[('converted','German')].agg('sum')
lost_subs = expected_subs - actual_subs
print(lost_subs)
32.144143192488265
Analyzing Marketing Campaigns with pandas