import pandas as pd
import numpy as np
import os
import altair as alt
alt.data_transformers.disable_max_rows()
from datetime import datetime
from scipy import stats
import matplotlib.pyplot as plt
#cwd
cwd=os.getcwd()
This is the second of a two-part series. In this piece we will evaluate the results of the A/B test.
This exercise is taken from the Udemy course on A/B Testing. It concerns a social media company, Kittengram, which will run an A/B test on its website after partnering with an ads company that focuses on cat ads, in order to increase its Click-Through Rate (CTR). The company provides two datasets: 1) the activity level of users and 2) the CTR, post-processed.
- Exploring the datasets, we corroborated that an approximately equal number of users was assigned to each group (29,951 in group 0 and 30,049 in group 1).
-
## Loading the data from a .csv file
# Build the path with os.path.join instead of string concatenation so the
# separator is handled by the standard library.
data = pd.read_csv(os.path.join(cwd, "resources", "assignments.csv"))
data.head()
userid | ts | groupid | |
---|---|---|---|
0 | c5d77c89-33a3-4fe3-9e31-179dec09d49c | 2021-11-02T07:31:42Z | 0 |
1 | 9061d751-7a94-44d3-8792-5ca5ec59aa89 | 2021-11-13T07:43:51Z | 0 |
2 | a5b70ae7-f07c-4773-9df4-ce112bc9dc48 | 2021-11-20T19:26:07Z | 0 |
3 | d2646662-269f-49de-aab1-8776afced9a3 | 2021-11-20T11:09:02Z | 0 |
4 | 2d9b23b7-4e5e-4162-9f0f-49e593fdd2b5 | 2021-11-04T07:42:07Z | 0 |
## converting ts to date
# Parse the timestamps in one vectorized pass (instead of a Python-level
# strptime call per row) and keep only the calendar date, formatted as a
# 'YYYY-MM-DD' string exactly as before.
data['dt'] = pd.to_datetime(data['ts'], format='%Y-%m-%dT%H:%M:%SZ').dt.strftime('%Y-%m-%d')
data.head(5)
userid | ts | groupid | dt | |
---|---|---|---|---|
0 | c5d77c89-33a3-4fe3-9e31-179dec09d49c | 2021-11-02T07:31:42Z | 0 | 2021-11-02 |
1 | 9061d751-7a94-44d3-8792-5ca5ec59aa89 | 2021-11-13T07:43:51Z | 0 | 2021-11-13 |
2 | a5b70ae7-f07c-4773-9df4-ce112bc9dc48 | 2021-11-20T19:26:07Z | 0 | 2021-11-20 |
3 | d2646662-269f-49de-aab1-8776afced9a3 | 2021-11-20T11:09:02Z | 0 | 2021-11-20 |
4 | 2d9b23b7-4e5e-4162-9f0f-49e593fdd2b5 | 2021-11-04T07:42:07Z | 0 | 2021-11-04 |
data.describe()
groupid | |
---|---|
count | 60000.000000 |
mean | 0.500817 |
std | 0.500003 |
min | 0.000000 |
25% | 0.000000 |
50% | 1.000000 |
75% | 1.000000 |
max | 1.000000 |
data.groupby(['groupid']).count()
userid | ts | dt | |
---|---|---|---|
groupid | |||
0 | 29951 | 29951 | 29951 |
1 | 30049 | 30049 | 30049 |
data_count=data.groupby(['groupid', 'dt']).count().reset_index()
data_count.head()
groupid | dt | userid | ts | |
---|---|---|---|---|
0 | 0 | 2021-11-01 | 1497 | 1497 |
1 | 0 | 2021-11-02 | 1467 | 1467 |
2 | 0 | 2021-11-03 | 1532 | 1532 |
3 | 0 | 2021-11-04 | 1509 | 1509 |
4 | 0 | 2021-11-05 | 1503 | 1503 |
# Line chart of the per-day row counts in `data_count`, one line per groupid.
alt.Chart(data_count).mark_line(size=3).encode(
    x='dt',
    y='userid',
    color='groupid:O',
    tooltip=['userid'],
).properties(width=600, height=400)
# Build the path with os.path.join instead of string concatenation so the
# separator is handled by the standard library.
data_act = pd.read_csv(os.path.join(cwd, "resources", "activity_all.csv"))
data_act.head()
userid | dt | groupid | activity_level | |
---|---|---|---|---|
0 | a5b70ae7-f07c-4773-9df4-ce112bc9dc48 | 2021-10-01 | 0 | 0 |
1 | d2646662-269f-49de-aab1-8776afced9a3 | 2021-10-01 | 0 | 0 |
2 | c4d1cfa8-283d-49ad-a894-90aedc39c798 | 2021-10-01 | 1 | 0 |
3 | 6889f87f-5356-4904-a35a-6ea5020011db | 2021-10-01 | 0 | 0 |
4 | dbee604c-474a-4c9d-b013-508e5a0e3059 | 2021-10-01 | 1 | 0 |
data_act.groupby(['groupid','dt']).describe()
#we can already see a difference in mean activity and the 50% & 75% percentiles
activity_level | |||||||||
---|---|---|---|---|---|---|---|---|---|
count | mean | std | min | 25% | 50% | 75% | max | ||
groupid | dt | ||||||||
0 | 2021-10-01 | 29951.0 | 5.241762 | 6.516640 | 0.0 | 0.0 | 1.0 | 10.0 | 20.0 |
2021-10-02 | 29951.0 | 5.255885 | 6.509838 | 0.0 | 0.0 | 1.0 | 10.0 | 20.0 | |
2021-10-03 | 29951.0 | 5.266068 | 6.511458 | 0.0 | 0.0 | 1.0 | 10.0 | 20.0 | |
2021-10-04 | 29951.0 | 5.212447 | 6.511711 | 0.0 | 0.0 | 1.0 | 10.0 | 20.0 | |
2021-10-05 | 29951.0 | 5.177590 | 6.512791 | 0.0 | 0.0 | 1.0 | 10.0 | 20.0 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1 | 2021-11-26 | 30049.0 | 10.031216 | 5.770582 | 0.0 | 5.0 | 10.0 | 15.0 | 20.0 |
2021-11-27 | 30049.0 | 10.026024 | 5.774141 | 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | |
2021-11-28 | 30049.0 | 9.975307 | 5.788257 | 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | |
2021-11-29 | 30049.0 | 9.970781 | 5.799546 | 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | |
2021-11-30 | 30049.0 | 9.963926 | 5.764812 | 0.0 | 5.0 | 10.0 | 15.0 | 20.0 |
122 rows × 8 columns
data_act.query('activity_level > 0').groupby(['dt', 'groupid']).count().reset_index().head()
dt | groupid | userid | activity_level | |
---|---|---|---|---|
0 | 2021-10-01 | 0 | 15337 | 15337 |
1 | 2021-10-01 | 1 | 15297 | 15297 |
2 | 2021-10-02 | 0 | 15354 | 15354 |
3 | 2021-10-02 | 1 | 15421 | 15421 |
4 | 2021-10-03 | 0 | 15423 | 15423 |
# Count, per day and group, the rows whose activity_level is above zero,
# then plot one line per groupid.
active_rows_by_day = (
    data_act.query('activity_level > 0')
    .groupby(['dt', 'groupid'])
    .count()
    .reset_index()
)
alt.Chart(active_rows_by_day).mark_line(size=3).encode(
    x='dt',
    y='userid',
    color='groupid:O',
    tooltip=['userid'],
).properties(width=600, height=400)
# Summary statistics of the daily count of rows with activity_level > 0
# for group 0, from 2021-11-01 onwards.
group0_active_by_day = (
    data_act.query('activity_level > 0 and groupid == 0 and dt >= "2021-11-01"')
    .groupby(['dt', 'groupid'])
    .count()
    .reset_index()
)
group0_active_by_day[['groupid', 'activity_level']].describe()
groupid | activity_level | |
---|---|---|
count | 30.0 | 30.000000 |
mean | 0.0 | 15782.000000 |
std | 0.0 | 371.077276 |
min | 0.0 | 15163.000000 |
25% | 0.0 | 15335.000000 |
50% | 0.0 | 15990.500000 |
75% | 0.0 | 16045.000000 |
max | 0.0 | 16147.000000 |
# Summary statistics of the daily count of rows with activity_level > 0
# for group 1, from 2021-11-01 onwards.
group1_active_by_day = (
    data_act.query('activity_level > 0 and groupid == 1 and dt >= "2021-11-01"')
    .groupby(['dt', 'groupid'])
    .count()
    .reset_index()
)
group1_active_by_day[['groupid', 'activity_level']].describe()
groupid | activity_level | |
---|---|---|
count | 30.0 | 30.000000 |
mean | 1.0 | 29302.433333 |
std | 0.0 | 30.417422 |
min | 1.0 | 29255.000000 |
25% | 1.0 | 29280.000000 |
50% | 1.0 | 29300.000000 |
75% | 1.0 | 29321.000000 |
max | 1.0 | 29382.000000 |
data_act.query('dt >= "2021-11-01"').groupby(['groupid']).describe()
activity_level | ||||||||
---|---|---|---|---|---|---|---|---|
count | mean | std | min | 25% | 50% | 75% | max | |
groupid | ||||||||
0 | 898530.0 | 5.402211 | 6.55557 | 0.0 | 0.0 | 1.0 | 11.0 | 20.0 |
1 | 901470.0 | 9.996304 | 5.78868 | 0.0 | 5.0 | 10.0 | 15.0 | 20.0 |
data_act.query('dt < "2021-11-01"').groupby('groupid').describe()
activity_level | ||||||||
---|---|---|---|---|---|---|---|---|
count | mean | std | min | 25% | 50% | 75% | max | |
groupid | ||||||||
0 | 928481.0 | 5.245635 | 6.521184 | 0.0 | 0.0 | 1.0 | 10.0 | 20.0 |
1 | 931519.0 | 5.240952 | 6.520811 | 0.0 | 0.0 | 1.0 | 10.0 | 20.0 |
data_act_count = data_act.query('activity_level > 0').groupby(['groupid','dt']).count().reset_index()
data_act_count.head()
groupid | dt | userid | activity_level | |
---|---|---|---|---|
0 | 0 | 2021-10-01 | 15337 | 15337 |
1 | 0 | 2021-10-02 | 15354 | 15354 |
2 | 0 | 2021-10-03 | 15423 | 15423 |
3 | 0 | 2021-10-04 | 15211 | 15211 |
4 | 0 | 2021-10-05 | 15126 | 15126 |
# Line chart of the per-day counts in `data_act_count`, one line per groupid.
alt.Chart(data_act_count).mark_line(size=3).encode(
    x='dt',
    y='userid',
    color='groupid:O',
    tooltip=['userid'],
).properties(width=600, height=400)
Next, we compare the two groups by their activity levels.
from scipy.stats import ttest_ind
data_act.query('groupid == 0')['activity_level'].to_numpy()
array([ 0, 0, 0, ..., 20, 20, 20])
# Two-sample t-test on the activity_level values of group 0 versus group 1,
# restricted to rows dated 2021-11-01 or later.
group0_activity = data_act.query('groupid == 0 and dt >= "2021-11-01"')['activity_level'].to_numpy()
group1_activity = data_act.query('groupid == 1 and dt >= "2021-11-01"')['activity_level'].to_numpy()
res = ttest_ind(group0_activity, group1_activity).pvalue
print(res)
# Reading the p-value (null hypothesis: both groups have the same mean):
# - p close to 1: the sample means are practically identical
#   (comparing groupid == 0 with itself gives p = 1).
# - p close to 0: a difference this large would be very unlikely if the
#   means were equal, so the null hypothesis is rejected.
# A printed 0.0 means p is below floating-point resolution, not exactly zero.
0.0
"{:.100f}".format(res)
'0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000'
before = data_act_count.query('dt < "2021-11-01"')
after = data_act_count.query('dt >= "2021-11-01"')
before.head()
groupid | dt | userid | activity_level | |
---|---|---|---|---|
0 | 0 | 2021-10-01 | 15337 | 15337 |
1 | 0 | 2021-10-02 | 15354 | 15354 |
2 | 0 | 2021-10-03 | 15423 | 15423 |
3 | 0 | 2021-10-04 | 15211 | 15211 |
4 | 0 | 2021-10-05 | 15126 | 15126 |
np.mean(before.query('groupid == 0')['userid'].to_numpy())
15320.870967741936
np.mean(before.query('groupid == 1')['userid'].to_numpy())
15352.516129032258
# Two-sample t-test on the 'userid' column of `before` (the dt < 2021-11-01
# slice of the daily counts), group 0 versus group 1.
group0_before = before.query('groupid == 0')['userid'].to_numpy()
group1_before = before.query('groupid == 1')['userid'].to_numpy()
res = ttest_ind(group0_before, group1_before).pvalue
print(res)
# Reading the p-value (null hypothesis: both groups have the same mean):
# - p close to 1: the sample means are practically identical
#   (comparing groupid == 0 with itself gives p = 1).
# - p close to 0: a difference this large would be very unlikely if the
#   means were equal, so the null hypothesis is rejected.
0.1630842353828084
"{:.100f}".format(res)
'0.1630842353828083901579049097563256509602069854736328125000000000000000000000000000000000000000000000'
np.mean(after.query('groupid == 0')['userid'].to_numpy())
15782.0
np.mean(after.query('groupid == 1')['userid'].to_numpy())
29302.433333333334
# Two-sample t-test on the 'userid' column of `after` (the dt >= 2021-11-01
# slice of the daily counts), group 0 versus group 1.
group0_after = after.query('groupid == 0')['userid'].to_numpy()
group1_after = after.query('groupid == 1')['userid'].to_numpy()
res = ttest_ind(group0_after, group1_after).pvalue
print(res)
6.590603584107244e-84
"{:.100f}".format(res)
'0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000065906035841072442'
# Build the path with os.path.join instead of string concatenation so the
# separator is handled by the standard library.
data_ctr = pd.read_csv(os.path.join(cwd, "resources", "ctr_all.csv"))
data_ctr.head()
userid | dt | groupid | ctr | |
---|---|---|---|---|
0 | 60389fa7-2d71-4cdf-831c-c2bb277ffa1e | 2021-11-13 | 0 | 31.81 |
1 | b59cb225-d160-4851-92d2-7cc8120a2f63 | 2021-11-13 | 0 | 30.46 |
2 | aa336050-934e-453f-a5b0-dd881fcd114e | 2021-11-13 | 0 | 34.25 |
3 | 8df767f4-a10f-4322-a722-676b7e02b372 | 2021-11-13 | 0 | 34.92 |
4 | a74762ed-4da0-42ab-91d2-40d7e808dfe9 | 2021-11-13 | 0 | 34.95 |
# Mean CTR per group and day. Select 'ctr' explicitly: on pandas >= 2.0 a
# bare .mean() also tries to average the string 'userid' column and raises
# TypeError (numeric_only now defaults to False). The result keeps the same
# three columns: groupid, dt, ctr.
data_ctr_avg = data_ctr.groupby(['groupid', 'dt'])['ctr'].mean().reset_index()
# Line chart of the per-day average CTR in `data_ctr_avg`, one line per groupid.
alt.Chart(data_ctr_avg).mark_line(size=5).encode(
    x='dt',
    y='ctr',
    color='groupid:O',
    tooltip=['ctr'],
).properties(width=600, height=400)
before = data_ctr.query('dt < "2021-11-01"')[['groupid', 'ctr']]
after = data_ctr.query('dt >= "2021-11-01"')[['groupid', 'ctr']]
after
groupid | ctr | |
---|---|---|
0 | 0 | 31.81 |
1 | 0 | 30.46 |
2 | 0 | 34.25 |
3 | 0 | 34.92 |
4 | 0 | 34.95 |
... | ... | ... |
2303403 | 1 | 37.27 |
2303404 | 1 | 39.14 |
2303405 | 1 | 40.05 |
2303406 | 1 | 38.14 |
2303407 | 1 | 37.98 |
1352533 rows × 2 columns
before.query('groupid == 0')['ctr'].to_numpy().mean()
33.00091277553074
before.query('groupid == 1')['ctr'].to_numpy().mean()
32.99957172093258
after.query('groupid == 0')['ctr'].to_numpy().mean()
32.996977569382835
after.query('groupid == 1')['ctr'].to_numpy().mean()
37.99695912626142
before.query('groupid == 0')['ctr'].to_numpy().std()
1.7336979501682888
before.query('groupid == 1')['ctr'].to_numpy().std()
1.7296548367391134
after.query('groupid == 0')['ctr'].to_numpy().std()
1.7331985918552912
after.query('groupid == 1')['ctr'].to_numpy().std()
1.7323710606903675
# Two-sample t-test on the ctr values in `before` (the dt < 2021-11-01 slice
# of the CTR data), group 0 versus group 1.
group0_ctr_before = before.query('groupid == 0')['ctr'].to_numpy()
group1_ctr_before = before.query('groupid == 1')['ctr'].to_numpy()
res = ttest_ind(group0_ctr_before, group1_ctr_before).pvalue
print(res)
# Reading the p-value (null hypothesis: both groups have the same mean):
# - p close to 1: the sample means are practically identical
#   (comparing groupid == 0 with itself gives p = 1).
# - p close to 0: a difference this large would be very unlikely if the
#   means were equal, so the null hypothesis is rejected.
0.705741417344299
# Two-sample t-test on the ctr values in `after` (the dt >= 2021-11-01 slice
# of the CTR data), group 0 versus group 1.
group0_ctr_after = after.query('groupid == 0')['ctr'].to_numpy()
group1_ctr_after = after.query('groupid == 1')['ctr'].to_numpy()
res = ttest_ind(group0_ctr_after, group1_ctr_after).pvalue
print(res)
# Reading the p-value (null hypothesis: both groups have the same mean):
# - p close to 1: the sample means are practically identical
#   (comparing groupid == 0 with itself gives p = 1).
# - p close to 0: a difference this large would be very unlikely if the
#   means were equal, so the null hypothesis is rejected.
# A printed 0.0 means p is below floating-point resolution, not exactly zero.
0.0
"{:.100f}".format(res)
'0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000'