Experimental Design in Python
James Chapman
Curriculum Manager, DataCamp
athletes.head()
Athlete_ID Initial_Fitness_Level Muscle_Gain_kg
0 113 Beginner 3.225102
1 30 Advanced 3.976548
2 183 Intermediate 5.165449
3 200 Beginner 2.188297
4 194 Beginner 4.724162
.groupby() to shuffle within blocks
blocks = athletes.groupby('Initial_Fitness_Level').apply(
lambda x: x.sample(frac=1)
)
blocks = blocks.reset_index(drop=True)
blocks
Athlete_ID Initial_Fitness_Level Muscle_Gain_kg
0 198 Advanced 5.742
1 146 Advanced 6.248
2 157 Advanced 6.049
.. ... ... ...
198 164 Intermediate 6.134
199 178 Intermediate 6.591
numpy.random.choice() for random treatment assignment within blocks
blocks['Treatment'] = np.random.choice(
['Cardio', 'Strength Training', 'Mixed'], size=len(blocks))
blocks.sample(n=5)
Athlete_ID Initial_Fitness_Level Muscle_Gain_kg Treatment
87 194 Beginner 4.724 Cardio
54 3 Advanced 3.731 Strength Training
177 80 Intermediate 6.758 Mixed
146 183 Intermediate 5.165 Strength Training
60 190 Advanced 3.763 Cardio
import seaborn as sns
sns.boxplot(x='Initial_Fitness_Level', y='Muscle_Gain_kg', hue='Treatment', data=blocks)
plt.show()
from scipy.stats import f_oneway
blocks.groupby('Initial_Fitness_Level').apply(
lambda x: f_oneway(x[x['Treatment'] == 'Cardio']['Muscle_Gain_kg'],
x[x['Treatment'] == 'Mixed']['Muscle_Gain_kg'],
x[x['Treatment'] == 'Strength Training']['Muscle_Gain_kg'])
)
Block
Initial_Fitness_Level
Advanced (0.7951054385317405, 0.4555687666120679)
Beginner (0.1085790370950905, 0.8972754969684291)
Intermediate (0.5678877824942661, 0.5698403547950377)
dtype: object
import seaborn as sns
sns.boxplot(x='Initial_Fitness_Level', y='Muscle_Gain_kg', data=blocks)
plt.show()
f_oneway(
blocks[blocks['Initial_Fitness_Level'] == "Advanced"]['Muscle_Gain_kg'],
blocks[blocks['Initial_Fitness_Level'] == "Beginner"]['Muscle_Gain_kg'],
blocks[blocks['Initial_Fitness_Level'] == "Intermediate"]['Muscle_Gain_kg']
)
F_onewayResult(statistic=2.325058605244051, pvalue=0.10045536062209368)
Experimental Design in Python