Sampling in Python
James Chapman
Curriculum Manager, DataCamp
# Enumerate every combination of four six-sided dice: one column per die,
# one row per possible outcome.
dice = expand_grid(
    {f"die{k}": list(range(1, 7)) for k in range(1, 5)}
)
die1 die2 die3 die4
0 1 1 1 1
1 1 1 1 2
2 1 1 1 3
3 1 1 1 4
4 1 1 1 5
... ... ... ... ...
1291 6 6 6 2
1292 6 6 6 3
1293 6 6 6 4
1294 6 6 6 5
1295 6 6 6 6
[1296 rows x 4 columns]
# Average the four dice in every row to get that outcome's mean roll.
die_columns = ['die1', 'die2', 'die3', 'die4']
dice['mean_roll'] = sum(dice[col] for col in die_columns) / 4
print(dice)
die1 die2 die3 die4 mean_roll
0 1 1 1 1 1.00
1 1 1 1 2 1.25
2 1 1 1 3 1.50
3 1 1 1 4 1.75
4 1 1 1 5 2.00
... ... ... ... ... ...
1291 6 6 6 2 5.00
1292 6 6 6 3 5.25
1293 6 6 6 4 5.50
1294 6 6 6 5 5.75
1295 6 6 6 6 6.00
[1296 rows x 5 columns]
# Treat each distinct mean as a category so it can be counted and drawn
# as its own bar.
dice['mean_roll'] = dice['mean_roll'].astype('category')
# sort=False: do not reorder the bars by frequency.
mean_roll_counts = dice['mean_roll'].value_counts(sort=False)
mean_roll_counts.plot.bar()
# Count the possible outcomes when rolling n dice, for n = 1..100.
n_dice = list(range(1, 101))
# Every extra die multiplies the number of outcomes by 6.
# NOTE(review): 6**n exceeds the int64 range from n = 25 onwards, so pandas
# presumably stores this column as Python objects -- confirm the scatter
# plot handles that in the pandas version in use.
n_outcomes = [6**n for n in n_dice]

outcomes = pd.DataFrame({"n_dice": n_dice, "n_outcomes": n_outcomes})

# Show how quickly the outcome count grows with the number of dice.
outcomes.plot(x="n_dice", y="n_outcomes", kind="scatter")
plt.show()
import numpy as np
# Roll four dice (sample 1-6 with replacement) and take the mean of the rolls.
np.mean(np.random.choice(list(range(1, 7)), size=4, replace=True))
import numpy as np

# Simulate 1000 replicates of "roll four dice and take the mean".
die_faces = list(range(1, 7))  # hoisted: the same faces are reused every draw
sample_means_1000 = [
    np.random.choice(die_faces, size=4, replace=True).mean()
    for _ in range(1000)
]
print(sample_means_1000)
[3.25, 3.25, 1.75, 2.0, 2.0, 1.0, 1.0, 2.75, 2.75, 2.5, 3.0, 2.0, 2.75,
...
1.25, 2.0, 2.5, 2.5, 3.75, 1.5, 1.75, 2.25, 2.0, 1.5, 3.25, 3.0, 3.5]
# Histogram of the simulated sample means, grouped into 20 bins.
plt.hist(x=sample_means_1000, bins=20)
Sampling in Python