Sampling in R
Richie Cotton
Data Evangelist at DataCamp
# Packages used throughout this script:
#   tidyr   - expand_grid()
#   dplyr   - mutate()
#   ggplot2 - ggplot(), aes(), geom_bar(), geom_point()
library(tidyr)
library(dplyr)
library(ggplot2)

# Enumerate every ordered outcome of rolling four six-sided dice:
# one row per combination, 6^4 = 1,296 rows in total.
dice <- expand_grid(
  die1 = 1:6,
  die2 = 1:6,
  die3 = 1:6,
  die4 = 1:6
)
# A tibble: 1,296 x 4
die1 die2 die3 die4
<int> <int> <int> <int>
1 1 1 1 1
2 1 1 1 2
3 1 1 1 3
4 1 1 1 4
5 1 1 1 5
6 1 1 1 6
7 1 1 2 1
8 1 1 2 2
9 1 1 2 3
10 1 1 2 4
# ... with 1,286 more rows
# Step 1: all 1,296 ordered outcomes of four six-sided dice.
dice <- expand_grid(die1 = 1:6, die2 = 1:6, die3 = 1:6, die4 = 1:6)

# Step 2: attach the mean of the four faces to each outcome.
dice <- mutate(
  dice,
  mean_roll = (die1 + die2 + die3 + die4) / 4
)
# A tibble: 1,296 x 5
die1 die2 die3 die4 mean_roll
<int> <int> <int> <int> <dbl>
1 1 1 1 1 1
2 1 1 1 2 1.25
3 1 1 1 3 1.5
4 1 1 1 4 1.75
5 1 1 1 5 2
6 1 1 1 6 2.25
7 1 1 2 1 1.25
8 1 1 2 2 1.5
9 1 1 2 3 1.75
10 1 1 2 4 2
# ... with 1,286 more rows
# Bar chart of how many outcomes produce each mean roll. mean_roll is
# converted to a factor so every distinct mean gets its own bar.
ggplot(dice, aes(x = factor(mean_roll))) +
  geom_bar()
# Number of distinct ordered outcomes when rolling 1 to 100 six-sided dice.
# n_outcomes is defined in terms of the n_dice column created just before it.
outcomes <- tibble(n_dice = 1:100, n_outcomes = 6^n_dice)

# Scatter plot of the outcome count against the number of dice.
ggplot(outcomes, aes(x = n_dice, y = n_outcomes)) +
  geom_point()
# Simulate one throw of four six-sided dice. Sampling is done with
# replacement because different dice may show the same face.
four_rolls <- sample(x = 1:6, size = 4, replace = TRUE)

# Mean face value of this single simulated throw.
mean(four_rolls)
# Repeat the four-dice experiment 1,000 times, keeping only the mean of
# each simulated throw. The result is a numeric vector of length 1,000.
sample_means_1000 <- replicate(
  n = 1000,
  expr = mean(sample(1:6, size = 4, replace = TRUE))
)
# Wrap the simulated means in a one-column tibble so they can be plotted.
sample_means <- tibble(sample_mean = sample_means_1000)
# A tibble: 1,000 x 1
sample_mean
<dbl>
1 4
2 4.5
3 2.5
4 3.75
5 3.75
6 4
7 3
8 4.75
9 3.75
10 4.25
# ... with 990 more rows
# Bar chart of the simulated sample means. sample_mean is converted to a
# factor so every distinct mean gets its own bar.
ggplot(sample_means, aes(x = factor(sample_mean))) +
  geom_bar()
Sampling in R