Approximate sampling distributions

Sampling in R

Richie Cotton

Data Evangelist at DataCamp

4 dice

Four dice being rolled.

library(tidyr)
# Enumerate every ordered combination of faces for four six-sided dice:
# one row per outcome, one column per die.
dice <- expand_grid(die1 = 1:6, die2 = 1:6, die3 = 1:6, die4 = 1:6)
# A tibble: 1,296 x 4
    die1  die2  die3  die4
   <int> <int> <int> <int>
 1     1     1     1     1
 2     1     1     1     2
 3     1     1     1     3
 4     1     1     1     4
 5     1     1     1     5
 6     1     1     1     6
 7     1     1     2     1
 8     1     1     2     2
 9     1     1     2     3
10     1     1     2     4
# ... with 1,286 more rows
Sampling in R

Mean roll

# Build the full grid of four-dice outcomes and attach the mean of each row.
dice <- mutate(
  expand_grid(die1 = 1:6, die2 = 1:6, die3 = 1:6, die4 = 1:6),
  # Average of the four faces showing in this outcome.
  mean_roll = (die1 + die2 + die3 + die4) / 4
)
# A tibble: 1,296 x 5
    die1  die2  die3  die4 mean_roll
   <int> <int> <int> <int>     <dbl>
 1     1     1     1     1      1   
 2     1     1     1     2      1.25
 3     1     1     1     3      1.5 
 4     1     1     1     4      1.75
 5     1     1     1     5      2   
 6     1     1     1     6      2.25
 7     1     1     2     1      1.25
 8     1     1     2     2      1.5 
 9     1     1     2     3      1.75
10     1     1     2     4      2   
# ... with 1,286 more rows
Sampling in R

Exact sampling distribution

# Count how many outcomes share each mean; factor() makes every distinct
# mean its own discrete bar on the x-axis.
ggplot(data = dice, mapping = aes(x = factor(mean_roll))) +
  geom_bar()

Bar plot of mean roll counts.

Sampling in R

The number of outcomes increases fast

# With six faces per die, the number of ordered outcomes for n dice is 6^n.
# Tabulate that for 1 to 100 dice and plot it.
outcomes <- tibble(n_dice = 1:100, n_outcomes = 6^n_dice)
ggplot(data = outcomes, mapping = aes(x = n_dice, y = n_outcomes)) +
  geom_point()

A scatter plot of the number of possible outcomes against the number of dice.

Sampling in R

Simulating the mean of four dice rolls




    # Roll four six-sided dice (with replacement) and take their mean.
    # sample.int(6L, ...) draws from 1..6, exactly like sample(1:6, ...).
    four_rolls <- sample.int(6L, size = 4L, replace = TRUE)
    mean(four_rolls)


Sampling in R

Simulating the mean of four dice rolls

# Repeat the four-dice experiment 1,000 times, keeping the mean of each run.
# vapply() guarantees one double per repetition.
sample_means_1000 <- vapply(
  seq_len(1000),
  function(i) mean(sample(1:6, size = 4, replace = TRUE)),
  numeric(1)
)
# Wrap the simulated means in a one-column tibble so they can be plotted.
sample_means <- tibble(sample_mean = sample_means_1000)
# A tibble: 1,000 x 1
   sample_mean
         <dbl>
 1        4   
 2        4.5 
 3        2.5 
 4        3.75
 5        3.75
 6        4   
 7        3   
 8        4.75
 9        3.75
10        4.25
# ... with 990 more rows
Sampling in R

Approximate sampling distribution

# Bar chart of how often each simulated mean occurred; factor() gives every
# distinct mean its own discrete bar.
ggplot(data = sample_means, mapping = aes(x = factor(sample_mean))) +
  geom_bar()

A bar plot of the approximate sampling distribution of mean dice rolls.

Sampling in R

Let's practice!

Sampling in R

Preparing Video For Download...