Sampling in R
Richie Cotton
Data Evangelist at DataCamp
# Randomly pick a fixed number of rows (300), then count the rows returned.
coffee_ratings %>%
  slice_sample(n = 300) %>%
  nrow()
300
# Randomly pick a proportion of the rows (25%) instead of a fixed count,
# then count the rows returned.
coffee_ratings %>%
  slice_sample(prop = 0.25) %>%
  nrow()
334
# Mean of total_cup_points across every row of coffee_ratings (no sampling);
# pull() extracts the single summary value from the one-row result.
coffee_ratings %>%
  summarize(mean_points = mean(total_cup_points)) %>%
  pull(mean_points)
82.15
# Mean of total_cup_points estimated from a random sample of 10 rows.
coffee_ratings %>%
  slice_sample(n = 10) %>%
  summarize(mean_points = mean(total_cup_points)) %>%
  pull(mean_points)
82.82
# Mean of total_cup_points estimated from a random sample of 100 rows.
coffee_ratings %>%
  slice_sample(n = 100) %>%
  summarize(mean_points = mean(total_cup_points)) %>%
  pull(mean_points)
82.02
# Mean of total_cup_points estimated from a random sample of 1000 rows.
coffee_ratings %>%
  slice_sample(n = 1000) %>%
  summarize(mean_points = mean(total_cup_points)) %>%
  pull(mean_points)
82.16
Population parameter
# Store the mean of total_cup_points over the full dataset as a single value.
population_mean <- coffee_ratings %>%
  summarize(mean_points = mean(total_cup_points)) %>%
  pull(mean_points)
Point estimate
# Store the mean of total_cup_points computed from a random sample.
# NOTE(review): sample_size is not defined in this excerpt; it is presumably
# set beforehand to the number of rows to draw. Confirm.
sample_mean <- coffee_ratings %>%
  slice_sample(n = sample_size) %>%
  summarize(mean_points = mean(total_cup_points)) %>%
  pull(mean_points)
Relative error as a percentage
# Absolute gap between the sample mean and the population mean, expressed as a
# percentage of the population mean.
100 * abs(sample_mean - population_mean) / population_mean
# Plot relative_error against sample_size from `errors`: the raw values as a
# line, with a LOESS-smoothed trend drawn on top.
ggplot(errors, aes(x = sample_size, y = relative_error)) +
  geom_line() +
  geom_smooth(method = "loess")
Sampling in R