R'de Örnekleme
Richie Cotton
Data Evangelist at DataCamp
# Fix the RNG seed so the random sample below is reproducible.
set.seed(19790801)

# Keep three columns, add a `rowid` column, then draw 500 random rows.
coffee_sample <- coffee_ratings %>%
  select(variety, country_of_origin, flavor) %>%
  rowid_to_column() %>%
  slice_sample(n = 500)

# Compact overview of the sampled columns and their types.
glimpse(coffee_sample)
Rows: 500
Columns: 4
$ rowid <int> 10, 278, 458, 622, 131, 385, 1292, 47, 904, 1020, 5...
$ variety <chr> "Other", "Bourbon", NA, "Caturra", "Caturra", "Yell...
$ country_of_origin <chr> "Ethiopia", "Guatemala", "Colombia", "Thailand", "C...
$ flavor <dbl> 8.58, 7.75, 7.75, 7.50, 8.00, 7.83, 7.17, 8.08, 7.3...
# Bootstrap: 1000 times, resample coffee_sample with replacement at its
# full size (prop = 1) and record the mean flavor of each resample.
mean_flavors_1000 <- replicate(
  n = 1000,
  expr = {
    coffee_sample %>%
      slice_sample(prop = 1, replace = TRUE) %>%
      summarize(mean_flavor = mean(flavor, na.rm = TRUE)) %>%
      pull(mean_flavor)
  }
)
# Wrap the resample means in a one-column tibble so they can be plotted.
bootstrap_distn <- tibble(resample_mean = mean_flavors_1000)

# Histogram of the bootstrap distribution of the mean flavor.
ggplot(data = bootstrap_distn, mapping = aes(x = resample_mean)) +
  geom_histogram(binwidth = 0.0025)

Örneklem ortalaması
# Mean flavor of the sample drawn into coffee_sample.
coffee_sample %>%
  summarize(mean_flavor = mean(flavor)) %>%
  pull(mean_flavor)
7.5163
Tahmini ana kütle ortalaması
# Mean of the bootstrap resample means.
bootstrap_distn %>%
  summarize(mean_mean_flavor = mean(resample_mean)) %>%
  pull(mean_mean_flavor)
7.5167
Gerçek ana kütle ortalaması
# Mean flavor over the full coffee_ratings dataset.
coffee_ratings %>%
  summarize(mean_flavor = mean(flavor)) %>%
  pull(mean_flavor)
7.5260
Örneklem standart sapması
# Standard deviation of flavor within the sample. This is computed on
# coffee_sample (the sampled rows), not on the full dataset.
coffee_sample %>%
  summarize(sd_flavor = sd(flavor)) %>%
  pull(sd_flavor)
0.3525
Tahmini ana kütle standart sapması?
# Standard deviation of the bootstrap resample means. This measures the
# spread of the mean across resamples, not the spread of individual
# flavor scores.
bootstrap_distn %>%
  summarize(sd_mean_flavor = sd(resample_mean)) %>%
  pull(sd_mean_flavor)
0.01572
Örneklem standart sapması
# Standard deviation of flavor within the sample. This is computed on
# coffee_sample (the sampled rows), not on the full dataset.
coffee_sample %>%
  summarize(sd_flavor = sd(flavor)) %>%
  pull(sd_flavor)
0.3525
Tahmini ana kütle standart sapması
# The standard error is the standard deviation of the bootstrap
# resample means.
standard_error <- bootstrap_distn %>%
  summarize(sd_mean_flavor = sd(resample_mean)) %>%
  pull(sd_mean_flavor)

# Scale by the square root of the sample size to estimate the population
# standard deviation. nrow() replaces a hard-coded 500 so the factor
# always matches the actual number of rows in coffee_sample.
standard_error * sqrt(nrow(coffee_sample))
0.3515
Gerçek standart sapma
# Standard deviation of flavor over the full coffee_ratings dataset.
coffee_ratings %>%
  summarize(sd_flavor = sd(flavor)) %>%
  pull(sd_flavor)
0.3414
"Standart hata", ilgilenilen istatistiğin standart sapmasıdır.
Standart hata çarpı örneklem büyüklüğünün karekökü, ana kütle standart sapmasını tahmin eder.
R'de Örnekleme