Parallel Programming in R
Nabeel Imam
Data Scientist
# Show the customer IDs: a named list with one vector of IDs per country
print(customer_ids)
$USA
[1] 465500 612953 106420 279492 376941 163474 164493 801983 898941 406844 829157 ...
$Canada
[1] 140521 398164 817703 715385 771801 656814 721270 719120 425819 774558 111418 ...
$Mexico
[1] 714842 486725 706765 858020 790364 390760 198667 419197 352989 202494 756636 ...
$UK
[1] 886285 151731 274940 779966 375535 431644 880434 649074 765423 449147 408041 ...
#' Draw one winning customer ID at random
#'
#' @param ids A vector of customer IDs.
#' @return A single element of `ids`, chosen uniformly at random.
lucky_draw <- function(ids) {
  # Sample a position rather than the values themselves: sample(ids, 1)
  # treats a single numeric ID `n` as the range 1:n and could return a
  # number that is not a customer ID. For length(ids) > 1 this consumes the
  # RNG exactly as sample(ids, 1) does, so draws are unchanged.
  ids[sample.int(length(ids), 1)]
}
cl <- makeCluster(4)
# NOTE: set.seed() seeds only the master R process; the worker processes
# keep their own RNG state, so this alone does not make the draws below
# reproducible.
set.seed(1234)
parLapply(cl, customer_ids, lucky_draw)
stopCluster(cl)
$USA
[1] 673576
$Canada
[1] 164613
$Mexico
[1] 769658
$UK
[1] 683102
$USA
[1] 673576
$Canada
[1] 164613
$Mexico
[1] 769658
$UK
[1] 683102
$USA
[1] 638051
$Canada
[1] 133431
$Mexico
[1] 522137
$UK
[1] 856141
cl <- makeCluster(4)
# A seed for all worker processes in cluster
clusterSetRNGStream(cl, 1234)
parLapply(cl, customer_ids, lucky_draw)
stopCluster(cl)
$USA
[1] 421408
$Canada
[1] 877562
$Mexico
[1] 460786
$UK
[1] 658513
$USA
[1] 421408
$Canada
[1] 877562
$Mexico
[1] 460786
$UK
[1] 658513
# First run: fresh cluster, worker RNG streams seeded with 1234
cl <- makeCluster(4)
clusterSetRNGStream(cl, 1234)
run1 <- parLapply(cl, customer_ids, lucky_draw)
stopCluster(cl)

# Second run: new cluster, same seed
cl <- makeCluster(4)
clusterSetRNGStream(cl, 1234)
run2 <- parLapply(cl, customer_ids, lucky_draw)
stopCluster(cl)

# Compare the results of the two runs
identical(run1, run2)
[1] TRUE
# Seed configuration shared by both runs
config <- furrr_options(seed = 1234)

# First run
plan(multisession, workers = 4)
run1 <- future_map(customer_ids, lucky_draw, .options = config)
plan(sequential)

# Second run
plan(multisession, workers = 4)
run2 <- future_map(customer_ids, lucky_draw,
                   # Using the same configuration
                   .options = config)
plan(sequential)

# Compare the results of the two runs
identical(run1, run2)
[1] TRUE
install.packages("doRNG")
library(doRNG)

# First run
cl <- makeCluster(4)
registerDoParallel(cl)
registerDoRNG(1234)
run1 <- foreach(i = customer_ids) %dopar% {
  lucky_draw(i)
}
stopCluster(cl)

# Second run
cl <- makeCluster(4)
registerDoParallel(cl)
registerDoRNG(1234) # Same seed
run2 <- foreach(i = customer_ids) %dopar% {
  lucky_draw(i)
}
stopCluster(cl)

# Compare the results of the two runs
identical(run1, run2)
[1] TRUE
rnorm, rbinom, etc.
sample_n() from dplyr
Parallel Programming in R