Parallel programmeren in R
Nabeel Imam
Data Science
print(ls_weights)
$AK
[1] 30 33 50 30 26 99 52 28 24 40 30 99 40
[14] 20 20 39 25 30 20 33 99 54 30 99 30 38
...
$AL
[1] 60 32 30 21 30 27 28 32 18 28 30 24 59
[14] 30 19 30 15 17 18 53 26 5 25 41 50 20
...
$AR
[1] 31 40 30 99 27 36 30 35 25 19 10 25 40
[14] 33 37 12 34 99 43 26 28 99 37 30 26 6
...
#' Bootstrap the sampling distribution of the mean
#'
#' Draws `n_boot` resamples of `weights` (same size as `weights`, with
#' replacement) and records the mean of each resample.
#'
#' @param weights A numeric vector of observations.
#' @param n_boot Number of bootstrap replicates. Defaults to 1e4.
#' @return A numeric vector of length `n_boot` with one resample mean per
#'   replicate.
boot_mean <- function(weights, n_boot = 1e4) {
  n_obs <- length(weights)
  vapply(
    seq_len(n_boot),
    function(i) {
      # Resample by index: sample(weights) would draw from 1:weights when
      # `weights` is a single number >= 1, which is not a bootstrap of it.
      mean(weights[sample.int(n_obs, replace = TRUE)])
    },
    numeric(1)
  )
}
# Apply boot_mean to every element of ls_weights with mc.cores = 4.
ls_boot <- mclapply(
  X = ls_weights,
  FUN = boot_mean,
  mc.cores = 4
)

lapply(ls_weights, length)
AK AL AR AZ CA CO CT DC DE FL GA HI IA ...
260 1503 816 1649 14869 1286 1151 520 295 4693 2824 510 891 ...
# Time parLapply against parLapplyLB over ten runs each. Every expression
# creates and stops its own 4-worker cluster, so that cost is part of the
# measured time.
microbenchmark(
  parLapply = {
    cl <- makeCluster(4)
    ls <- parLapply(cl = cl, X = ls_weights, fun = boot_mean)
    stopCluster(cl)
  },
  parLapplyLB = {
    cl <- makeCluster(4)
    ls <- parLapplyLB(cl = cl, X = ls_weights, fun = boot_mean)
    stopCluster(cl)
  },
  times = 10
)
Unit: seconds
expr min mean max neval
parLapply 15.77250 16.38548 16.83822 10
parLapplyLB 14.55609 15.49170 17.26866 10
# Sum three arguments element-wise.
add <- function(x, y, z) x + y + z

# Example inputs: two vectors of length four and one single value.
value1 <- c(0.5, 3.2, 5.1, 1.9)
value2 <- c(0.1, 0.5, 0.2, 2.4)
value3 <- 5

cl <- makeCluster(4)
clusterMap(cl, add, x = value1, y = value2, z = value3) # z is a single value
stopCluster(cl)
print(value3)
[1] 5
[[1]]
[1] 5.6
[[2]]
[1] 8.7
[[3]]
[1] 10.3
[[4]]
[1] 9.3
clusterMap(
  cl,          # Cluster
  fun = add,   # Function
  x = value1,  # Multiple inputs
  y = value2,
  z = value3   # Single values can be recycled
)
# NOTE(review): input, fun, arg_name and static look like placeholder names
# for a call template rather than objects defined in this file - confirm.
parLapply(cl, # Cluster
input, # One input
fun, # Function
arg_name = static) # Exported static variables
# passed on to a named argument
print(ls_weights)
$AK
[1] 30 33 50 30 26 99 52 28 24 40 30 99 40
...
print(ls_plur)
$AK
[1] 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1
...
#' Bootstrap the mean of the weight-to-plurality ratio
#'
#' Computes `weights / pluralities` element-wise, then draws `n_boot`
#' resamples of those ratios (with replacement) and records each mean.
#'
#' @param weights A numeric vector.
#' @param pluralities A numeric vector used as the element-wise divisor.
#' @param n_boot Number of bootstrap replicates. Defaults to 1e4.
#' @return A numeric vector of length `n_boot` with one resample mean per
#'   replicate.
boot_dist <- function(weights, pluralities, n_boot = 1e4) {
  ratio <- weights / pluralities
  n_obs <- length(ratio)
  vapply(
    seq_len(n_boot),
    # Resample by index: sample(ratio) would draw from 1:ratio when `ratio`
    # is a single number >= 1.
    function(i) mean(ratio[sample.int(n_obs, replace = TRUE)]),
    numeric(1)
  )
}

# Run boot_dist over the paired elements of ls_weights and ls_plur.
cl <- makeCluster(4)
result <- clusterMap(
  cl, boot_dist,
  weights = ls_weights,
  pluralities = ls_plur
)
stopCluster(cl)
print(mat_births)
1 2 3 4 5 6 7 8 9 10 11 12
AK 7 13 8 8 5 9 12 19 16 16 7 16
AL 55 58 79 66 48 63 69 81 74 76 61 82
AR 28 28 41 37 40 40 42 52 40 44 33 38
AZ 69 70 70 71 68 61 73 81 76 74 87 62
CA 603 599 659 601 669 719 675 687 660 698 703 633
CO 50 55 72 56 63 56 56 70 58 48 53 58
CT 51 51 57 58 59 41 51 46 54 56 58 52
DC 25 18 14 40 22 19 24 25 20 26 20 25
...
cl <- makeCluster(4)

# parCapply for column-wise operations
parCapply(cl, mat_births, sum)

# parRapply for row-wise operations
parRapply(cl, mat_births, sum)

stopCluster(cl)
Som per kolom
1 2 3 4 5 6 7
4258 3966 4437 4345 4371 4482 4614
8 9 10 11 12
4738 4686 4483 4427 4247
Som per rij
AK AL AR AZ CA CO CT
136 812 463 862 7906 695 634
DC DE FL GA HI IA ID
278 151 2519 1512 254 472 237
...
Parallel programmeren in R