Parallel Programming in R
Nabeel Imam
Data Scientist
#' Bootstrap the sampling distribution of the mean
#'
#' Draws `B` resamples of `variable` (same size as `variable`, with
#' replacement) and records the mean of each resample.
#'
#' @param variable A vector of observations to resample.
#' @param B Number of bootstrap replicates (a single non-negative number).
#' @return A numeric vector of length `B` holding the resample means.
boot_dist <- function(variable, B) {
  est <- numeric(B)
  n_obs <- length(variable)
  # seq_len() gives an empty sequence for B = 0, whereas 1:B would iterate
  # over c(1, 0) and write a spurious estimate.
  for (i in seq_len(B)) {
    # Index-based resampling consumes the same random draws as
    # sample(variable, replace = TRUE) but does not reinterpret a single
    # number x as the range 1:x.
    b <- variable[sample.int(n_obs, replace = TRUE)]
    est[i] <- mean(b)
  }
  est
}
# Number of bootstrap replicates; supplied later as the B argument of boot_dist
n_samples <- 10000
print(age_list)
$AK
[1] 43 34 26 19 22 36 17 24 33 25 18 44 21
[14] 32 31 27 27 21 27 30 20 37 31 21 29 32
...
$AL
[1] 33 25 24 22 27 21 34 21 45 36 31 29 25
[14] 23 20 27 28 30 33 16 29 27 24 26 20 15
...
$AR
[1] 28 27 20 27 26 21 20 30 22 17 22 30 15
[14] 27 31 30 30 19 25 25 22 26 23 18 26 20
...
...
# Plan multisession of four
plan(multisession, workers = 4)
# NOTE: boot_dist is applied to each element of age_list without a value for
# its second argument B, so this call stops with the error shown below.
future_map(age_list, boot_dist)
Error in ...furrr_fn(...) : argument "B" is missing, with no default
boot_dist requires a value for the argument B
multisession plans do not share workspace
# Create configuration
config <- furrr_options(globals = "n_samples")
# Plan multisession of four
plan(multisession, workers = 4)

# Pass inputs and functions to future_map,
future_map(
  age_list,
  boot_dist,
  # specify value for second argument of boot_dist,
  B = n_samples,
  # and supply configuration to .options
  .options = config
)

# Return to the sequential plan once the parallel work is done
plan(sequential)
$AK
[1] 27.22053 27.29658 26.03422 26.85551
[5] 27.13688 26.77947 27.23574 26.67300
...
$AL
[1] 25.16612 25.25577 25.05735 25.18062
[5] 24.89914 24.82729 25.29071 25.00725
...
$AR
[1] 24.50978 24.74450 24.38631 24.48533
[5] 24.91565 24.50000 24.48533 24.82152
...
...
print(ls_births)
$AK
state month plurality weight_gain_pounds mother_age
AK 6 1 30 15
AK 3 1 38 15
...
$AL
state month plurality weight_gain_pounds mother_age
AL 1 1 22 15
AL 6 1 53 16
...
...
#' Keep the rows of a data frame that meet a minimum mother_age
#'
#' @param df A data frame with a `mother_age` column.
#' @param min_value Lower bound (inclusive) for `mother_age`.
#' @return `df` restricted to rows where `mother_age >= min_value`.
filter_df <- function(df, min_value) {
  df %>%
    dplyr::filter(mother_age >= min_value)
}

# Threshold passed as min_value when filter_df is mapped over the data
cutoff <- 20
# Create configuration
config <- furrr_options(
  globals = "cutoff", # Global variables
  packages = "dplyr"  # Packages
)

# Plan multisession of four
plan(multisession, workers = 4)

# Apply filter_df to every element of ls_births, with min_value = cutoff
ls_filtered <- future_map(
  ls_births,
  filter_df,
  min_value = cutoff,
  .options = config
)

# Return to the sequential plan once the parallel work is done
plan(sequential)
print(ls_filtered)
$AK
state month plurality weight_gain_pounds mother_age
AK 4 1 99 20
AK 6 1 22 21
...
$AL
state month plurality weight_gain_pounds mother_age
AL 8 1 19 20
AL 4 1 50 22
...
...
print(ls_weights)
$AK
[1] 99 22 2 60 20 18 43 10 37 29 30
[12] 15 26 50 11 22 17 20 40 30 99 24
...
$AL
[1] 19 50 14 32 40 40 41 20 59 23 99
[12] 36 31 50 34 39 15 73 99 99 38 40
...
...
print(ls_plur)
$AK
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[18] 1 1 1 1 1 1 1 1 1 2 1 1 1 3 1 1 1
...
$AL
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
[18] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
...
...
# Plan multisession of four
plan(multisession, workers = 4)
#' Divide one vector by another, element by element
#'
#' @param weights Numerator values.
#' @param babies Denominator values.
#' @return The element-wise quotient `weights / babies`.
calculate <- function(weights, babies) {
  weights / babies
}
future_pmap(
  list(ls_weights, ls_plur), # Combine arguments into one list
  calculate                  # Function to apply
)

# Return to the sequential plan once the parallel work is done
plan(sequential)
$AK
[1] 99.0 22.0 2.0 60.0 20.0 18.0 43.0 10.0 37.0 29.0 30.0
[12] 15.0 26.0 50.0 11.0 22.0 17.0 20.0 40.0 30.0 99.0 24.0
...
$AL
[1] 19.00 50.00 14.00 32.00 40.00 40.00 41.00 20.00 59.00
[10] 23.00 99.00 36.00 31.00 25.00 34.00 39.00 15.00 73.00
...
...
Parallel Programming in R