Parallel Programming in R
Nabeel Imam
Data Scientist
# Text that the future below hands back as its value
report <- "<important report text>"

# Wrap the greeting and the report in a future; the last expression in the
# braces (report) is what the future evaluates to
task_future <- future({
  print("Hi, please find attached the report.")
  report
})
library(purrr)

# Input vector: the integers 1 through 1,000,000
numbers <- 1:1000000

# map() applies sqrt() to every element and returns a list
map(numbers, sqrt)
[[1]]
[1] 1
[[2]]
[1] 1.414214
[[3]]
[1] 1.732051
[[4]]
[1] 2
...
# map_dbl() applies sqrt() to every element and returns a double vector
# rather than a list
map_dbl(numbers, sqrt)
[1] 1.000000 1.414214 1.732051
[4] 2.000000 2.236068 2.449490
[7] 2.645751 2.828427 3.000000
[10] 3.162278 3.316625 3.464102
[13] 3.605551 3.741657 3.872983
...
# map_chr() applies sqrt() to every element and returns a character vector
# (note the quoted values in the printed output)
# NOTE(review): purrr >= 1.0.0 reportedly deprecates automatic
# double-to-character coercion in map_chr() -- confirm against the installed
# version before relying on this call
map_chr(numbers, sqrt)
[1] "1.000000" "1.414214" "1.732051"
[4] "2.000000" "2.236068" "2.449490"
[7] "2.645751" "2.828427" "3.000000"
[10] "3.162278" "3.316625" "3.464102"
[13] "3.605551" "3.741657" "3.872983"
...
map()
map_chr()
map_dbl()
map_int()
map_lgl()
# Time the three map variants on the same input, 10 evaluations each
microbenchmark(
  "map" = map(numbers, sqrt),
  "map_dbl" = map_dbl(numbers, sqrt),
  "map_chr" = map_chr(numbers, sqrt),
  times = 10
)
Unit: milliseconds
expr min mean max neval
1 map 794.20 1254.46 1904.02 10
2 map_dbl 889.33 1067.41 1496.56 10
3 map_chr 1735.96 1934.97 2269.59 10
Sequential
# purrr baseline: square root of each integer from 1 to 1,000,000,
# returned as a double vector
library(purrr)
map_dbl(1:1000000, sqrt)
Sequential
library(furrr)

# Same call shape as purrr's map_dbl(), using the furrr variant
future_map_dbl(1:1000000, sqrt)

# Use all but two of the detected cores. detectCores() lives in the parallel
# package; the max() guard keeps at least one worker when the machine has two
# or fewer cores, or when the core count cannot be determined (NA).
n_cores <- max(1, parallel::detectCores() - 2, na.rm = TRUE)
plan(multisession, workers = n_cores)
future_map_dbl(1:1000000, sqrt) # Future enabled map_dbl()

# Switch back to the sequential plan once the parallel work is done
plan(sequential)
[1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751
[8] 2.828427 3.000000 3.162278 3.316625 3.464102 3.605551 3.741657
[15] 3.872983 4.000000 4.123106 4.242641 4.358899 4.472136 4.582576
map()
map_chr()
map_dbl()
map_int()
map_lgl()
future_map()
future_map_chr()
future_map_dbl()
future_map_int()
future_map_lgl()
input_list
and a function to map, calculate()
# Create one future per element of input_list, each evaluating calculate(x)
future_list <- lapply(input_list, function(x) future(calculate(x)))

# Collect the values of all the futures
# Further processing of result_list to make a numeric vector
result_list <- value(future_list)
library(furrr)
# Using future_map_dbl() variant to get a numeric vector
# (despite its name, result_list holds that numeric vector here, not a list)
result_list <- future_map_dbl(input_list, calculate)
Parallel Programming in R