Parallel Programming in R
Nabeel Imam
Data Scientist
# Three inputs: two integer vectors and one character vector.
var_list <- list(1:10, 1:10, c("a", "b", "c"))
# sqrt() signals an error on the character vector, so this call fails.
lapply(var_list, sqrt)
Error in FUN(X[[i]], ...) : non-numeric argument to mathematical function
#' Square root that hands back the error instead of stopping
#'
#' @param var Value passed to `sqrt()`.
#' @return The result of `sqrt(var)`, or the error condition object if
#'   `sqrt()` signals an error.
sqrt_custom <- function(var) {
  tryCatch(
    # Expression to evaluate
    sqrt(var),
    # What to do with an error: return the condition object itself
    error = function(e) e
  )
}
# Apply sqrt_custom to every element of var_list.
lapply(var_list, sqrt_custom)
[[1]]
[1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427 3.000000
[10] 3.162278
[[2]]
[1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427 3.000000
[10] 3.162278
[[3]]
<simpleError in sqrt(var): non-numeric argument to mathematical function>
# Start a cluster of 3 workers.
cl <- makeCluster(3)
# Run sqrt_custom on each element of var_list using the cluster.
parLapply(cl, var_list, sqrt_custom)
# Shut the workers down once the call has returned.
stopCluster(cl)
[[1]]
[1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427 3.000000
[10] 3.162278
[[2]]
[1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427 3.000000
[10] 3.162278
[[3]]
<simpleError in sqrt(var): non-numeric argument to mathematical function>
# Inspect ls_births; it prints as a named list (AK, AL, ...) of tables with
# month and plurality columns.
print(ls_births)
$AK
month plurality
1 1
2 1
...
$AL
month plurality
10 1
9 1
...
...
#' Total plurality per month for one table of births
#'
#' @param df Table with `month` and `plurality` columns.
#' @return One row per `month` with a `total` column holding the sum of
#'   `plurality`; if any step signals an error, the string
#'   "Error! Check data" is returned instead.
summarise_births <- function(df) {
  tryCatch(
    {
      by_month <- group_by(df, month)
      summarise(by_month, total = sum(plurality))
    },
    # Any failure is collapsed into a fixed marker string rather than raised.
    error = function(e) "Error! Check data"
  )
}
# Start a cluster of 4 workers.
cl <- makeCluster(4)
# Attach dplyr on every worker so summarise_births can find its verbs there.
clusterEvalQ(cl, library(dplyr))
# Summarise each element of ls_births using the cluster.
parLapply(cl, ls_births, summarise_births)
# Shut the workers down once the call has returned.
stopCluster(cl)
$AK
month total
1 1 7
2 2 13
...
$AL
[1] "Error! Check data"
$AR
month total
1 1 28
2 2 28
...
...
# Look at the first 10 rows of the AL element, the one that came back as
# "Error! Check data"; its plurality column contains text such as "Twins".
head(ls_births[["AL"]], n = 10)
month plurality
263 10 1
335 9 1
473 12 1
474 6 1
475 9 1
839 9 1
1291 11 Twins
1369 4 1
1609 5 1
1610 5 Triplets
# Evaluate futures in 4 background R sessions.
plan(multisession, workers = 4)
# Ask furrr to attach dplyr on the workers.
config <- furrr_options(packages = "dplyr")
# Apply summarise_births to each element of ls_births.
future_map(ls_births, summarise_births,
.options = config)
# Switch back to sequential evaluation afterwards.
plan(sequential)
$AK
month total
1 1 7
2 2 13
...
$AL
[1] "Error! Check data"
$AR
month total
1 1 28
2 2 28
...
...
# Start 4 workers and register them as the backend for %dopar%.
cl <- makeCluster(4)
registerDoParallel(cl)
# Iterate over the elements of ls_births, with dplyr loaded on the workers.
foreach(df = ls_births,
.packages = "dplyr"
) %dopar% {
summarise_births(df)
}
# Shut the workers down once the loop has returned.
stopCluster(cl)
$AK
month total
1 1 7
2 2 13
...
$AL
[1] "Error! Check data"
$AR
month total
1 1 28
2 2 28
...
...
Parallel Programming in R