Parallel Programming in R
Nabeel Imam
Data Scientist
report <- "<important report text>"
# A braced block evaluates to its last expression, so `task` ends up
# holding the report text after the greeting has been printed.
task <- {
  print("Hi, here is the report.")  # Email text
  report
}
"Hi, please find attached the report."
task
"<important report text>"
library(future)
# Create a future for the task. Each expression inside the braces needs its
# own line (or a `;` separator) for the block to parse.
task_future <- future({
  print("Hi, here is the report.")
  report  # Last expression, so it becomes the future's value
})
print(task_future)
Expression: { print("Hi, here is the report.") report }
Environment: R_GlobalEnv
Resolved: TRUE
Value: 136 bytes of class "character" ...
value(task_future)
[1] "Hi, here is the report."
[1] "<important report text>"
# Three independent tasks, each wrapped in its own future.

# Task 1: print the email text; the report is the future's value.
task1 <- future(
  {
    print("Hi, here is the report.")
    report
  }
)

# Task 2: run the analysis, then announce completion.
task2 <- future(
  {
    run_analysis()
    print("Analysis done")
  }
)

# Task 3: book the meeting rooms, then announce completion.
task3 <- future(
  {
    book_meeting_rooms()
    print("Meeting rooms booked!")
  }
)
value(task1) # 10:00 AM Office Administrator
[1] "Hi, here is the report."
[1] "<important report text>"
value(task3) # 11:00 AM Office Administrator
"Meeting rooms booked!"
value(task2) # 03:00 PM Analyst
"Analysis done"
print(amazon_prices)
[1] 312.58 307.01 302.24
[4] 297.50 300.32 301.48
[7] 297.56 297.48 291.93
[10] 294.00 286.28 292.59
...
# Wrap the moving-average computation in a future; its result is read later
# with value(ma) when the plot is built.
# NOTE(review): moving_average() is defined after this line in the deck;
# presumably it has to exist before this call runs. Confirm the ordering.
ma <- future({moving_average(amazon_prices)})
# Compute the 3-element moving average of a price series.
#
# Args:
#   prices: Numeric vector of prices.
#
# Returns:
#   Numeric vector with one entry per complete window of three consecutive
#   prices: element i is mean(prices[i:(i + 2)]). The result has length
#   max(length(prices) - 2, 0), so inputs with fewer than three prices give
#   an empty numeric vector.
moving_average <- function(prices) {
  # Clamp at zero: a negative count would make rep() fail.
  n_windows <- max(length(prices) - 2L, 0L)
  moving_avgs <- rep(NA_real_, n_windows)
  # seq_len() iterates zero times when there are no windows, whereas
  # 1:0 would count down through 1 and 0.
  for (i in seq_len(n_windows)) {
    moving_avgs[i] <- mean(prices[i:(i + 2L)])
  }
  moving_avgs
}
# Plot the moving average as a line, one point per 3-day window.
# The x values run from 1 to length(amazon_prices) - 2, the same number of
# windows that moving_average() produces for amazon_prices.
ggplot() +
geom_line(aes(x = 1:(length(amazon_prices) - 2),
y = value(ma))) + # Value of futures is required here
labs(y = "Moving price average (USD)", x = "Days")
print(amazon_list)
$`2015-01`
[1] 312.58 307.01 302.24
[4] 297.50 300.32 301.48
...
$`2015-02`
[1] 350.05 360.29 358.38
[4] 366.00 374.87 371.00
...
$`2015-03`
[1] 380.85 383.95 385.71
[4] 385.61 385.52 378.40
...
...
# Compute the 3-element moving average of a price series.
#
# Args:
#   prices: Numeric vector of prices.
#
# Returns:
#   Numeric vector with one entry per complete window of three consecutive
#   prices: element i is mean(prices[i:(i + 2)]). The result has length
#   max(length(prices) - 2, 0), so inputs with fewer than three prices give
#   an empty numeric vector.
moving_average <- function(prices) {
  # Clamp at zero: a negative count would make rep() fail.
  n_windows <- max(length(prices) - 2L, 0L)
  moving_avgs <- rep(NA_real_, n_windows)
  # seq_len() iterates zero times when there are no windows, whereas
  # 1:0 would count down through 1 and 0.
  for (i in seq_len(n_windows)) {
    moving_avgs[i] <- mean(prices[i:(i + 2L)])
  }
  moving_avgs
}
# Evaluate futures with the multisession plan, using four workers.
plan(multisession, workers = 4)

# Build a list with one future per element of amazon_list; each future
# computes the moving average of that element's prices.
ma <- lapply(
  amazon_list,
  function(monthly_prices) {
    future(moving_average(monthly_prices))
  }
)
Plan to employ four worker cores in a multisession
Create one future task for every element of amazon_list
# Retrieve the results of the futures stored in ma (one per list element).
value(ma)
# Switch the plan back to sequential evaluation.
plan(sequential)
$`2015-01`
[1] 307.2767 302.2500 300.0200 299.7667 299.7867 298.8400 295.6567 ...
$`2015-02`
[1] 356.2400 361.5567 366.4167 370.6233 372.3533 371.1400 372.5067 ...
$`2015-03`
[1] 383.5033 385.0900 385.6133 383.1767 380.4567 375.4867 372.2933 ...
...
# Baseline run: create the same futures under the sequential plan.
plan(sequential)
ma <- lapply(amazon_list, function(x) {
  future(moving_average(x))
})
# Read the results from `ma`, the list of futures created just above.
value(ma)
# Parallel run: the same futures, evaluated by four multisession workers.
plan(multisession, workers = 4)

# One future per element of amazon_list.
ls <- lapply(
  amazon_list,
  function(monthly_prices) {
    future(moving_average(monthly_prices))
  }
)

# Retrieve the results of every future in the list.
value(ls)

# Switch the plan back to sequential evaluation.
plan(sequential)
Parallel Programming in R