Parallel Programming in R
Nabeel Imam
Data Scientist
# Sequential baseline: compute the square root of every element of `numbers`
# with an explicit loop. (The vectorized equivalent is simply sqrt(numbers);
# the loop form is kept so it can be compared with loop-style alternatives.)
numbers <- 1:1e6

# Preallocate the result as a double vector of the right length.
sqroots <- numeric(length(numbers))

# seq_along() is safe for zero-length input, unlike 1:length(numbers),
# which would iterate over c(1, 0).
for (i in seq_along(numbers)) {
  sqroots[i] <- sqrt(numbers[i])
}
# Load foreach for the foreach() constructor and the %do% operator.
library(foreach)

numbers <- 1:1e6

# Sequential foreach: the body is evaluated once per element of `numbers`.
# With no .combine argument, foreach returns the results as a list.
sqroots <- foreach(i = numbers) %do% {
  sqrt(i)
}
# The integers 1 through one million.
numbers <- seq_len(1e6)

# %do% evaluates sqrt(i) for each element in turn, in the current R session.
sqroots <- foreach(i = numbers) %do% sqrt(i)
# doParallel attaches foreach (foreach(), %dopar%) and parallel
# (makeCluster(), stopCluster()), so load it before creating the cluster.
library(doParallel)

# Start four workers and register them as the backend used by %dopar%.
cl <- makeCluster(4)
registerDoParallel(cl)

# The parallel operator: %dopar% hands the iterations to the registered workers.
sqroots <- foreach(i = numbers) %dopar% {
  sqrt(i)
}

# Shut the workers down once the results have been collected.
stopCluster(cl)
# Print the contents of uni_list (defined outside this excerpt).
print(uni_list)
[1] "./uni_data/Argentina.csv"
[2] "./uni_data/Australia.csv"
[3] "./uni_data/Austria.csv"
[4] "./uni_data/Azerbaijan.csv"
[5] "./uni_data/Bahrain.csv"
[6] "./uni_data/Bangladesh.csv"
[7] "./uni_data/Belarus.csv"
[8] "./uni_data/Belgium.csv"
[9] "./uni_data/Bolivia.csv"
[10] "./uni_data/Bosnia and Herzegovina.csv"
...
# Start a four-worker cluster and register it as the %dopar% backend.
cl <- makeCluster(4)
registerDoParallel(cl)

# Read every file listed in uni_list on the workers. With no .combine
# argument the result is a list with one element per CSV.
ls_df <- foreach(csv = uni_list) %dopar% {
  read.csv(csv)
}

# Release the workers.
stopCluster(cl)
# Spin up the workers and make them the active foreach backend.
cl <- makeCluster(4)
registerDoParallel(cl)

# One read.csv() call per path in uni_list, dispatched through %dopar%.
ls_df <- foreach(csv_path = uni_list) %dopar% read.csv(csv_path)

# Tear the cluster down again.
stopCluster(cl)
[[1]]
location institution score
Argentina Universidad de Buenos Aires 68.9
...
[[2]]
location institution score
Australia Australian National University 82.1
...
# Launch four workers and register them for %dopar%.
cl <- makeCluster(4)
registerDoParallel(cl)

# Read each CSV through %dopar% and merge the per-file results with rbind,
# so df_uni is one combined object rather than a list.
df_uni <- foreach(
  csv_path = uni_list,
  .combine = "rbind"
) %dopar% read.csv(csv_path)

# Stop the workers now that df_uni has been built.
stopCluster(cl)
location institution score
1 Argentina Universidad de Buenos Aires 68.9
2 Argentina Universidad Católica Argentina 33.3
3 Argentina Universidad de Palermo (UP) 29.1
...
library(dplyr)

# Number of top-scoring rows to keep from each file.
n_unis <- 3

# Preallocate one slot per CSV so the list is not grown inside the loop.
ls_df <- vector("list", length(uni_list))

# seq_along() keeps the loop from running when uni_list is empty.
for (i in seq_along(uni_list)) {
  # Read, filter, and collect into the preallocated list. The ranking column
  # is `score`, matching the columns shown in the printed data.
  ls_df[[i]] <- read.csv(uni_list[[i]]) %>%
    top_n(n_unis, score)
}

# Combine the list into one data frame with a single rbind() call instead of
# pairwise binding, which re-copies the accumulated rows at every step.
combined_df <- do.call(rbind, ls_df)
# Number of top-scoring rows to keep from each file.
n_unis <- 3

# Start four workers and register them as the %dopar% backend.
cl <- makeCluster(4)
registerDoParallel(cl)

# Read each CSV on a worker, keep its top `n_unis` rows by `score`, and stack
# the per-file results into a single data frame.
df_top3 <- foreach(
  csv = uni_list,
  .packages = "dplyr",  # load dplyr on each worker for %>% and top_n()
  .export = "n_unis",   # make n_unis available on each worker
  .combine = "rbind"    # bind the per-file results row-wise
) %dopar% {
  read.csv(csv) %>%
    top_n(n_unis, score)
}

# Release the workers.
stopCluster(cl)
location institution score
Argentina Universidad de Buenos Aires 68.9
Argentina Universidad Católica Argentina 33.3
Argentina Universidad de Palermo (UP) 29.1
Australia Australian National University 82.1
Australia The University of Melbourne 81.6
Australia The University of Sydney 79.6
Austria University of Vienna 50.6
Austria Technische Universität Wien 45.7
...
Parallel Programming in R