Rolling operations

Optimizing R Code with Rcpp

Romain François

Consulting Datactive, ThinkR

Rolling means

rollmean1 <- function(x, window = 3){
  n <- length(x)

  # create empty vector full of NA
  res <- rep(NA, n)

  # fill the values
  for( i in seq(window, n) ){
    idx <- seq(i-window+1,i)
    res[i] <- mean(x[idx])
  }
  res
}
Optimizing R Code with Rcpp

Rolling means

  • Make an integer vector to hold indices, extract the relevant part of x
  • Call the mean function on that extract
Optimizing R Code with Rcpp

Alternative algorithm

Optimizing R Code with Rcpp

Alternative algorithm

rollmean2 <- function(x, window = 3){
  n <- length(x)
  res <- rep(NA, n)

  # first value
  total <- sum(head(x,window))
  res[window] <- total / window

  # remaining values
  for( i in seq(window+1, n) ){
    total <- total + x[i] - x[i-window]
    res[i] <- total / window
  }
  res
}
Optimizing R Code with Rcpp

Hackstucious (hack + astucious) vectorization

x <- c(1.3, 3.2, 4.2, 4.5, 6.8)
start <- sum(x[1:3])
head( x, -3 )
tail( x, -3 )
1.3 3.2

4.5 6.8
c( start, start + cumsum( tail(x, -3) - head( x, -3 ) ) )

c( start, start + cumsum( tail(x, -3) - head( x, -3 ) ) ) / 3
8.7 11.9 15.5

2.900000 3.966667 5.166667
Optimizing R Code with Rcpp

Comparison

library(microbenchmark)
x <- rnorm(1e5)

microbenchmark(
    rollmean1(x, 3), 
    rollmean2(x, 3), 
    rollmean3(x, 3)
)
Unit: milliseconds
            expr        min         lq       mean     median ...
 rollmean1(x, 3) 833.667884 857.507753 971.250098 893.206776 ...
 rollmean2(x, 3)  10.539993  11.034244  12.293105  11.396629 ...
 rollmean3(x, 3)   1.429817   1.625453   3.070925   3.067068 ...
Optimizing R Code with Rcpp

Last observation carried forward

Optimizing R Code with Rcpp

Last observation carried forward

na_locf1 <- function(x){
  current <- NA

  res <- x
  for( i in seq_along(x)){
    if( is.na(x[i]) ){
      # replace with current
      res[i] <- current
    } else {
      # set current 
      current <- x[i]
    }
  }  
  res
}
Optimizing R Code with Rcpp

Mean carried forward

Optimizing R Code with Rcpp
na_meancf1 <- function(x){
  # ( cumulative sum of non NA values ) / ( cumulative count of non NA )
  means <- cumsum( replace(x, is.na(x), 0) ) / cumsum(!is.na(x))  
  # replace the missing values by the means
  x[is.na(x)] <- means[is.na(x)]
  x    }

# iterative version
na_meancf2 <- function(x){
  total <- 0
  n     <- 0
  for( i in seq_along(x) ){
    if( is.na(x[i]) ){
        x[i] <- total / n} 
    else {
        total <- x[i] + total
        n <- n + 1}        }
}
Optimizing R Code with Rcpp

Comparisons

x <- rnorm(1e5)
x[ sample(1e5, 100) ] <- NA

microbenchmark( na_meancf1(x), na_meancf2(x) )
Unit: milliseconds
          expr       min        lq      mean    median ...
 na_meancf1(x)  1.176276  2.785667  3.237009  3.474028 ...
 na_meancf2(x) 16.945271 17.430133 19.678276 18.625274 ...
Optimizing R Code with Rcpp

Let's practice!

Optimizing R Code with Rcpp

Preparing Video For Download...