Benchmarking R and Rcpp

10 minute read

The intent of this document is to illustrate some of the efficiency gains available from implementing functions in Rcpp rather than in higher-level R code.

Setup

knitr::opts_chunk$set(
    echo = TRUE,
    out.width = "80%"
)
library(tidyverse)
library(Rcpp)
options(tibble.width = Inf)

R equations

These are the functions used to illustrate the differences between R code and Rcpp code. The main function being tested is: full_balance.

sy <- function(x, cond) {
  out <- ifelse(cond < x, 1, 0)
  out <- 1 - (sum(out) / length(x))
  ifelse(out <= 0.01, 0.01, out)
}

new_cond <- function(curr, addit, subtrac, microt) {
  sy_cond <- sy(microt, curr)
  curr + (addit + subtrac) / sy_cond
}
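
To make the behaviour concrete, here is a quick worked call with made-up inputs (these numbers are illustrative only, not part of the analysis). sy() returns the proportion of microt at or below the current condition, floored at 0.01, and new_cond() divides the net change by that proportion:

sy(x = c(1, 2, 3, 4), cond = 2.5) #two of the four values are <= 2.5
## [1] 0.5

#2.5 + (1 + -0.5) / 0.5 = 3.5
new_cond(curr = 2.5, addit = 1, subtrac = -0.5, microt = c(1, 2, 3, 4))
## [1] 3.5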

full_balance <- function(iterations, initial, addition, subtraction, add_sd,
  sub_sd, microtopo) {
  #Initialize
  N <- length(addition) #rows
  M <- iterations #columns
  out <- matrix(nrow = N, ncol = M)
  out[1,] <- initial
  add_error <- matrix(rep(addition, times = M) + rnorm(N * M, 0, add_sd),
    nrow = N, ncol = M, byrow = FALSE)
  sub_error <- matrix(rep(subtraction, times = M) + rnorm(N * M, 0, sub_sd),
    nrow = N, ncol = M, byrow = FALSE)
  #Execute
  for(i in 1:iterations) { #columns
    for(j in 2:length(addition)) { #rows
      out[j, i] <- new_cond(out[j - 1, i], add_error[j, i], sub_error[j, i],
        microtopo)
    }
  }
  #Return
  as.data.frame(out)
}
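
A minimal usage sketch, with small hypothetical inputs standing in for the real data defined below, shows the output shape: one row per observation and one column per iteration.

toy <- full_balance(iterations = 3, initial = 0,
  addition = c(1, 1, 1), subtraction = c(-0.5, -0.5, -0.5),
  add_sd = 0.1, sub_sd = 0.1, microtopo = rnorm(100))
dim(toy)
## [1] 3 3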

C++ (Rcpp) equations

The same functions as above, implemented in C++ via the Rcpp package. The main function being tested is: full_balance_cpp.

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double sy_cpp(NumericVector x, double cond) {
  //Declare
  int N = x.length();
  NumericVector cond_sum(N);
  double out;
  //First ifelse()
  // Short-hand form:
  for(int i = 0; i < N; i++) {
    cond_sum[i] = (cond < x[i] ? 1 : 0);
  }
  //`out` line
  out = 1 - (sum(cond_sum) / N);
  //Second ifelse()
  if(out <= 0.01) {
    out = 0.01;
  }
  //Explicit return statement
  return out;
}

// [[Rcpp::export]]
double new_cond_cpp(double curr, double addit, double subtrac,
  NumericVector microt) {
  //Declare
  double sy_cond;
  double out;
  //Execute
  sy_cond = sy_cpp(microt, curr);
  out = curr + (addit + subtrac) / sy_cond;
  //Return
  return out;
}

// [[Rcpp::export]]
DataFrame full_balance_cpp(int iterations, double initial,
  NumericVector addition, NumericVector subtraction, double add_sd,
  double sub_sd, NumericVector microtopo) {
  //Declare
  int N = addition.length(); //Rows
  int M = iterations; //Columns
  NumericMatrix add_error(N, M);
  NumericMatrix sub_error(N, M);
  NumericMatrix out_mat(N, M);
  
  //Initialize
  //Set initial conditions for each iteration
  for(int i = 0; i < M; ++i) {
    out_mat(0, i) = initial; //Note `out_mat(row, col)` rather than `[]` for indexing matrices
  }
  
  //Execute
  //Matrix of observations with error
  for(int i = 0; i < M; ++i) {
    add_error(_, i) = addition + rnorm(N, 0, add_sd);
    sub_error(_, i) = subtraction + rnorm(N, 0, sub_sd);
  }
  //Loop through each set of observations
  for(int i = 0; i < M; ++i) {
    for(int j = 1; j < N; ++j){
      out_mat(j, i) = new_cond_cpp(out_mat(j - 1, i), add_error(j, i),
        sub_error(j, i), microtopo);
    }
  }
  
  //Return a dataframe of results
  //Explicit coercion to a different, compatible type following the pattern:
  // as<type>(object)
  DataFrame out = as<DataFrame>(out_mat);
  return out;
}
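
In an R Markdown document a chunk with the Rcpp engine compiles this code automatically; in a plain R session the same is done with Rcpp::sourceCpp() (the file name below is just a placeholder). Once compiled, the exported helpers are callable from R, which makes it easy to spot-check them against their R counterparts on deterministic inputs:

sourceCpp("full_balance.cpp") #Placeholder file name for the code above

sy(c(1, 2, 3, 4), 2.5) == sy_cpp(c(1, 2, 3, 4), 2.5)
## [1] TRUE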

Data

Some data used to test the different functions.

per_obs <- 1:200
per_n <- length(per_obs)

add_means <- 0.5 * (sin(seq(0, 2 * per_n / 100 * pi, length.out = per_n))) +
  abs(rnorm(per_n))
add_means <- ifelse(add_means < 0, 0, add_means)
sub_means <- 0.75 * (cos(seq(0, 2 * per_n / 100 * pi, length.out = per_n)) - 2) +
  abs(rnorm(per_n))
sub_means <- ifelse(sub_means > 0, 0, sub_means)

add_sds <- 1
sub_sds <- 1

init_cond <- 0

micro <- map2(.x = c(-4, -2, 0, 2, 5), .y = c(3, 1, 2, 4, 1),
  ~ rnorm(2000, .x, .y)) %>%
  simplify()
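
A quick sanity check of the shapes and signs: the means supply 200 values per run, micro pools 2,000 draws from each of five normal components, and the ifelse() floors above guarantee non-negative additions and non-positive subtractions.

length(add_means)
## [1] 200
length(micro)
## [1] 10000
all(add_means >= 0) && all(sub_means <= 0)
## [1] TRUE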

Data visualization

Just to give an idea of what the data looks like. This problem was originally conceived of as a time series.

qplot(per_obs, add_means + sub_means) + theme_bw()

qplot(micro, geom = "density") + theme_bw()

Benchmarking

Here I’m using the bench package to iterate over different parameter sets, to get an idea of how the two equivalent functions respond as conditions change.

Run the benchmarks

micro_maker <- function(obs) {
  map2(.x = c(-4, -2, 0, 2, 5), .y = c(3, 1, 2, 4, 1),
    ~ rnorm(obs, .x, .y)) %>%
    simplify()
}

iter_maker <- function(iters) {
  iters
}

bench_press <- bench::press(
  obs = c(10, 100, 1000),
  iters = c(1, 10, 100, 1000),
  {
    iterations = iter_maker(iters)
    microtopo = micro_maker(obs)
    bench::mark(
      R = full_balance(iterations, init_cond, add_means, sub_means, add_sds,
        sub_sds, microtopo),
      Rcpp = full_balance_cpp(iterations, init_cond, add_means, sub_means,
        add_sds, sub_sds, microtopo),
      check = FALSE,
      max_iterations = 20
    )
  }
)
## Running with:
##      obs iters
##  1    10     1
##  2   100     1
##  3  1000     1
##  4    10    10
##  5   100    10
##  6  1000    10
##  7    10   100
##  8   100   100
##  9  1000   100
## 10    10  1000
## 11   100  1000
## 12  1000  1000
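
An aside on check = FALSE: each call draws fresh rnorm() noise, so the two results are never identical element-for-element. Even under a common seed they would differ, because the implementations consume R's RNG stream in a different order (one rnorm(N * M) call per error matrix in R versus a per-column rnorm(N) call in C++). A sketch of a looser sanity check, comparing the median trajectories instead (object names here are illustrative):

r_run <- full_balance(100, init_cond, add_means, sub_means, add_sds,
  sub_sds, micro)
cpp_run <- full_balance_cpp(100, init_cond, add_means, sub_means, add_sds,
  sub_sds, micro)
#Differences in row-wise medians should be small relative to the spread of runs
summary(apply(r_run, 1, median) - apply(cpp_run, 1, median))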

Investigate the results of the benchmark

bench_press %>%
  select(-result, -memory, -gc, -time) %>%
  print(n = nrow(.))
## # A tibble: 24 x 11
##    expression   obs iters      min   median `itr/sec` mem_alloc `gc/sec`
##    <bch:expr> <dbl> <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
##  1 R             10     1   2.02ms   2.44ms  374.      775.03KB   19.7  
##  2 Rcpp          10     1  121.2us  164.2us 4294.      101.65KB    0    
##  3 R            100     1   3.31ms    6.1ms  171.        4.47MB    9.00 
##  4 Rcpp         100     1  380.4us  503.7us 1853.      801.26KB   97.5  
##  5 R           1000     1  23.89ms  27.73ms   29.5       42.8MB   29.5  
##  6 Rcpp        1000     1   2.99ms   4.07ms  168.        7.62MB   25.3  
##  7 R             10    10  46.92ms  98.16ms   12.8       5.61MB    5.11 
##  8 Rcpp          10    10  969.7us   1.45ms  589.      992.37KB    0    
##  9 R            100    10 122.41ms 132.25ms    7.01     44.78MB    8.76 
## 10 Rcpp         100    10   5.59ms  10.31ms   84.9        7.8MB   17.0  
## 11 R           1000    10 728.27ms 728.27ms    1.37    442.31MB   19.2  
## 12 Rcpp        1000    10  60.95ms  69.73ms   13.6      76.12MB   33.0  
## 13 R             10   100 275.08ms 294.15ms    3.40     56.38MB    5.10 
## 14 Rcpp          10   100   7.59ms  11.23ms   73.3       9.67MB   18.3  
## 15 R            100   100    1.19s    1.19s    0.842   440.74MB    6.74 
## 16 Rcpp         100   100  78.12ms 107.41ms    9.89     77.99MB   14.8  
## 17 R           1000   100    3.27s    3.27s    0.305     4.28GB   12.2  
## 18 Rcpp        1000   100 768.34ms 768.34ms    1.30     761.2MB    9.11 
## 19 R             10  1000     4.7s     4.7s    0.213   559.95MB    0.639
## 20 Rcpp          10  1000  91.98ms 161.54ms    5.99     96.67MB    1.50 
## 21 R            100  1000    5.89s    5.89s    0.170     4.28GB    1.19 
## 22 Rcpp         100  1000 430.41ms 445.14ms    2.25    779.89MB    2.25 
## 23 R           1000  1000   24.94s   24.94s    0.0401   43.02GB    2.33 
## 24 Rcpp        1000  1000    3.45s    3.45s    0.290     7.43GB    2.90 
##    n_itr  n_gc total_time
##    <int> <dbl>   <bch:tm>
##  1    19     1    50.82ms
##  2    20     0     4.66ms
##  3    19     1   111.12ms
##  4    19     1    10.26ms
##  5    15    15   507.72ms
##  6    20     3   118.76ms
##  7     5     2   391.25ms
##  8    20     0    33.95ms
##  9     4     5   570.53ms
## 10    20     4   235.63ms
## 11     1    14   728.27ms
## 12     7    17   515.74ms
## 13     2     3    588.3ms
## 14    20     5   273.01ms
## 15     1     8      1.19s
## 16     6     9   606.94ms
## 17     1    40      3.27s
## 18     1     7   768.34ms
## 19     1     3       4.7s
## 20     4     1   667.98ms
## 21     1     7      5.89s
## 22     2     2   890.28ms
## 23     1    58     24.94s
## 24     1    10      3.45s

Visualize the results of the benchmark

Points indicate the median iteration time. Note: the variance in run times is so small that it cannot be seen at this scale.

bp_time_df <- bench_press %>%
  mutate(rows = 1:n(),
    expression = as.character(expression)) %>%
  select(-result, -memory, -gc) %>%
  split(.$rows) %>%
  map_dfr(~ unnest_legacy(.x, time))

bp_time_df %>%
  group_by(obs, iters, expression) %>%
  summarise(median = first(median)) %>%
  ungroup() %>%
  mutate(combo = paste0(obs, ": ", iters)) %>%
  arrange(median, combo) %>%
  mutate(combo = fct_inorder(combo)) %>%
  ggplot(aes(combo, median, fill = expression)) +
  geom_point(shape = 21, size = 4) +
  labs(x = "Observations: Iterations",
    y = "Iteration time (s)",
    fill = "Code\nsource:") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_brewer(palette = "Set1")

Visualization of results from functions

Essentially this was a bootstrapping problem. Below, we take the largest set of results (i.e., most iterations and observations) from bench and plot them. The shaded area spans the middle 88% of all runs (the 6th to 94th percentiles). The “traces” are a sample of individual runs (n = 250). As the number of “observations” (associated with the micro object) and “iterations” grows, the two sets of code should converge on a similar set of uncertainties, though individual runs will still be variable. I could ensure similarity between runs by setting a “seed”, but I’ve chosen not to do that.

R <- bench_press %>%
  mutate(expression = as.character(expression)) %>%
  filter(obs == max(obs), iters == max(iters), expression == "R") %>%
  select(result) %>%
  unnest_legacy()
  
Rcpp <- bench_press %>%
  mutate(expression = as.character(expression)) %>%
  filter(obs == max(obs), iters == max(iters), expression == "Rcpp") %>%
  select(result) %>%
  unnest_legacy()

code_summary <- lst(R, Rcpp) %>%
  map_dfr(~ .x %>%
      as_tibble() %>%
      mutate(obs_num = 1:n()) %>%
      pivot_longer(cols = -obs_num, names_to = "iteration", values_to = "value") %>%
      mutate(iteration = parse_number(iteration)) %>%
      group_by(obs_num) %>%
      summarise(median = quantile(value, probs = 0.5, na.rm = TRUE),
        high = quantile(value, probs = 0.94, na.rm = TRUE),
        low = quantile(value, probs = 0.06, na.rm = TRUE)),
    .id = "source")

code_sample <- lst(R, Rcpp) %>%
  map_dfr(~ .x %>%
      as_tibble() %>%
      mutate(obs_num = 1:n()) %>%
      pivot_longer(cols = -obs_num, names_to = "iteration", values_to = "value") %>%
      mutate(iteration = parse_number(iteration)) %>%
      group_by(iteration) %>%
      nest() %>%
      ungroup() %>%
      sample_n(250) %>%
      unnest(cols = c(data)),
    .id = "source")

ggplot() +
  geom_ribbon(data = code_summary,
    aes(x = obs_num, ymin = low, ymax = high, fill = source),
    alpha = 0.7, show.legend = FALSE) +
  geom_line(data = code_sample,
    aes(x = obs_num, y = value, group = iteration),
    alpha = 0.1) +
  labs(x = "Observation number",
    y = "Expected value") +
  facet_wrap(~ source) +
  theme_bw() +
  theme(panel.grid = element_blank(),
    strip.background = element_blank()) +
  scale_fill_brewer(palette = "Set1")
