Benchmarking R and Rcpp
The intent of this document is to illustrate some of the efficiencies that can be gained from implementing functions in Rcpp
compared to their higher level implementations in R.
Setup
knitr::opts_chunk$set(
echo = TRUE,
out.width = "80%"
)
library(tidyverse)
library(Rcpp)
options(tibble.width = Inf)
R equations
The set of functions being used to illustrate differences between R code and Rcpp code. The main function being tested is: full_balance
.
sy <- function(x, cond) {
  # Fraction of `x` at or below `cond`, floored at 0.01.
  #
  # x:    numeric vector of reference values (e.g. microtopography draws).
  # cond: scalar value compared against every element of `x`.
  #
  # Returns a single numeric in (0, 1], never smaller than 0.01 (the floor
  # keeps downstream division in new_cond() well away from zero).
  #
  # mean(cond < x) replaces sum(ifelse(cond < x, 1, 0)) / length(x): the
  # logical vector averages to the same value without building a 0/1 copy.
  out <- 1 - mean(cond < x)
  # Scalar if/else instead of ifelse(): ifelse() is the vectorized form and
  # is both slower and attribute-stripping on length-one input.
  if (out <= 0.01) 0.01 else out
}
new_cond <- function(curr, addit, subtrac, microt) {
  # Advance the balance one step: the net change (addition + subtraction)
  # is scaled by sy(), the fraction of `microt` at or below `curr`, and
  # added to the current value.
  net_change <- addit + subtrac
  curr + net_change / sy(microt, curr)
}
full_balance <- function(iterations, initial, addition, subtraction, add_sd,
                         sub_sd, microtopo) {
  # Monte-Carlo running balance, pure-R reference implementation.
  #
  # iterations:  number of independent realizations (columns of the result).
  # initial:     starting value placed in row 1 of every realization.
  # addition:    vector of per-step mean additions; its length sets the
  #              number of rows (time steps).
  # subtraction: vector of per-step mean subtractions (same length as
  #              `addition`).
  # add_sd, sub_sd: standard deviations of the normal noise added to the
  #              addition and subtraction terms respectively.
  # microtopo:   reference vector passed through to new_cond()/sy().
  #
  # Returns a data.frame with length(addition) rows and `iterations` columns.

  # Initialize
  N <- length(addition) # rows (time steps)
  M <- iterations       # columns (realizations)
  out <- matrix(nrow = N, ncol = M)
  out[1, ] <- initial
  # Perturb the means with iid normal noise; each column is one realization.
  add_error <- matrix(rep(addition, times = M) + rnorm(N * M, 0, add_sd),
                      nrow = N, ncol = M, byrow = FALSE)
  sub_error <- matrix(rep(subtraction, times = M) + rnorm(N * M, 0, sub_sd),
                      nrow = N, ncol = M, byrow = FALSE)
  # Execute
  # seq_len() instead of `1:iterations` / `2:length(addition)`: the colon
  # forms run backwards (e.g. 2:1) when M == 0 or N == 1, indexing rows
  # that should never be touched. seq_len(N)[-1] is empty in those cases.
  for (i in seq_len(M)) {       # columns
    for (j in seq_len(N)[-1]) { # rows, starting at step 2
      out[j, i] <- new_cond(out[j - 1, i], add_error[j, i], sub_error[j, i],
                            microtopo)
    }
  }
  # Return
  as.data.frame(out)
}
C++ (Rcpp) equations
Similar functions to those above, but implemented in C++ via the Rcpp package. The main function being tested is: full_balance_cpp
.
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
double sy_cpp(NumericVector x, double cond) {
  // Fraction of x at or below cond, floored at 0.01 (C++ twin of sy()).
  // Counts elements satisfying cond < x[i] directly instead of building a
  // 0/1 vector and summing it; the arithmetic is identical because small
  // integer counts are exact in double precision.
  int n = x.length();
  int above = 0;
  for (int i = 0; i < n; ++i) {
    if (cond < x[i]) {
      ++above;
    }
  }
  double frac = 1.0 - static_cast<double>(above) / n;
  // Floor the result so callers dividing by it stay away from zero.
  return (frac <= 0.01) ? 0.01 : frac;
}
// [[Rcpp::export]]
double new_cond_cpp(double curr, double addit, double subtrac,
                    NumericVector microt) {
  // One balance step (C++ twin of new_cond()): scale the net change by the
  // sy_cpp() fraction of `curr` relative to `microt`, then add it to the
  // current value.
  double frac = sy_cpp(microt, curr);
  return curr + (addit + subtrac) / frac;
}
// [[Rcpp::export]]
DataFrame full_balance_cpp(int iterations, double initial,
    NumericVector addition, NumericVector subtraction, double add_sd,
    double sub_sd, NumericVector microtopo) {
  // C++ counterpart of full_balance(): run `iterations` Monte-Carlo
  // realizations of the running balance and return a data frame with
  // addition.length() rows (time steps) and `iterations` columns (runs).
  //
  // Declare
  int N = addition.length(); // Rows (time steps)
  int M = iterations;        // Columns (realizations)
  NumericMatrix add_error(N, M);
  NumericMatrix sub_error(N, M);
  NumericMatrix out_mat(N, M);
  // Initialize
  // Set initial conditions for each iteration.
  // Note `out_mat(i, j)` instead of `out_mat[]` for indexing matrices.
  for (int i = 0; i < M; ++i) {
    out_mat(0, i) = initial;
  }
  // Execute
  // Matrix of observations with error (one column per realization).
  // BUG FIX: the subtraction noise previously drew from rnorm(N, 0, add_sd);
  // it now uses sub_sd, matching the R implementation full_balance().
  for (int i = 0; i < M; ++i) {
    add_error(_, i) = addition + rnorm(N, 0, add_sd);
    sub_error(_, i) = subtraction + rnorm(N, 0, sub_sd);
  }
  // Loop through each set of observations; row 0 holds the initial value.
  for (int i = 0; i < M; ++i) {
    for (int j = 1; j < N; ++j) {
      out_mat(j, i) = new_cond_cpp(out_mat(j - 1, i), add_error(j, i),
                                   sub_error(j, i), microtopo);
    }
  }
  // Return a data frame of results.
  // Explicit coercion to a different, compatible type following the pattern:
  //   as<type>(object)
  DataFrame out = as<DataFrame>(out_mat);
  return out;
}
Data
Some data being used to test the different functions.
per_obs <- 1:200
per_n <- length(per_obs)
# Mean additions: two sine periods plus half-normal noise, truncated at zero.
# FIX: rnorm()'s first argument is a count; the original passed the vector
# `per_obs`, silently relying on rnorm() using length(n). Passing `per_n`
# makes the intent explicit (same number of draws, so the RNG stream and
# results are unchanged).
add_means <- 0.5 * (sin(seq(0, 2 * per_n / 100 * pi, length.out = per_n))) +
  abs(rnorm(per_n))
add_means <- pmax(add_means, 0) # vectorized truncation; same as the ifelse()
# Mean subtractions: shifted cosine plus half-normal noise, capped at zero
# so subtractions are always non-positive.
sub_means <- 0.75 * (cos(seq(0, 2 * per_n / 100 * pi, length.out = per_n)) - 2) +
  abs(rnorm(per_n))
sub_means <- pmin(sub_means, 0)
add_sds <- 1
sub_sds <- 1
init_cond <- 0
# Microtopography: 10,000 draws from a mixture of five normal distributions.
micro <- map2(.x = c(-4, -2, 0, 2, 5), .y = c(3, 1, 2, 4, 1),
              ~ rnorm(2000, .x, .y)) %>%
  simplify()
Data visualization
Just to give an idea of what the data looks like. This problem was originally conceived of as a time series.
# qplot() is deprecated as of ggplot2 3.4.0; build the same plots with the
# ggplot() + geom_*() interface already used elsewhere in this document.
# Net change at each observation (qplot's default two-variable geom is point).
ggplot(mapping = aes(x = per_obs, y = add_means + sub_means)) +
  geom_point() +
  theme_bw()
# Density of the microtopography mixture.
ggplot(mapping = aes(x = micro)) +
  geom_density() +
  theme_bw()
Benchmarking
Here I’m using the bench
package to iterate over different parameter sets, so we may get an idea of how the two equivalent functions respond to changes in function conditions.
Run the benchmarks
micro_maker <- function(obs) {
  # Draw `obs` values from each of five normal distributions and flatten
  # them into one numeric vector (a mixture sample of length 5 * obs).
  means <- c(-4, -2, 0, 2, 5)
  sds <- c(3, 1, 2, 4, 1)
  map2(.x = means, .y = sds, ~ rnorm(obs, .x, .y)) %>%
    simplify()
}
iter_maker <- function(iters) {
  # Identity passthrough: returns `iters` untouched. Kept so the
  # bench::press() body treats both benchmark parameters symmetrically
  # (cf. micro_maker(), which does real work for `obs`).
  iters
}
# Grid-benchmark both implementations: bench::press() re-evaluates the
# braced expression for every (obs, iters) combination, regenerating the
# inputs each time so both functions are timed on identical data.
bench_press <- bench::press(
  obs = c(10, 100, 1000),
  iters = c(1, 10, 100, 1000),
  {
    iterations <- iter_maker(iters)
    microtopo <- micro_maker(obs)
    # check = FALSE: R and Rcpp results differ by RNG draws, so outputs
    # are not expected to be identical across expressions.
    bench::mark(
      R = full_balance(iterations, init_cond, add_means, sub_means, add_sds,
                       sub_sds, microtopo),
      Rcpp = full_balance_cpp(iterations, init_cond, add_means, sub_means,
                              add_sds, sub_sds, microtopo),
      check = FALSE,
      max_iterations = 20
    )
  }
)
## Running with:
## obs iters
## 1 10 1
## 2 100 1
## 3 1000 1
## 4 10 10
## 5 100 10
## 6 1000 10
## 7 10 100
## 8 100 100
## 9 1000 100
## 10 10 1000
## 11 100 1000
## 12 1000 1000
Investigate the results of the benchmark
# Inspect the benchmark results: drop the list columns (stored results,
# memory allocations, gc events, raw timing samples) that clutter printing,
# then show every row (`.` is the piped tibble, so n = nrow(.) prints all
# parameter combinations rather than the tibble default of 10).
bench_press %>%
select(-result, -memory, -gc, -time) %>%
print(n = nrow(.))
## # A tibble: 24 x 11
## expression obs iters min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 R 10 1 2.02ms 2.44ms 374. 775.03KB 19.7
## 2 Rcpp 10 1 121.2us 164.2us 4294. 101.65KB 0
## 3 R 100 1 3.31ms 6.1ms 171. 4.47MB 9.00
## 4 Rcpp 100 1 380.4us 503.7us 1853. 801.26KB 97.5
## 5 R 1000 1 23.89ms 27.73ms 29.5 42.8MB 29.5
## 6 Rcpp 1000 1 2.99ms 4.07ms 168. 7.62MB 25.3
## 7 R 10 10 46.92ms 98.16ms 12.8 5.61MB 5.11
## 8 Rcpp 10 10 969.7us 1.45ms 589. 992.37KB 0
## 9 R 100 10 122.41ms 132.25ms 7.01 44.78MB 8.76
## 10 Rcpp 100 10 5.59ms 10.31ms 84.9 7.8MB 17.0
## 11 R 1000 10 728.27ms 728.27ms 1.37 442.31MB 19.2
## 12 Rcpp 1000 10 60.95ms 69.73ms 13.6 76.12MB 33.0
## 13 R 10 100 275.08ms 294.15ms 3.40 56.38MB 5.10
## 14 Rcpp 10 100 7.59ms 11.23ms 73.3 9.67MB 18.3
## 15 R 100 100 1.19s 1.19s 0.842 440.74MB 6.74
## 16 Rcpp 100 100 78.12ms 107.41ms 9.89 77.99MB 14.8
## 17 R 1000 100 3.27s 3.27s 0.305 4.28GB 12.2
## 18 Rcpp 1000 100 768.34ms 768.34ms 1.30 761.2MB 9.11
## 19 R 10 1000 4.7s 4.7s 0.213 559.95MB 0.639
## 20 Rcpp 10 1000 91.98ms 161.54ms 5.99 96.67MB 1.50
## 21 R 100 1000 5.89s 5.89s 0.170 4.28GB 1.19
## 22 Rcpp 100 1000 430.41ms 445.14ms 2.25 779.89MB 2.25
## 23 R 1000 1000 24.94s 24.94s 0.0401 43.02GB 2.33
## 24 Rcpp 1000 1000 3.45s 3.45s 0.290 7.43GB 2.90
## n_itr n_gc total_time
## <int> <dbl> <bch:tm>
## 1 19 1 50.82ms
## 2 20 0 4.66ms
## 3 19 1 111.12ms
## 4 19 1 10.26ms
## 5 15 15 507.72ms
## 6 20 3 118.76ms
## 7 5 2 391.25ms
## 8 20 0 33.95ms
## 9 4 5 570.53ms
## 10 20 4 235.63ms
## 11 1 14 728.27ms
## 12 7 17 515.74ms
## 13 2 3 588.3ms
## 14 20 5 273.01ms
## 15 1 8 1.19s
## 16 6 9 606.94ms
## 17 1 40 3.27s
## 18 1 7 768.34ms
## 19 1 3 4.7s
## 20 4 1 667.98ms
## 21 1 7 5.89s
## 22 2 2 890.28ms
## 23 1 58 24.94s
## 24 1 10 3.45s
Visualize the results of the benchmark
Points indicate the median iteration time. Note: the variance in run times is so small that it cannot be seen at the scales provided.
# Expand the raw per-run timing samples into long form: one row per
# (parameter combination, expression, timing sample). Splitting on a unique
# row id first means unnest_legacy() operates on one benchmark row at a
# time, and map_dfr() stitches the pieces back together.
# NOTE(review): unnest_legacy() is deprecated in tidyr; migrating to
# unnest(time) should be verified to produce the same layout.
bp_time_df <- bench_press %>%
mutate(rows = 1:n(),
expression = as.character(expression)) %>%
select(-result, -memory, -gc) %>%
split(.$rows) %>%
map_dfr(~ unnest_legacy(.x, time))
# Median iteration time per (observations, iterations, implementation),
# with the x axis ordered from fastest to slowest combination.
median_times <- bp_time_df %>%
  group_by(obs, iters, expression) %>%
  summarise(median = first(median)) %>%
  ungroup() %>%
  mutate(combo = paste0(obs, ": ", iters)) %>%
  arrange(median, combo) %>%
  mutate(combo = fct_inorder(combo)) # lock in the sorted ordering
ggplot(median_times, aes(x = combo, y = median, fill = expression)) +
  geom_point(shape = 21, size = 4) +
  labs(x = "Observations: Iterations",
       y = "Iteration time (s)",
       fill = "Code\nsource:") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_brewer(palette = "Set1")
Visualization of results from functions
Essentially this was a bootstrapping problem. Below, we take the largest set of results (i.e., the most iterations and observations) from bench
and plot them. Shaded area represents the 88th percentile of all runs. The “traces” are a sample of individual runs (n = 250). As the number of “observations”, associated with the micro
object, and “iterations” gets larger, the two sets of code should converge on a similar set of uncertainties, though individual runs will still be variable. I could ensure similarity between runs by setting a “seed”, but I’ve chosen not to do that.
# Pull the stored function results for one implementation at the largest
# benchmark case (maximum observations and iterations). Factored into a
# helper because the R and Rcpp pipelines were byte-for-byte duplicates.
extract_result <- function(press_tbl, which_expr) {
  press_tbl %>%
    mutate(expression = as.character(expression)) %>%
    filter(obs == max(obs), iters == max(iters),
           expression == which_expr) %>%
    select(result) %>%
    unnest_legacy()
}
R <- extract_result(bench_press, "R")
Rcpp <- extract_result(bench_press, "Rcpp")
# Per-observation summary for each code source: the median plus the 6th and
# 94th percentiles across all bootstrap iterations (an 88% band).
code_summary <- lst(R, Rcpp) %>%
  map_dfr(function(df) {
    df %>%
      as_tibble() %>%
      mutate(obs_num = 1:n()) %>%
      pivot_longer(cols = -obs_num, names_to = "iteration",
                   values_to = "value") %>%
      mutate(iteration = parse_number(iteration)) %>%
      group_by(obs_num) %>%
      summarise(median = quantile(value, probs = 0.5, na.rm = TRUE),
                high = quantile(value, probs = 0.94, na.rm = TRUE),
                low = quantile(value, probs = 0.06, na.rm = TRUE))
  },
  .id = "source")
# For each code source, reshape to long form and keep a random sample of
# 250 complete iterations ("traces") for plotting individual runs.
code_sample <- lst(R, Rcpp) %>%
  map_dfr(function(df) {
    df %>%
      as_tibble() %>%
      mutate(obs_num = 1:n()) %>%
      pivot_longer(cols = -obs_num, names_to = "iteration",
                   values_to = "value") %>%
      mutate(iteration = parse_number(iteration)) %>%
      group_by(iteration) %>%
      nest() %>%  # one row per iteration so sample_n() picks whole traces
      ungroup() %>%
      sample_n(250) %>%
      unnest(cols = c(data))
  },
  .id = "source")
# Final comparison plot, faceted by code source: an 88% percentile ribbon
# (6th-94th, from code_summary) drawn first so the 250 sampled individual
# traces (from code_sample) sit on top of it.
ggplot() +
geom_ribbon(data = code_summary,
aes(x = obs_num, ymin = low, ymax = high, fill = source),
alpha = 0.7, show.legend = FALSE) +
geom_line(data = code_sample,
aes(x = obs_num, y = value, group = iteration),
alpha = 0.1) + # low alpha so overlapping traces shade by density
labs(x = "Observation number",
y = "Expected value") +
facet_wrap(~ source) +
theme_bw() +
theme(panel.grid = element_blank(),
strip.background = element_blank()) +
scale_fill_brewer(palette = "Set1")