Benchmark

on Linux x86_64

Add a new column to a data.frame with nearly one million (960000) rows

method elapsed time (sec)
R-base 0.005
dplyr 0.036
DataSailr 2.064
dplyr with rowwise() 8.470
R-base using for loop More than a few minutes

Add multiple new columns to a data.frame with nearly one million (960000) rows

method elapsed time (sec)
R-base 0.107
dplyr 0.459
DataSailr 8.859
dplyr with rowwise() 72.494
R-base using for loop More than a few minutes

Code to create the data.frame with nearly one million rows

# Build the benchmark input by stacking copies of the built-in mtcars data
# and save it to disk so each benchmark snippet can load the same object.
data(mtcars)

n <- 30000 # number of mtcars copies to stack

# All copies are bound in a single rbind call, so no empty data.frame has to
# be created beforehand: the result is assigned in one step.
mtcarsMillion <- do.call("rbind", replicate(n, mtcars, simplify = FALSE))

# Print the row count as a sanity check before saving.
nrow(mtcarsMillion)
save(mtcarsMillion, file = "mtcarsMillion.Rda")

Benchmark code for adding new column

# Base R: add one derived column using vectorized column arithmetic.
load("mtcarsMillion.Rda")

system.time({
  result <- mtcarsMillion
  # hp / cyl is evaluated on whole columns, not row by row.
  result[["hp_per_cyl"]] <- with(mtcarsMillion, hp / cyl)
})
# dplyr: add one derived column with a single mutate() step.
library(dplyr)
load("mtcarsMillion.Rda")

system.time({
  result <- mtcarsMillion %>%
    mutate(hp_per_cyl = hp / cyl)
})
# DataSailr: add one derived column by running a DataSailr script via sail().
library(datasailr)
load("mtcarsMillion.Rda")

system.time({
  # The script is passed to sail() as a plain character string.
  result <- sail(
    mtcarsMillion,
    code = '
  hp_per_cyl = hp / cyl
'
  )
})
# dplyr with rowwise(): add one derived column, evaluating mutate() per row.
library(dplyr)
load("mtcarsMillion.Rda")

system.time({
  result <- mtcarsMillion %>%
    rowwise() %>%
    mutate(hp_per_cyl = hp / cyl)
})
# Base R with an explicit for loop: fill the new column one row at a time.
# This is intentionally the element-wise (non-vectorized) variant that the
# benchmark compares against the other methods.
load("mtcarsMillion.Rda")

result <- mtcarsMillion
last <- nrow(mtcarsMillion)

system.time({
  # seq_len() yields an empty sequence for a zero-row input, whereas 1:last
  # would iterate over c(1, 0) and index rows that do not exist.
  for (i in seq_len(last)) {
    result[i, "hp_per_cyl"] <-
      mtcarsMillion[i, "hp"] / mtcarsMillion[i, "cyl"]
  }
})

Benchmark code for adding multiple new columns

# Base R: add five derived columns using vectorized column arithmetic.
load("mtcarsMillion.Rda")

system.time({
  result <- mtcarsMillion
  result[["hp_per_cyl"]] <- with(mtcarsMillion, hp / cyl)
  result[["mpg_per_cyl"]] <- with(mtcarsMillion, mpg / cyl)
  result[["wt_per_cyl"]] <- with(mtcarsMillion, wt / cyl)
  result[["qsec_per_cyl"]] <- with(mtcarsMillion, qsec / cyl)
  # hp / cyl only for rows where vs == 1; NA for every other row.
  result[["v_type_hp_per_cyl"]] <- with(
    mtcarsMillion,
    ifelse(vs == 1, hp / cyl, NA)
  )
})
# dplyr: add five derived columns, one mutate() step per column.
library(dplyr)
load("mtcarsMillion.Rda")

system.time({
  # Each column is computed exactly once, so the timed work matches the
  # base R and DataSailr variants of this benchmark.
  result <- mtcarsMillion %>%
    mutate(hp_per_cyl = hp / cyl) %>%
    mutate(mpg_per_cyl = mpg / cyl) %>%
    mutate(wt_per_cyl = wt / cyl) %>%
    mutate(qsec_per_cyl = qsec / cyl) %>%
    mutate(v_type_hp_per_cyl = ifelse(vs == 1, hp / cyl, NA))
})

# Summarise each new column to inspect the computed values.
summary(result$hp_per_cyl)
summary(result$mpg_per_cyl)
summary(result$wt_per_cyl)
summary(result$qsec_per_cyl)
summary(result$v_type_hp_per_cyl)
# DataSailr: add five derived columns with a single DataSailr script.
library(datasailr)
load("mtcarsMillion.Rda")

system.time({
  # The script is passed to sail() as a plain character string.
  # NOTE(review): `.` in the else branch is presumably DataSailr's
  # missing-value literal, mirroring the NA used in the R variants - confirm.
  result <- sail(
    mtcarsMillion,
    code = '
  hp_per_cyl = hp / cyl
  mpg_per_cyl = mpg / cyl
  wt_per_cyl = wt / cyl
  qsec_per_cyl = qsec / cyl
  if( vs == 1){
    v_type_hp_per_cyl = hp / cyl
  }else{
    v_type_hp_per_cyl = .
  }
'
  )
})
# dplyr with rowwise(): add five derived columns, one mutate() step per
# column, each evaluated per row.
library(dplyr)
load("mtcarsMillion.Rda")

system.time({
  # Each column is computed exactly once, so the timed work matches the
  # other variants of this benchmark.
  result <- mtcarsMillion %>%
    rowwise() %>%
    mutate(hp_per_cyl = hp / cyl) %>%
    mutate(mpg_per_cyl = mpg / cyl) %>%
    mutate(wt_per_cyl = wt / cyl) %>%
    mutate(qsec_per_cyl = qsec / cyl) %>%
    mutate(v_type_hp_per_cyl = ifelse(vs == 1, hp / cyl, NA))
})