Part 3 Map-reduce APIs

Map-reduce packages that work with the future framework:

Take-home message: future.apply, furrr, foreach, plyr, and BiocParallel (from Bioconductor.org) are siblings. Their goals are the same, but they provide alternative syntax for achieving them. They’re all equally good, with the same performance and limitations; use the one that you prefer. It’s no different from how some people prefer to use base-R lapply(), while others prefer to use purrr::map().

3.1 Parallel alternatives to base-R apply functions

The future.apply package implements plug-and-play, parallel alternatives to base-R apply functions:

The most common base-R apply functions and their parallel counterparts in the future.apply package. {#tbl-future.apply}
base future.apply
apply() future_apply()
by() future_by()
eapply() future_eapply()
lapply() future_lapply()
Map() future_Map()
mapply() future_mapply()
replicate() future_replicate()
sapply() future_sapply()
tapply() future_tapply()
vapply() future_vapply()

3.1.1 Example: base::lapply(X, FUN)

Let’s introduce another slow function that calculates the square root very slowly:

## Deliberately slow square root: pauses for a fixed delay before
## returning sqrt(x), to emulate an expensive computation.
slow_sqrt <- function(x) {
  delay <- 1.0  ## seconds of emulated slowness
  Sys.sleep(delay)
  root <- sqrt(x)
  root
}

If we use this with lapply() to calculate ten values, it takes 10 seconds:

## Sequential baseline: call slow_sqrt() once per element of X
X <- 1:10
z <- lapply(X, slow_sqrt)  ## takes ~10 seconds
str(z)
#> List of 10
#>  $ : num 1
#>  $ : num 1.41
#>  $ : num 1.73
#>  ...
#>  $ : num 3.16

We can parallelize this using future.apply as:

library(future.apply)
## Use the multisession backend with four workers
plan(multisession, workers = 4)

## Same arguments as the lapply() call; only the function name changes
z <- future_lapply(X, slow_sqrt)

How long will this take?

3.1.1.1 Other future alternatives

Here are parallel alternatives for achieving identical results using furrr, and using foreach, plyr, and BiocParallel together with the doFuture adaptor.

## --- furrr ---
library(furrr)
plan(multisession, workers = 4)

z <- future_map(X, slow_sqrt)

## --- foreach, with doFuture registered as the adaptor ---
library(foreach)
doFuture::registerDoFuture()
plan(multisession, workers = 4)

z <- foreach(x = X) %dopar% { slow_sqrt(x) }

## --- plyr, with doFuture registered as the adaptor ---
library(plyr)
doFuture::registerDoFuture()
plan(multisession, workers = 4)

## .parallel = TRUE asks plyr to process the elements in parallel
z <- llply(X, slow_sqrt, .parallel = TRUE)

## --- BiocParallel, with DoparParam() as default and doFuture registered ---
library(BiocParallel)
register(DoparParam(), default = TRUE)
doFuture::registerDoFuture()
plan(multisession, workers = 4)

z <- bplapply(X, slow_sqrt)

3.1.1.2 Non-future alternatives

Here are parallel alternatives for achieving identical results using the parallel package that comes built in with R.

## --- parallel::mclapply(), with option 'mc.cores' set to four ---
library(parallel)
options(mc.cores = 4)

z <- mclapply(X, slow_sqrt)

## --- parallel::parLapply(), on a default cluster of four workers ---
library(parallel)
cl <- makeCluster(4)
## Register 'cl' as the default cluster so it need not be passed explicitly
parallel::setDefaultCluster(cl)
## Export slow_sqrt() to the cluster workers
parallel::clusterExport(varlist = "slow_sqrt")

z <- parLapply(X = X, fun = slow_sqrt)

## Shut down the cluster when done
stopCluster(cl)

3.1.2 Example: base::vapply(X, FUN, FUN.VALUE)

## Sequential baseline: FUN.VALUE = NA_real_ declares that each call
## returns a single double
X <- 1:10
z <- vapply(X, slow_sqrt, FUN.VALUE = NA_real_)
str(z)
#> num [1:10] 1 1.41 1.73 2 2.24 ...

## --- future.apply equivalent ---
library(future.apply)
plan(multisession, workers = 4)

z <- future_vapply(X, slow_sqrt, FUN.VALUE = NA_real_)

3.1.2.1 Other future alternatives

Here are parallel alternatives for achieving identical results using furrr, and using foreach and plyr together with the doFuture adaptor.

## --- furrr ---
library(furrr)
plan(multisession, workers = 4)

z <- future_map_dbl(X, slow_sqrt)

## --- foreach, with doFuture registered as the adaptor ---
library(foreach)
doFuture::registerDoFuture()
plan(multisession, workers = 4)

## .combine = c concatenates the per-iteration results into one vector
z <- foreach(x = X, .combine = c) %dopar% { slow_sqrt(x) }

## --- plyr, with doFuture registered as the adaptor ---
library(plyr)
doFuture::registerDoFuture()
plan(multisession, workers = 4)

z <- laply(X, slow_sqrt, .parallel = TRUE)

3.1.2.2 Non-future alternatives

Here are parallel alternatives for achieving identical results using the parallel package that comes built in with R.

## --- parallel::mclapply(), with option 'mc.cores' set to four ---
library(parallel)
options(mc.cores = 4)

z <- mclapply(X, slow_sqrt)
## Flatten the returned list into an unnamed vector
z <- unlist(z, use.names = FALSE)

## --- parallel::parLapply(), on a default cluster of four workers ---
library(parallel)
cl <- makeCluster(4)
## Register 'cl' as the default cluster so it need not be passed explicitly
parallel::setDefaultCluster(cl)
## Export slow_sqrt() to the cluster workers
parallel::clusterExport(varlist = "slow_sqrt")

z <- parLapply(X = X, fun = slow_sqrt)
## Flatten the returned list into an unnamed vector
z <- unlist(z, use.names = FALSE)

## Shut down the cluster when done
stopCluster(cl)

3.1.3 Example: base::mapply(X, Y, FUN)

## Sequential baseline: mapply() pairs up the elements of X and Y
X <- 1:10
Y <- 10:1
z <- mapply(X, Y, FUN = function(x, y) { slow_sqrt(x * y) })
str(z)
#> num [1:10] 3.16 4.24 4.9 5.29 5.48 ...

## --- future.apply equivalent ---
library(future.apply)
plan(multisession, workers = 4)

z <- future_mapply(X, Y, FUN = function(x, y) { slow_sqrt(x * y) })

3.1.3.1 Other future alternatives

Here are parallel alternatives for achieving identical results using furrr, and using foreach and plyr together with the doFuture adaptor.

## --- furrr ---
library(furrr)
plan(multisession, workers = 4)

z <- future_map2_dbl(X, Y, function(x, y) { slow_sqrt(x * y) })

## --- foreach, with doFuture registered as the adaptor ---
library(foreach)
doFuture::registerDoFuture()
plan(multisession, workers = 4)

## Iterate over X and Y together; .combine = c concatenates the results
z <- foreach(x = X, y = Y, .combine = c) %dopar% { 
  slow_sqrt(x * y) 
}
## --- plyr, with doFuture registered as the adaptor ---
library(plyr)
doFuture::registerDoFuture()
plan(multisession, workers = 4)

## Each row of the two-column matrix supplies one (x, y) pair.
## .parallel = TRUE is required for plyr to process the rows in parallel;
## without it, maply() runs sequentially.
z <- maply(cbind(x = X, y = Y),
           function(x, y) { slow_sqrt(x * y) },
           .expand = FALSE, .parallel = TRUE)
## Drop the names so the result matches the mapply() output
names(z) <- NULL

3.1.3.2 Non-future alternatives

Here are parallel alternatives for achieving identical results using the parallel package that comes built in with R.

## --- parallel::mcmapply(), with option 'mc.cores' set to four ---
library(parallel)
options(mc.cores = 4)

z <- mcmapply(X, Y, FUN = function(x, y) { slow_sqrt(x * y) })

## --- parallel::parApply(), on a default cluster of four workers ---
library(parallel)
cl <- makeCluster(4)
## Register 'cl' as the default cluster so it need not be passed explicitly
parallel::setDefaultCluster(cl)
## Export slow_sqrt() to the cluster workers
parallel::clusterExport(varlist = "slow_sqrt")

## Bind X and Y into a two-column matrix and apply over its rows
## (MARGIN = 1L), so each call receives one (x, y) pair
z <- parApply(X = cbind(x = X, y = Y), MARGIN = 1L, 
              FUN = function(row) { slow_sqrt(row["x"] * row["y"]) })

## Shut down the cluster when done
stopCluster(cl)

3.2 Parallel alternatives to purrr functions

The hexlogo for the ‘furrr’ package designed by ?? Kuhn

The furrr package, by Davis Vaughan, implements plug-and-play, parallel alternatives to purrr functions:

purrr furrr
imap() future_imap()
imap_chr() future_imap_chr()
imap_dbl() future_imap_dbl()
imap_dfc() future_imap_dfc()
imap_dfr() future_imap_dfr()
imap_int() future_imap_int()
imap_lgl() future_imap_lgl()
imap_raw() future_imap_raw()
invoke_map() future_invoke_map()
invoke_map_chr() future_invoke_map_chr()
invoke_map_dbl() future_invoke_map_dbl()
invoke_map_dfc() future_invoke_map_dfc()
invoke_map_dfr() future_invoke_map_dfr()
invoke_map_int() future_invoke_map_int()
invoke_map_lgl() future_invoke_map_lgl()
invoke_map_raw() future_invoke_map_raw()
iwalk() future_iwalk()
map() future_map()
map_at() future_map_at()
map_chr() future_map_chr()
map_dbl() future_map_dbl()
map_dfc() future_map_dfc()
map_dfr() future_map_dfr()
map_if() future_map_if()
map_int() future_map_int()
map_lgl() future_map_lgl()
map_raw() future_map_raw()
map2() future_map2()
map2_chr() future_map2_chr()
map2_dbl() future_map2_dbl()
map2_dfc() future_map2_dfc()
map2_dfr() future_map2_dfr()
map2_int() future_map2_int()
map2_lgl() future_map2_lgl()
map2_raw() future_map2_raw()
modify() future_modify()
modify_at() future_modify_at()
modify_if() future_modify_if()
pmap() future_pmap()
pmap_chr() future_pmap_chr()
pmap_dbl() future_pmap_dbl()
pmap_dfc() future_pmap_dfc()
pmap_dfr() future_pmap_dfr()
pmap_int() future_pmap_int()
pmap_lgl() future_pmap_lgl()
pmap_raw() future_pmap_raw()
pwalk() future_pwalk()
walk() future_walk()
walk2() future_walk2()