Getting started with chopin

Introduction

chopin workflow

library(chopin)
library(terra)
library(sf)
library(collapse)
library(dplyr)
library(future)
library(future.mirai)
library(future.apply)

Example data

temp_dir <- tempdir(check = TRUE)

url_nccnty <-
  paste0(
    "https://raw.githubusercontent.com/",
    "ropensci/chopin/refs/heads/main/",
    "tests/testdata/nc_hierarchy.gpkg"
  )
url_ncelev <-
  paste0(
    "https://raw.githubusercontent.com/",
    "ropensci/chopin/refs/heads/main/",
    "tests/testdata/nc_srtm15_otm.tif"
  )

nccnty_path <- file.path(temp_dir, "nc_hierarchy.gpkg")
ncelev_path <- file.path(temp_dir, "nc_srtm15_otm.tif")

# download data
download.file(url_nccnty, nccnty_path, mode = "wb", quiet = TRUE)
download.file(url_ncelev, ncelev_path, mode = "wb", quiet = TRUE)

nccnty <- terra::vect(nccnty_path)
ncelev <- terra::rast(ncelev_path)

Generating random points in North Carolina

ncsamp <-
  terra::spatSample(
    nccnty,
    1e4L
  )
ncsamp$pid <- seq_len(nrow(ncsamp))

Creating grids

ncgrid <- par_pad_grid(ncsamp, mode = "grid", nx = 4L, ny = 2L, padding = 10000)

plot(ncgrid$original)

Extracting values from raster

future::plan(future.mirai::mirai_multisession, workers = 2L)
pg <-
  par_grid(
    grids = ncgrid,
    pad_y = FALSE,
    .debug = TRUE,
    fun_dist = extract_at,
    x = ncelev_path,
    y = sf::st_as_sf(ncsamp),
    id = "pid",
    radius = 1e4,
    func = "mean"
  )

Hierarchical processing

nccnty <- sf::st_read(nccnty_path, layer = "county")
## Reading layer `county' from data source `/tmp/RtmpdaKerh/nc_hierarchy.gpkg' using driver `GPKG'
## Simple feature collection with 100 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 1054155 ymin: 1341756 xmax: 1838923 ymax: 1690176
## Projected CRS: NAD83 / Conus Albers
nctrct <- sf::st_read(nccnty_path, layer = "tracts")
## Reading layer `tracts' from data source `/tmp/RtmpdaKerh/nc_hierarchy.gpkg' using driver `GPKG'
## Simple feature collection with 2672 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1054155 ymin: 1341756 xmax: 1838923 ymax: 1690176
## Projected CRS: NAD83 / Conus Albers
px <-
  par_hierarchy(
    # from here the par_hierarchy-specific arguments
    regions = nctrct,
    regions_id = "GEOID",
    length_left = 5,
    pad = 10000,
    pad_y = FALSE,
    .debug = TRUE,
    # from here are the dispatched function definition
    # for parallel workers
    fun_dist = extract_at,
    # below should follow the arguments of the dispatched function
    x = ncelev,
    y = sf::st_as_sf(ncsamp),
    id = "pid",
    radius = 1e4,
    func = "mean"
  )

dim(px)
## [1] 10000     2
head(px)
##   pid      mean
## 1  10  8.313316
## 2 108 13.137020
## 3 131 12.605999
## 4 157  8.069553
## 5 189 15.656404
## 6 310 17.499598
tail(px)
##        pid      mean
## 9995  9152  5.591672
## 9996  9468 -3.740387
## 9997  9556  5.198344
## 9998  9579  5.305995
## 9999  9731 -4.416286
## 10000 9800  4.727526

Multiraster processing

ncelev <- terra::rast(ncelev_path)
tdir <- tempdir(check = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test1.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test2.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test3.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test4.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test5.tif"), overwrite = TRUE)

rasts <- list.files(tdir, pattern = "tif$", full.names = TRUE)

pm <-
  par_multirasters(
    filenames = rasts,
    fun_dist = extract_at,
    x = NA,
    y = sf::st_as_sf(ncsamp)[1:500, ],
    id = "pid",
    radius = 1e4,
    func = "mean",
    .debug = TRUE
  )

dim(pm)
## [1] 3000    2
head(pm)
##         mean                       base_raster
## 1 122.651756 /tmp/RtmpdaKerh/nc_srtm15_otm.tif
## 2  52.318684 /tmp/RtmpdaKerh/nc_srtm15_otm.tif
## 3  -1.640308 /tmp/RtmpdaKerh/nc_srtm15_otm.tif
## 4  30.664553 /tmp/RtmpdaKerh/nc_srtm15_otm.tif
## 5 226.509506 /tmp/RtmpdaKerh/nc_srtm15_otm.tif
## 6  32.144981 /tmp/RtmpdaKerh/nc_srtm15_otm.tif
tail(pm)
##            mean               base_raster
## 2995   3.890733 /tmp/RtmpdaKerh/test5.tif
## 2996 111.909477 /tmp/RtmpdaKerh/test5.tif
## 2997   8.897887 /tmp/RtmpdaKerh/test5.tif
## 2998 264.120361 /tmp/RtmpdaKerh/test5.tif
## 2999  21.573734 /tmp/RtmpdaKerh/test5.tif
## 3000  33.012138 /tmp/RtmpdaKerh/test5.tif

Caveats

Why parallelization is slower than the ordinary function run?

Notes on data restrictions

chopin works best with two-dimensional (planar) geometries. Users should disable s2 spherical geometry mode in sf by setting sf::sf_use_s2(FALSE). Running any chopin functions at spherical or three-dimensional (e.g., including M/Z dimensions) geometries may produce incorrect or unexpected results.

mirror server hosted at Truenetwork, Russian Federation.