Getting started with chopin

Introduction

chopin workflow

library(chopin)
library(terra)
library(sf)
library(collapse)
library(dplyr)
library(future)
library(future.mirai)
library(future.apply)

Example data

temp_dir <- tempdir(check = TRUE)

url_nccnty <-
  paste0(
    "https://raw.githubusercontent.com/",
    "ropensci/chopin/refs/heads/main/",
    "tests/testdata/nc_hierarchy.gpkg"
  )
url_ncelev <-
  paste0(
    "https://raw.githubusercontent.com/",
    "ropensci/chopin/refs/heads/main/",
    "tests/testdata/nc_srtm15_otm.tif"
  )

nccnty_path <- file.path(temp_dir, "nc_hierarchy.gpkg")
ncelev_path <- file.path(temp_dir, "nc_srtm15_otm.tif")

# download data
download.file(url_nccnty, nccnty_path, mode = "wb", quiet = TRUE)
download.file(url_ncelev, ncelev_path, mode = "wb", quiet = TRUE)

nccnty <- terra::vect(nccnty_path, layer = "county")
ncelev <- terra::rast(ncelev_path)

Generating random points in North Carolina

ncsamp <-
  terra::spatSample(
    nccnty,
    1e4L
  )
ncsamp$pid <- seq_len(nrow(ncsamp))

Creating grids

ncgrid <- par_pad_grid(ncsamp, mode = "grid", nx = 4L, ny = 2L, padding = 10000)

plot(ncgrid$original)

Extracting values from raster

future::plan(future.mirai::mirai_multisession, workers = 2L)
pg <-
  par_grid(
    grids = ncgrid,
    pad_y = FALSE,
    .debug = TRUE,
    fun_dist = extract_at,
    x = ncelev_path,
    y = sf::st_as_sf(ncsamp),
    id = "pid",
    radius = 1e4,
    func = "mean"
  )
# also possible in mirai with par_*_mirai functions
# mirai daemons should be created first
mirai::daemons(n = 2L)
pg <-
  par_grid_mirai(
    grids = ncgrid,
    pad_y = FALSE,
    .debug = TRUE,
    fun_dist = extract_at,
    x = ncelev_path,
    y = sf::st_as_sf(ncsamp),
    id = "pid",
    radius = 1e4,
    func = "mean"
  )
mirai::daemons(n = 0L)

Hierarchical processing

nccnty <- sf::st_read(nccnty_path, layer = "county")
## Reading layer `county' from data source `/tmp/RtmpbClS6n/nc_hierarchy.gpkg' using driver `GPKG'
## Simple feature collection with 100 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 1054155 ymin: 1341756 xmax: 1838923 ymax: 1690176
## Projected CRS: NAD83 / Conus Albers
nctrct <- sf::st_read(nccnty_path, layer = "tracts")
## Reading layer `tracts' from data source `/tmp/RtmpbClS6n/nc_hierarchy.gpkg' using driver `GPKG'
## Simple feature collection with 2672 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1054155 ymin: 1341756 xmax: 1838923 ymax: 1690176
## Projected CRS: NAD83 / Conus Albers
px <-
  par_hierarchy(
    # from here the par_hierarchy-specific arguments
    regions = nctrct,
    regions_id = "GEOID",
    length_left = 5,
    pad = 10000,
    pad_y = FALSE,
    .debug = TRUE,
    # from here are the dispatched function definition
    # for parallel workers
    fun_dist = extract_at,
    # below should follow the arguments of the dispatched function
    x = ncelev,
    y = sf::st_as_sf(ncsamp),
    id = "pid",
    radius = 1e4,
    func = "mean"
  )

dim(px)
## [1] 10000     2
head(px)
##   pid      mean
## 1  55 -2.242934
## 2  83  7.638708
## 3  89  7.517975
## 4 190  8.162208
## 5 271 -8.555017
## 6 358  9.314404
tail(px)
##        pid       mean
## 9995  9213 -4.7030168
## 9996  9483  5.4337306
## 9997  9608  6.7779775
## 9998  9766 -4.9676342
## 9999  9907 -0.5347484
## 10000 9932 -4.1729984

Multiraster processing

ncelev <- terra::rast(ncelev_path)
tdir <- tempdir(check = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test1.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test2.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test3.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test4.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test5.tif"), overwrite = TRUE)

rasts <- list.files(tdir, pattern = "tif$", full.names = TRUE)

pm <-
  par_multirasters(
    filenames = rasts,
    fun_dist = extract_at,
    x = NA,
    y = sf::st_as_sf(ncsamp)[1:500, ],
    id = "pid",
    radius = 1e4,
    func = "mean",
    .debug = TRUE
  )

dim(pm)
## [1] 3000    2
head(pm)
##         mean                       base_raster
## 1  -5.211319 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 2 304.373932 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 3 158.474487 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 4  37.591179 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 5  10.516018 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 6  18.762609 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
tail(pm)
##           mean               base_raster
## 2995  36.50384 /tmp/RtmpbClS6n/test5.tif
## 2996 220.32663 /tmp/RtmpbClS6n/test5.tif
## 2997  51.63021 /tmp/RtmpbClS6n/test5.tif
## 2998 151.88278 /tmp/RtmpbClS6n/test5.tif
## 2999 755.19885 /tmp/RtmpbClS6n/test5.tif
## 3000 225.11125 /tmp/RtmpbClS6n/test5.tif

User-defined functions

From version 0.9.9, a custom function that takes sf objects in x and y arguments can be used with par_grid and par_hierarchy functions. The custom function should return a data.frame or tibble object.

An example below demonstrates to compute the floor area ratio (or area-enclosed ratio, AER) in 100 meters circular buffers of points from random points in central Seoul, South Korea.

gpkg_path <- system.file("extdata/osm_seoul.gpkg", package = "chopin")
bldg <- sf::st_read(gpkg_path, layer = "buildings")
## Reading layer `buildings' from data source 
##   `/tmp/Rtmpns0kJS/Rinst3f5f861c2ced8/chopin/extdata/osm_seoul.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 11112 features and 2 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 319332.8 ymin: 4159652 xmax: 325432.1 ymax: 4166901
## Projected CRS: WGS 84 / UTM zone 52N
grdpnt <- sf::st_read(gpkg_path, layer = "points")
## Reading layer `points' from data source 
##   `/tmp/Rtmpns0kJS/Rinst3f5f861c2ced8/chopin/extdata/osm_seoul.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 595 features and 1 field
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 319096.8 ymin: 4159684 xmax: 325296.8 ymax: 4166884
## Projected CRS: WGS 84 / UTM zone 52N
# plot
plot(sf::st_geometry(bldg), col = "lightgrey")
plot(sf::st_geometry(grdpnt), add = TRUE, pch = 20, col = "blue")

# Radius for AER (meters)
radius_m <- 100

# This function will be dispatched in parallel over computational grids.
aer_at_points <-
  function(
    x, y,
    radius,
    id_col = "pid",
    floors_col = "floors",
    area_col = "foot_m2"
  ) {
    if (!inherits(x, "sf") && !inherits(y, "sf")) {
      x <- sf::st_as_sf(x)
      y <- sf::st_as_sf(y)
    }
    yorig <- y
    y <- sf::st_geometry(y)

    # Buffers around each point
    buf <- sf::st_buffer(y, radius)
    # spatial join (buildings intersecting buffers)
    j <- sf::st_intersects(buf, sf::st_geometry(x))
    # Compute
    out <- vapply(seq_along(j), function(i) {
      if (length(j[[i]]) == 0) return(NA_real_)
      sub <- x[j[[i]], ]
      sub <- sub[!is.na(sub[[floors_col]]) & !is.na(sub[[area_col]]), ]
      if (nrow(sub) == 0) return(NA_real_)
      aer_num <- sum(sub[[area_col]] * sub[[floors_col]], na.rm = TRUE)
      aer_den <- pi * radius^2
      aer_num / aer_den
    }, numeric(1))

    res <-
      data.frame(
        pid = yorig[[id_col]],
        aer = out
      )
    res
  }

## Parallel processing
# Initiate mirai damons
plan(mirai_multisession, workers = 4L)
grd <- chopin::par_pad_grid(
  bldg,
  mode = "grid",
  nx = 1L,
  ny = 4L,
  padding = 300
)

bldg_cast <- sf::st_cast(bldg, "POLYGON")
radius_m <- 300

res <- par_grid_mirai(
  grids    = grd,
  fun_dist = aer_at_points,
  x        = bldg_cast,
  y        = grdpnt,
  radius   = radius_m,
  id_col = "pid",
  .debug = TRUE
)
future::plan(future::sequential)
mirai::daemons(n = 0L)
## [1] 0
head(res)
##   pid       aer
## 1   1 0.2847932
## 2   2 0.7716681
## 3   3 0.4084137
## 4   4 0.4438036
## 5   5 0.6123537
## 6   6 0.5815721

Caveats

Why parallelization is slower than the ordinary function run?

Notes on data restrictions

chopin works best with two-dimensional (planar) geometries. Users should disable s2 spherical geometry mode in sf by setting sf::sf_use_s2(FALSE). Running any chopin functions at spherical or three-dimensional (e.g., including M/Z dimensions) geometries may produce incorrect or unexpected results.

mirror server hosted at Truenetwork, Russian Federation.