Getting Started with GWPR.light 1.0.0

Overview

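GWPR.light 1.0.0 provides a modern, sf-first API for Geographically Weighted Panel Regression (GWPR). The public interface consists of four functions. Three of them are demonstrated in this guide: fit_gwpr() fits a model at a given bandwidth, gwpr() runs the full pipeline (optional bandwidth search, fitting, and optional diagnostics), and diagnose_gwpr() runs diagnostic tests on a fitted model. The fourth function is not demonstrated here; see the package reference manual for the complete list.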

All functions accept panel data as a plain data.frame and spatial information as an sf object. The workers argument controls parallel execution; the default workers = 1 runs serially and is safe in all environments.

Minimal linear GWPR example

library(GWPR.light)
library(sf)
#> Warning: package 'sf' was built under R version 4.3.3
#> Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE

# Fix the RNG state so the simulated panel, and therefore every number
# printed later in this guide, is reproducible.
set.seed(42)

# Panel dimensions: 6 spatial units, each observed over 4 time periods.
n_units <- 6
n_time  <- 4
n_obs   <- n_units * n_time

# Unit locations laid out on a 3 x 2 grid, turned into an sf point layer
# without a coordinate reference system.
unit_coords <- data.frame(
  id = seq_len(n_units),
  X  = rep(c(0, 1, 2), times = 2),
  Y  = rep(c(0, 1), each = 3)
)
pts <- sf::st_as_sf(unit_coords, coords = c("X", "Y"), crs = NA_integer_)

# Long-format panel: one row per (unit, period) pair, with two standard-normal
# covariates. The draws are made in the order x1, x2, noise.
dat <- data.frame(
  id   = rep(seq_len(n_units), each = n_time),
  time = rep(seq_len(n_time), times = n_units),
  x1   = rnorm(n_obs),
  x2   = rnorm(n_obs)
)

# Response: linear in x1 and x2 (slopes 1.5 and -0.8) plus Gaussian noise.
noise <- rnorm(n_obs, sd = 0.3)
dat$y <- 1.5 * dat$x1 - 0.8 * dat$x2 + noise

# Fit with a known bandwidth (skip automatic search for speed).
# `dat` holds the panel observations and `pts` the unit geometries. The `id`
# and `time` arguments name the columns of `dat` that hold the unit identifier
# and the time period.
# NOTE(review): `pts` also carries an `id` column; presumably it is used to
# match geometries to panel rows -- confirm against the fit_gwpr() docs.
fit <- fit_gwpr(
  formula   = y ~ x1 + x2,
  data      = dat,
  spatial   = pts,
  id        = "id",
  time      = "time",
  bandwidth = 2,
  family    = "gaussian",
  model     = "pooling",
  workers   = 1
)

# Print the fit summary (family, model, effect, bandwidth, and metrics).
print(fit)
#> Geographically Weighted Panel Regression (gwpr_fit)
#> ----------------------------------------------------
#> Family   : gaussian 
#> Model    : pooling 
#> Effect   : individual 
#> Bandwidth: 2 
#> Metrics  :
#>   R2           0.9816
#>   MSE          0.06987
#>   RMSE         0.2643
#>   MAE          0.1924

Accessing results

# Compact structure of the overall goodness-of-fit metrics stored on the fit
fit_metrics <- fit$metrics
str(fit_metrics)
#> List of 4
#>  $ R2  : num 0.982
#>  $ MSE : num 0.0699
#>  $ RMSE: num 0.264
#>  $ MAE : num 0.192

# Per-unit coefficient estimates (one row per spatial unit); the first rows
# are shown only when the fit actually carries a results table
unit_results <- fit$spatial_results
if (!is.null(unit_results)) {
  head(unit_results)
}
#>   unit_id status coef_(Intercept)  coef_x1    coef_x2
#> 1       1     ok       0.09506111 1.454001 -0.7834870
#> 2       2     ok       0.08224925 1.448562 -0.8178750
#> 3       3     ok       0.03704843 1.441328 -0.8517107
#> 4       4     ok       0.10574378 1.497491 -0.8031366
#> 5       5     ok       0.08602491 1.468562 -0.8307433
#> 6       6     ok       0.03501923 1.440875 -0.8499490

Full pipeline with gwpr()

# Run the full gwpr() pipeline at a fixed bandwidth so that no bandwidth
# search is re-run. The search step that produced this value is not shown in
# this document, so the selected bandwidth (0.5, as reported in the printed
# summary below) is set explicitly to keep the example self-contained.
best_bandwidth <- 0.5

full_fit <- gwpr(
  formula     = y ~ x1 + x2,
  data        = dat,
  spatial     = pts,
  id          = "id",
  time        = "time",
  bandwidth   = best_bandwidth,
  family      = "gaussian",
  model       = "pooling",
  diagnostics = FALSE,   # skip diagnostics for speed
  workers     = 1
)

# Print the fit summary (family, model, effect, bandwidth, and metrics).
print(full_fit)
#> Geographically Weighted Panel Regression (gwpr_fit)
#> ----------------------------------------------------
#> Family   : gaussian 
#> Model    : pooling 
#> Effect   : individual 
#> Bandwidth: 0.5 
#> Metrics  :
#>   R2           0.9882
#>   MSE          0.0448
#>   RMSE         0.2117
#>   MAE          0.1425

Diagnostics

# Run the requested diagnostic tests on the model fitted above. This is a
# separate step because that fit was created with diagnostics = FALSE.
diag_result <- diagnose_gwpr(
  full_fit,
  diagnostics = c("f_test", "hausman", "lm_test")
)

# Print the diagnostics summary (model type, panel balance, tests run).
print(diag_result)
#> GWPR Diagnostics (gwpr_diagnostics)
#> ------------------------------------
#> Model type   : gaussian 
#> Panel balance: TRUE 
#> Tests run    : f_test, hausman, lm_test

Long-running examples

The following code illustrates automatic bandwidth search via SGD and a binomial (logistic) GWPR. These are wrapped in \donttest{} in the function documentation because they may take more than a few seconds on larger datasets.

# Automatic SGD bandwidth search + fit (may take several seconds).
# No `bandwidth` is supplied here; `bandwidth_method = "sgd"` requests the
# search instead. `family` and `model` are omitted, so the function's
# defaults apply.
# NOTE(review): `n_iter` and `step_size` presumably set the number of SGD
# iterations and the update step size, and `seed` presumably makes the search
# reproducible -- confirm against the gwpr() documentation.
fit_auto <- gwpr(
  formula          = y ~ x1 + x2,
  data             = dat,
  spatial          = pts,
  id               = "id",
  time             = "time",
  bandwidth_method = "sgd",
  bandwidth_control = list(n_iter = 20, step_size = 0.1),
  workers          = 1,
  seed             = 123
)

# Binomial GWPR
# Derive a 0/1 response by thresholding the simulated continuous outcome at
# zero, then fit with family = "binomial" using the same fixed bandwidth (2)
# as the linear example.
dat$y_bin <- as.integer(dat$y > 0)
fit_logit <- fit_gwpr(
  formula   = y_bin ~ x1 + x2,
  data      = dat,
  spatial   = pts,
  id        = "id",
  time      = "time",
  bandwidth = 2,
  family    = "binomial",
  model     = "pooling",
  workers   = 1
)

mirror server hosted at Truenetwork, Russian Federation.