Introduction to snreg

library(snreg)

Overview

The snreg package offers a set of methods for conducting regression analysis when the model errors follow a skew-normal distribution.

Framework

The snreg package implements the framework developed in:

Oleg Badunenko and Daniel J. Henderson (2023). “Production analysis with asymmetric noise”. Journal of Productivity Analysis, 61(1), 1–18. DOI

Data

We illustrate the functionality using the banks07 dataset, which contains selected variables for 500 U.S. commercial banks randomly sampled for the year 2007.

Note: The dataset is provided solely for illustration and pedagogical purposes and is not suitable for empirical research.

data(banks07, package = "snreg")
head(banks07)
#>       id year        TC       Y1        Y2       W1       W2       W3        ER
#> 1 152440 2007  4659.505  8661.84  58393.04 38.55422 37.75000 4.146092 0.1117789
#> 2 544335 2007 10848.960 19492.91 146789.00 30.66785 45.13934 2.263886 0.1139121
#> 3 548203 2007 14523.650 18630.49 219705.70 27.38797 56.02857 4.312672 0.1342984
#> 4 568238 2007  1644.808 11902.50  18675.68 32.84768 50.47368 1.541316 0.1029891
#> 5 651158 2007  3054.240 29322.21  38916.14 42.20033 52.23529 3.192056 0.1421004
#> 6 705444 2007  6168.736 36568.03  67179.16 11.95771 41.46667 3.181241 0.1449880
#>          LA        TA    ZSCORE  ZSCORE3     SDROA       LLP lnzscore lnzscore3
#> 1 0.7781611  75039.78  29.49198 11.93898 0.4532092  732.4904 3.384118  2.479809
#> 2 0.7679300 191148.92  62.97659 20.66493 0.2298561  305.9889 4.142763  3.028438
#> 3 0.8880010 247416.05  38.99969 30.88642 0.3874364 1534.6521 3.663554  3.430317
#> 4 0.5050412  36978.53  30.68124 24.53655 0.4063778    0.0000 3.423651  3.200164
#> 5 0.5264002  73928.81  30.02672 28.42257 0.5496323    0.0000 3.402088  3.347184
#> 6 0.5222276 128639.62 110.67650 37.66002 0.1502981    0.0000 4.706612  3.628599
#>      lnsdroa     scope ms_county
#> 1 -0.7914014 0.4801792 0.3466652
#> 2 -1.4703018 0.5254947 0.8830606
#> 3 -0.9482036 0.8897177 1.1430008
#> 4 -0.9004720 0.3917692 0.1708316
#> 5 -0.5985058 0.5523984 0.3415328
#> 6 -1.8951346 0.4073012 0.5942832

Model Specification

Define the translog cost function specification:

spe.tl <- log(TC) ~ (log(Y1) + log(Y2) + log(W1) + log(W2))^2 +
  I(0.5 * log(Y1)^2) + I(0.5 * log(Y2)^2) +
  I(0.5 * log(W1)^2) + I(0.5 * log(W2)^2)

Linear Regression via MLE

Homoskedastic Model

formSV <- NULL

m1 <- lm.mle(
  formula   = spe.tl,
  data      = banks07,
  ln.var.v  = formSV
)

coef(m1)
#>          (Intercept)              log(Y1)              log(Y2) 
#>         -4.386991731          0.415874408          0.370050861 
#>              log(W1)              log(W2)   I(0.5 * log(Y1)^2) 
#>          0.524152369          1.342097991          0.047454756 
#>   I(0.5 * log(Y2)^2)   I(0.5 * log(W1)^2)   I(0.5 * log(W2)^2) 
#>          0.077484315          0.034996665         -0.226281101 
#>      log(Y1):log(Y2)      log(Y1):log(W1)      log(Y1):log(W2) 
#>         -0.057609631         -0.020462972         -0.005676474 
#>      log(Y2):log(W1)      log(Y2):log(W2)      log(W1):log(W2) 
#>          0.013639077          0.018840745         -0.154925076 
#> lnVARv0i_(Intercept) 
#>         -3.687587268

Heteroskedastic Model

Variance depends on total assets (TA):

formSV <- ~ log(TA)

m2 <- lm.mle(
  formula   = spe.tl,
  data      = banks07,
  ln.var.v  = formSV
)

coef(m2)
#>          (Intercept)              log(Y1)              log(Y2) 
#>        -4.3869917305         0.4158744170         0.3700508703 
#>              log(W1)              log(W2)   I(0.5 * log(Y1)^2) 
#>         0.5241523719         1.3420979943         0.0474548059 
#>   I(0.5 * log(Y2)^2)   I(0.5 * log(W1)^2)   I(0.5 * log(W2)^2) 
#>         0.0774843635         0.0349966695        -0.2262810941 
#>      log(Y1):log(Y2)      log(Y1):log(W1)      log(Y1):log(W2) 
#>        -0.0576095323        -0.0204629424        -0.0056764377 
#>      log(Y2):log(W1)      log(Y2):log(W2)      log(W1):log(W2) 
#>         0.0136391061         0.0188407818        -0.1549250651 
#> lnVARv0i_(Intercept)     lnVARv0i_log(TA) 
#>        -3.6875872440        -0.0003644791

Linear Regression with Skew-Normal Errors

The snreg() function fits a linear regression model where the disturbance term follows a skew-normal distribution.

Homoskedastic and Symmetric Model

formSV <- NULL     # variance equation
formSK <- NULL     # skewness equation

m1 <- snreg(
  formula  = spe.tl,
  data     = banks07,
  ln.var.v = formSV,
  skew.v   = formSK
)

coef(m1)
#>          (Intercept)              log(Y1)              log(Y2) 
#>         -5.884394817          0.439869125          0.504978469 
#>              log(W1)              log(W2)   I(0.5 * log(Y1)^2) 
#>          0.546801118          1.622213193          0.041529597 
#>   I(0.5 * log(Y2)^2)   I(0.5 * log(W1)^2)   I(0.5 * log(W2)^2) 
#>          0.067752458          0.039855132         -0.284215702 
#>      log(Y1):log(Y2)      log(Y1):log(W1)      log(Y1):log(W2) 
#>         -0.055954492         -0.021953387          0.000322907 
#>      log(Y2):log(W1)      log(Y2):log(W2)      log(W1):log(W2) 
#>          0.012945689          0.009754988         -0.158033814 
#> lnVARv0i_(Intercept) Skew_v0i_(Intercept) 
#>         -3.010768472          1.866035868

Heteroskedastic with Skewed Noise

formSV <- ~ log(TA)   # heteroskedasticity in v
formSK <- ~ ER        # skewness driven by equity ratio

m2 <- snreg(
  formula  = spe.tl,
  data     = banks07,
  ln.var.v = formSV,
  skew.v   = formSK
)

coef(m2)
#>               (Intercept)                   log(Y1)                   log(Y2) 
#>             -5.7150133374              0.4521618623              0.6467104718 
#>                   log(W1)                   log(W2)        I(0.5 * log(Y1)^2) 
#>              0.4891776489              1.1553013814              0.0407090640 
#>        I(0.5 * log(Y2)^2)        I(0.5 * log(W1)^2)        I(0.5 * log(W2)^2) 
#>              0.0617096653              0.0427144069             -0.1599932243 
#>           log(Y1):log(Y2)           log(Y1):log(W1)           log(Y1):log(W2) 
#>             -0.0575785975             -0.0188789363              0.0016515541 
#>           log(Y2):log(W1)           log(Y2):log(W2)           log(W1):log(W2) 
#>              0.0051605363             -0.0002226092             -0.1310764688 
#>      lnVARv0i_(Intercept) lnVARv0i_lnVARv0i_log(TA)      Skew_v0i_(Intercept) 
#>             -0.8945288842             -0.1773003673              3.1056774984 
#>      Skew_v0i_Skew_v0i_ER 
#>             -9.5520895773

Stochastic Frontier Model with Skew-Normal Errors

The snsf() function performs maximum likelihood estimation of parameters and technical or cost efficiencies in a Stochastic Frontier Model with a skew-normally distributed error term.

Homoskedastic and Symmetric Model

myprod <- FALSE

formSV <- NULL   # variance equation
formSK <- NULL   # skewness equation

m1 <- snsf(
  formula  = spe.tl,
  data     = banks07,
  prod     = myprod,
  ln.var.v = formSV,
  skew.v   = formSK
)

coef(m1)
#>          (Intercept)              log(Y1)              log(Y2) 
#>        -6.3606134654         0.4425312174         0.5660453675 
#>              log(W1)              log(W2)   I(0.5 * log(Y1)^2) 
#>         0.4983319531         1.6557355502         0.0448519992 
#>   I(0.5 * log(Y2)^2)   I(0.5 * log(W1)^2)   I(0.5 * log(W2)^2) 
#>         0.0649995029         0.0362747554        -0.3099280819 
#>      log(Y1):log(Y2)      log(Y1):log(W1)      log(Y1):log(W2) 
#>        -0.0586056701        -0.0226917826        -0.0005509121 
#>      log(Y2):log(W1)      log(Y2):log(W2)      log(W1):log(W2) 
#>         0.0117430476         0.0106095399        -0.1375404433 
#> lnVARv0i_(Intercept) Skew_v0i_(Intercept) lnVARu0i_(Intercept) 
#>        -3.8206601915        -1.5006628040        -4.3422317628

Heteroskedastic with Skewed Noise

formSV <- ~ log(TA)      # heteroskedastic variance
formSK <- ~ ER           # skewness driver

m2 <- snsf(
  formula  = spe.tl,
  data     = banks07,
  prod     = myprod,
  ln.var.v = formSV,
  skew.v   = formSK
)

coef(m2)
#>          (Intercept)              log(Y1)              log(Y2) 
#>        -5.944090e+00         4.589482e-01         6.134700e-01 
#>              log(W1)              log(W2)   I(0.5 * log(Y1)^2) 
#>         5.167646e-01         1.270694e+00         4.138294e-02 
#>   I(0.5 * log(Y2)^2)   I(0.5 * log(W1)^2)   I(0.5 * log(W2)^2) 
#>         6.141001e-02         4.385504e-02        -2.077538e-01 
#>      log(Y1):log(Y2)      log(Y1):log(W1)      log(Y1):log(W2) 
#>        -5.745504e-02        -2.151307e-02         1.351571e-05 
#>      log(Y2):log(W1)      log(Y2):log(W2)      log(W1):log(W2) 
#>         6.752251e-03         8.627646e-03        -1.368348e-01 
#> lnVARv0i_(Intercept)     lnVARv0i_log(TA) Skew_v0i_(Intercept) 
#>        -1.883867e+00        -1.558322e-01         2.004266e+00 
#>          Skew_v0i_ER lnVARu0i_(Intercept) 
#>        -7.263169e+00        -4.640750e+00

Acknowledgments

The R package snreg computes Owen’s T function using C code written by John Burkardt. This implementation, distributed under the MIT license, is publicly accessible at https://people.sc.fsu.edu/~jburkardt/c_src/owen/owen.html.

References

Badunenko, O. and Henderson, D.J. (2023). Production analysis with asymmetric noise. Journal of Productivity Analysis, 61(1), 1–18. https://doi.org/10.1007/s11123-023-00680-5

mirror server hosted at Truenetwork, Russian Federation.