## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 4
)

## ----data---------------------------------------------------------------------
library(mixedsubjectsirt)

set.seed(242424)

n_human     <- 120
n_generated <- 350
n_items     <- 5

human_pars <- data.frame(
  item = paste0("Item", seq_len(n_items)),
  a    = c(0.8, 1.0, 1.2, 1.35, 1.55),
  d    = c(-0.8, -0.3, 0.1, 0.45, 0.9)
)
human_pars$b <- -human_pars$d / human_pars$a

llm_pars   <- human_pars
llm_pars$a <- pmax(0.35, 0.9 * human_pars$a)
llm_pars$d <- human_pars$d + 0.25
llm_pars$b <- -llm_pars$d / llm_pars$a

theta_human <- rnorm(n_human)
observed    <- simulate_2pl(theta_human, human_pars)
predicted   <- simulate_2pl(theta_human, llm_pars)
generated   <- simulate_2pl(rnorm(n_generated), llm_pars)

lambda_grid <- c(0, 0.25, 0.5, 0.75, 1)

## ----ability-tuning-mml-------------------------------------------------------
ability_tuned_mml <- tune_lambda_ability_risk(
  lambda_grid  = lambda_grid,
  observed     = observed,
  predicted    = predicted,
  generated    = generated,
  target_resp  = observed,
  initial_pars = human_pars,
  method       = "grid",                # show the risk at each candidate value
  control      = list(maxit = 100)
)

ability_tuned_mml$summary[, c("lambda", "mean_param_var", "mean_total_risk",
                               "convergence", "selection_risk")]
ability_tuned_mml$best_lambda

tune_lambda_ability_risk(
  observed = observed, predicted = predicted, generated = generated,
  target_resp = observed, initial_pars = human_pars,
  control = list(maxit = 100)
)$best_lambda

## ----risk-components----------------------------------------------------------

risk <- ability_risk(
  resp        = observed,
  fit_or_pars = ability_tuned_mml$best_fit,
  vcov        = vcov(ability_tuned_mml$best_fit)
)
risk$summary

## ----risk-with-truth----------------------------------------------------------
ability_tuned_truth <- tune_lambda_ability_risk(
  lambda_grid  = lambda_grid,
  observed     = observed,
  predicted    = predicted,
  generated    = generated,
  target_resp  = observed,
  theta_true   = theta_human,
  initial_pars = human_pars,
  fit_fn       = fit_mixed_subjects_mml,
  method       = "grid",
  control      = list(maxit = 100)
)

ability_tuned_truth$summary[, c("lambda", "mean_param_var",
                                 "mean_squared_error", "mean_total_risk")]

## ----crossfit-----------------------------------------------------------------
split_id <- rep(1:2, length.out = nrow(observed))

crossfit_tuned <- tune_lambda_ability_risk_crossfit(
  lambda_grid  = c(0, 0.5, 1),
  observed     = observed,
  predicted    = predicted,
  generated    = generated,
  split_id     = split_id,
  initial_pars = human_pars,
  n_quad       = 7,
  control      = list(maxit = 100)
)

crossfit_tuned$lambda_by_split   # per-fold tuned lambda
crossfit_tuned$lambda_final      # fold-size-weighted scalar used for the final fit

## ----frozen-ec----------------------------------------------------------------
ability_tuned_ec <- tune_lambda_ability_risk(
  lambda_grid  = lambda_grid,
  observed     = observed,
  predicted    = predicted,
  generated    = generated,
  target_resp  = observed,
  initial_pars = human_pars,
  fit_fn       = fit_mixed_subjects,   # opt in to the frozen EC estimator
  n_quad       = 7,
  slope_upper  = 4,           # required to prevent divergence
  control      = list(maxit = 100)
)

ability_tuned_ec$best_lambda

## ----ppi-score----------------------------------------------------------------
ppi_score <- tune_lambda_ppi_score(
  observed    = observed,
  predicted   = predicted,
  item_pars   = human_pars,
  n_generated = nrow(generated)
)

cat("PPI++ score lambda:", round(ppi_score$lambda, 3),
    " r =", round(ppi_score$r, 3),
    " N/(n+N) =", round(1 / (1 + ppi_score$r), 3), "\n")

## ----ppi-score-2--------------------------------------------------------------
ppi_score_match <- tune_lambda_ppi_score(
  observed    = observed,
  predicted   = observed,
  item_pars   = human_pars,
  n_generated = nrow(generated)
)

cat("PPI++ score lambda:", round(ppi_score_match$lambda, 3),
    " r =", round(ppi_score$r, 3),
    " N/(n+N) =", round(1 / (1 + ppi_score_match$r), 3), "\n")

