## ----setup, include = FALSE---------------------------------------------------
# Set knitr chunk defaults for the whole document: collapse = TRUE and
# "#>" as the prefix (comment option) for printed output lines.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----data---------------------------------------------------------------------
library(oda)

# Cross-classification of 325 observations, sketched with biological type on
# rows and amino acid type on columns.
# (column-major reconstruction matches published Table 1)
#
#                  AA=1  AA=2  AA=3  AA=4   total
#  Biological=1     98    16     5     3      122
#  Biological=2     13    50     2     8       73
#  Biological=3      6     4    23    12       45
#  Biological=4      7    19    14    45       85
#  total           124    89    44    68      325
#
# The table is expanded into one element per observation by walking the cells
# one amino acid column at a time (biological types 1-4 within each column).

biological_type <- rep(
  rep(1:4, times = 4L),    # biological types 1-4, once per amino acid column
  times = c(
    98L, 13L,  6L,  7L,    # amino_acid = 1
    16L, 50L,  4L, 19L,    # amino_acid = 2
     5L,  2L, 23L, 14L,    # amino_acid = 3
     3L,  8L, 12L, 45L     # amino_acid = 4
  )
)

# Column totals of the table above give the run length of each amino acid type.
amino_acid_type <- rep(1:4, times = c(124L, 89L, 44L, 68L))

# Print the rebuilt cross-table. Amino acid type is the first argument, so it
# lands on the rows here, i.e. the transpose of the layout sketched above.
table(
  amino_acid_type,
  biological_type,
  dnn = c("Amino Acid Type (1-4)", "Biological Type (1-4)")
)

## ----fit-canonical, eval=FALSE------------------------------------------------
# # Canonical reference run (mc_iter = 25000L; not evaluated in CRAN vignette)
# fit <- oda_fit(
#   x         = amino_acid_type,
#   y         = biological_type,
#   attr_type = "categorical",
#   mc_iter   = 25000L,
#   loo       = "on"
# )

## ----fit----------------------------------------------------------------------
# CRAN-safe run: mc_iter = 500L for vignette rendering speed.
# Training rule, ESS, and confusion matrix are identical to the canonical run.
# Compared with the commented-out canonical call earlier in this file, this
# call lowers mc_iter from 25000L to 500L and adds an explicit mc_seed.
# NOTE(review): mc_iter / mc_seed presumably set the number of Monte Carlo
# iterations and the RNG seed, and loo = "on" presumably requests a
# leave-one-out analysis - confirm against the oda_fit documentation.
fit <- oda_fit(
  x         = amino_acid_type,
  y         = biological_type,
  attr_type = "categorical",
  mc_iter   = 500L,
  mc_seed   = 42L,
  loo       = "on"
)

## ----print-fit----------------------------------------------------------------
# Show the fitted object returned by oda_fit() via its print method.
print(fit)

## ----confusion----------------------------------------------------------------
# Confusion matrix (actual x predicted). The dimnames stored on the fit are
# dropped and replaced with short "Bio=" (row) and "Pred=" (column) labels so
# the printed matrix is compact.
conf_mat <- unname(fit$confusion)
dimnames(conf_mat) <- list(
  sprintf("Bio=%d", 1:4),
  sprintf("Pred=%d", 1:4)
)
print(conf_mat)

## ----metrics------------------------------------------------------------------
# Show the summary method's report for the fitted object.
summary(fit)

## ----pac-pv-------------------------------------------------------------------
m <- oda_metrics(fit)

# PAC (sensitivity) per class, rounded to one decimal place.
# pac_by_class is already on percentage scale, so no rescaling is applied.
cat("PAC by biological type:\n")
for (type_id in 1:4) {
  cat("  Type ", type_id, ": ", round(m$pac_by_class[type_id], 1), " %\n",
      sep = "")
}

# Predictive value per class: the diagonal count divided by the matching
# column sum of the confusion matrix, expressed as a percentage.
pv <- diag(fit$confusion) / colSums(fit$confusion) * 100
cat("\nPV by biological type:\n")
for (type_id in 1:4) {
  cat("  Type ", type_id, ": ", round(pv[type_id], 1), " %\n", sep = "")
}

