mlr3 Integration

ggmlR ships two mlr3 learners — classif.ggml and regr.ggml — that let you use ggmlR neural networks inside the mlr3 ecosystem for resampling, benchmarking, tuning, and pipelines.


Quick start

Classification

library(ggmlR)
library(mlr3)

task    <- tsk("iris")
learner <- lrn("classif.ggml",
               epochs     = 20L,
               batch_size = 16L,
               predict_type = "prob")

learner$train(task)
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.
pred <- learner$predict(task)
pred$score(msr("classif.acc"))
#> classif.acc 
#>        0.84

Regression

task    <- tsk("mtcars")
learner <- lrn("regr.ggml",
               epochs     = 50L,
               batch_size = 8L)

learner$train(task)
pred <- learner$predict(task)
pred$score(msr("regr.rmse"))
#> regr.rmse 
#>   17.0346

Learner parameters

Both learners share the same parameter set:

| Parameter          | Default      | Description                          |
|--------------------|--------------|--------------------------------------|
| `epochs`           | `10L`        | Training epochs                      |
| `batch_size`       | `32L`        | Mini-batch size                      |
| `optimizer`        | `"adam"`     | `"adam"` or `"sgd"`                  |
| `validation_split` | `0`          | Fraction held out for validation     |
| `verbose`          | `0L`         | Print training progress              |
| `backend`          | `"auto"`     | `"auto"`, `"cpu"`, or `"gpu"`        |
| `hidden_layers`    | `c(128, 64)` | Hidden layer sizes                   |
| `activation`       | `"relu"`     | Activation function                  |
| `dropout`          | `0.2`        | Dropout rate                         |
| `callbacks`        | `list()`     | ggmlR callback objects               |

Parameters can also be updated after construction:

learner <- lrn("classif.ggml")
learner$param_set$values$epochs        <- 30L
learner$param_set$values$hidden_layers <- c(256L, 128L, 64L)
learner$param_set$values$dropout       <- 0.3
learner$param_set$values$backend       <- "gpu"

GPU acceleration

Set backend = "gpu" (or leave "auto" if a Vulkan GPU is available):

learner <- lrn("classif.ggml",
               backend = "gpu",
               epochs  = 100L)
learner$train(tsk("iris"))
#> Note: dropping last 22 sample(s) (150 -> 128) because batch_size=32 must divide evenly. Training metrics are computed on 128 samples only.

Custom model architecture

By default the learners build an MLP via ggml_default_mlp(). To use a custom architecture, assign a builder function to the model_fn field. The function receives the task, input/output dimensions, and learner parameters:

learner <- lrn("classif.ggml",
               epochs     = 50L,
               batch_size = 16L)

learner$model_fn <- function(task, n_features, n_out, pars) {
  ggml_model_sequential() |>
    ggml_layer_dense(64L, activation = "relu", input_shape = n_features) |>
    ggml_layer_dropout(rate = 0.3) |>
    ggml_layer_dense(32L, activation = "relu") |>
    ggml_layer_dense(n_out, activation = "softmax")
}

learner$train(tsk("iris"))
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.

The pars argument gives access to all current learner parameters, so your builder can read pars$hidden_layers, pars$dropout, etc.


Resampling and benchmarking

The learners work with any mlr3 resampling strategy. Marshal support ensures models survive serialization across parallel workers.

task    <- tsk("iris")
learner <- lrn("classif.ggml",
               epochs     = 20L,
               batch_size = 16L,
               backend    = "cpu")

rr <- resample(task, learner, rsmp("cv", folds = 5L))
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
rr$aggregate(msr("classif.acc"))
#> classif.acc 
#>   0.9533333

Benchmarking against other learners:

future::plan("sequential")  # run sequentially so all learner packages are available

design <- benchmark_grid(
  tasks    = tsk("iris"),
  learners = list(
    lrn("classif.ggml", epochs = 20L, batch_size = 16L),
    lrn("classif.rpart")
  ),
  resamplings = rsmp("cv", folds = 5L)
)
bmr <- benchmark(design)
bmr$aggregate(msr("classif.acc"))

Hyperparameter tuning

Use mlr3tuning to search over ggmlR hyperparameters:

library(mlr3tuning)

learner <- lrn("classif.ggml", backend = "gpu")

search_space <- ps(
  epochs     = p_int(lower = 10L, upper = 100L),
  batch_size = p_int(lower = 8L,  upper = 64L),
  dropout    = p_dbl(lower = 0,   upper = 0.5)
)

instance <- ti(
  task       = tsk("iris"),
  learner    = learner,
  resampling = rsmp("cv", folds = 3L),
  measures   = msr("classif.acc"),
  terminator = trm("evals", n_evals = 20L)
)

tuner <- tnr("random_search")
tuner$optimize(instance)

instance$result

Pipelines with mlr3pipelines

ggmlR learners accept only numeric features. Use mlr3pipelines to encode factors automatically:

library(mlr3pipelines)

graph <- po("encode") %>>% lrn("classif.ggml", epochs = 20L)
glrn  <- as_learner(graph)

task <- tsk("penguins")
glrn$train(task)
glrn$predict(task)

For tasks that need imputation and scaling:

graph <-
  po("imputemedian") %>>%
  po("encode") %>>%
  po("scale") %>>%
  lrn("classif.ggml",
      epochs        = 30L,
      batch_size    = 16L,
      hidden_layers = c(64L, 32L))

glrn <- as_learner(graph)
glrn$train(tsk("penguins"))

Callbacks

Pass ggmlR callbacks through the callbacks parameter:

learner <- lrn("classif.ggml",
               epochs     = 200L,
               batch_size = 16L,
               callbacks  = list(
                 ggml_callback_early_stopping(
                   monitor  = "val_loss",
                   patience = 10L
                 )
               ),
               validation_split = 0.2)

learner$train(tsk("iris"))
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.

Observation weights

The classification learner honours task weights. Assign the weights_learner role to a column to upweight or downweight specific observations:

d <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y  = factor(rep(c("a", "b"), each = 50)),
  w  = c(rep(2.0, 50), rep(0.5, 50))
)
task <- as_task_classif(d, target = "y")
task$set_col_roles("w", roles = "weights_learner")

learner <- lrn("classif.ggml", epochs = 20L)
learner$train(task)
#> Note: dropping last 4 sample(s) (100 -> 96) because batch_size=32 must divide evenly. Training metrics are computed on 96 samples only.

Marshal and parallel execution

The learners implement mlr3’s marshal protocol. This means models can be serialized and deserialized for parallel execution (e.g. via future). Marshal uses ggml_save_model() / ggml_load_model() internally and preserves the original backend.

learner <- lrn("classif.ggml", epochs = 10L, backend = "cpu")
learner$train(tsk("iris"))
#> Note: dropping last 22 sample(s) (150 -> 128) because batch_size=32 must divide evenly. Training metrics are computed on 128 samples only.

learner$marshal()
learner$marshaled
#> [1] TRUE
#> [1] TRUE

learner$unmarshal()
learner$marshaled
#> [1] FALSE
#> [1] FALSE

# Predictions are identical after roundtrip
pred <- learner$predict(tsk("iris"))

You can also use the lower-level helpers directly:

model <- ggml_model_sequential() |>
  ggml_layer_dense(16L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L,  activation = "softmax")
model <- ggml_compile(model, optimizer = "adam",
                      loss = "categorical_crossentropy")

blob <- ggml_marshal_model(model)
blob
#> <ggmlR marshaled model>
#>   api:           sequential
#>   backend:       gpu
#>   format:        ggmlR.marshal v1
#>   ggmlR version: 0.7.2
#>   R version:     4.3.3
#>   created:       2026-04-14 21:52:22
#>   payload size:  349 bytes
#>   sha256:        e2a069a601f97004...

model2 <- ggml_unmarshal_model(blob)

Feature types

Only numeric features are supported natively. Use mlr3pipelines to handle factors, characters, dates, etc.

# This will error:
# lrn("classif.ggml")$train(tsk("german_credit"))

# This works:
graph <- po("encode") %>>% lrn("classif.ggml", epochs = 20L)
as_learner(graph)$train(tsk("german_credit"))

Summary

|                   | `classif.ggml`                            | `regr.ggml` |
|-------------------|-------------------------------------------|-------------|
| Task type         | Classification                            | Regression  |
| Predict types     | `response`, `prob`                        | `response`  |
| Feature types     | numeric                                   | numeric     |
| Properties        | multiclass, twoclass, weights, marshal    | marshal     |
| Custom `model_fn` | Yes                                       | Yes         |
