mlr3 Integration

ggmlR ships two mlr3 learners — classif.ggml and regr.ggml — that let you use ggmlR neural networks inside the mlr3 ecosystem for resampling, benchmarking, tuning, and pipelines.


Quick start

Classification

library(ggmlR)
library(mlr3)

task    <- tsk("iris")
learner <- lrn("classif.ggml",
               epochs     = 20L,
               batch_size = 16L,
               predict_type = "prob")

learner$train(task)
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.
pred <- learner$predict(task)
pred$score(msr("classif.acc"))
#> classif.acc 
#>        0.84

Regression

task    <- tsk("mtcars")
learner <- lrn("regr.ggml",
               epochs     = 50L,
               batch_size = 8L)

learner$train(task)
pred <- learner$predict(task)
pred$score(msr("regr.rmse"))
#> regr.rmse 
#>   17.0346

Learner parameters

Both learners share the same parameter set:

| Parameter          | Default      | Description                          |
|--------------------|--------------|--------------------------------------|
| `epochs`           | `10L`        | Training epochs                      |
| `batch_size`       | `32L`        | Mini-batch size                      |
| `optimizer`        | `"adam"`     | `"adam"` or `"sgd"`                  |
| `validation_split` | `0`          | Fraction held out for validation     |
| `verbose`          | `0L`         | Print training progress              |
| `backend`          | `"auto"`     | `"auto"`, `"cpu"`, or `"gpu"`        |
| `hidden_layers`    | `c(128, 64)` | Hidden layer sizes                   |
| `activation`       | `"relu"`     | Activation function                  |
| `dropout`          | `0.2`        | Dropout rate                         |
| `callbacks`        | `list()`     | ggmlR callback objects               |

Parameters can also be updated after construction:

learner <- lrn("classif.ggml")
learner$param_set$values$epochs        <- 30L
learner$param_set$values$hidden_layers <- c(256L, 128L, 64L)
learner$param_set$values$dropout       <- 0.3
learner$param_set$values$backend       <- "gpu"

GPU acceleration

Set backend = "gpu" (or leave "auto" if a Vulkan GPU is available):

learner <- lrn("classif.ggml",
               backend = "gpu",
               epochs  = 100L)
learner$train(tsk("iris"))
#> Note: dropping last 22 sample(s) (150 -> 128) because batch_size=32 must divide evenly. Training metrics are computed on 128 samples only.

Custom model architecture

By default the learners build an MLP via ggml_default_mlp(). To use a custom architecture, assign a builder function to the model_fn field. The function receives the task, input/output dimensions, and learner parameters:

learner <- lrn("classif.ggml",
               epochs     = 50L,
               batch_size = 16L)

learner$model_fn <- function(task, n_features, n_out, pars) {
  ggml_model_sequential() |>
    ggml_layer_dense(64L, activation = "relu", input_shape = n_features) |>
    ggml_layer_dropout(rate = 0.3) |>
    ggml_layer_dense(32L, activation = "relu") |>
    ggml_layer_dense(n_out, activation = "softmax")
}

learner$train(tsk("iris"))
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.

The pars argument gives access to all current learner parameters, so your builder can read pars$hidden_layers, pars$dropout, etc.


Resampling and benchmarking

The learners work with any mlr3 resampling strategy. Marshal support ensures models survive serialization across parallel workers.

task    <- tsk("iris")
learner <- lrn("classif.ggml",
               epochs     = 20L,
               batch_size = 16L,
               backend    = "cpu")

rr <- resample(task, learner, rsmp("cv", folds = 5L))
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
rr$aggregate(msr("classif.acc"))
#> classif.acc 
#>   0.9533333

Benchmarking against other learners:

future::plan("sequential")  # run sequentially so all learner packages are available

design <- benchmark_grid(
  tasks    = tsk("iris"),
  learners = list(
    lrn("classif.ggml", epochs = 20L, batch_size = 16L),
    lrn("classif.rpart")
  ),
  resamplings = rsmp("cv", folds = 5L)
)
bmr <- benchmark(design)
bmr$aggregate(msr("classif.acc"))

Hyperparameter tuning

Use mlr3tuning to search over ggmlR hyperparameters:

library(mlr3tuning)

learner <- lrn("classif.ggml", backend = "gpu")

search_space <- ps(
  epochs     = p_int(lower = 10L, upper = 100L),
  batch_size = p_int(lower = 8L,  upper = 64L),
  dropout    = p_dbl(lower = 0,   upper = 0.5)
)

instance <- ti(
  task       = tsk("iris"),
  learner    = learner,
  resampling = rsmp("cv", folds = 3L),
  measures   = msr("classif.acc"),
  terminator = trm("evals", n_evals = 20L)
)

tuner <- tnr("random_search")
tuner$optimize(instance)

instance$result

Pipelines with mlr3pipelines

ggmlR learners accept only numeric features. Use mlr3pipelines to encode factors automatically:

library(mlr3pipelines)

graph <- po("encode") %>>% lrn("classif.ggml", epochs = 20L)
glrn  <- as_learner(graph)

task <- tsk("penguins")
glrn$train(task)
glrn$predict(task)

For tasks that need imputation and scaling:

graph <-
  po("imputemedian") %>>%
  po("encode") %>>%
  po("scale") %>>%
  lrn("classif.ggml",
      epochs        = 30L,
      batch_size    = 16L,
      hidden_layers = c(64L, 32L))

glrn <- as_learner(graph)
glrn$train(tsk("penguins"))

Callbacks

Pass ggmlR callbacks through the callbacks parameter:

learner <- lrn("classif.ggml",
               epochs     = 200L,
               batch_size = 16L,
               callbacks  = list(
                 ggml_callback_early_stopping(
                   monitor  = "val_loss",
                   patience = 10L
                 )
               ),
               validation_split = 0.2)

learner$train(tsk("iris"))
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.

Observation weights

The classification learner honours task weights. Assign the weights_learner role to a column to upweight or downweight specific observations:

d <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y  = factor(rep(c("a", "b"), each = 50)),
  w  = c(rep(2.0, 50), rep(0.5, 50))
)
task <- as_task_classif(d, target = "y")
task$set_col_roles("w", roles = "weights_learner")

learner <- lrn("classif.ggml", epochs = 20L)
learner$train(task)
#> Note: dropping last 4 sample(s) (100 -> 96) because batch_size=32 must divide evenly. Training metrics are computed on 96 samples only.

Marshal and parallel execution

The learners implement mlr3’s marshal protocol. This means models can be serialized and deserialized for parallel execution (e.g. via future). Marshal uses ggml_save_model() / ggml_load_model() internally and preserves the original backend.

learner <- lrn("classif.ggml", epochs = 10L, backend = "cpu")
learner$train(tsk("iris"))
#> Note: dropping last 22 sample(s) (150 -> 128) because batch_size=32 must divide evenly. Training metrics are computed on 128 samples only.

learner$marshal()
learner$marshaled
#> [1] TRUE
#> [1] TRUE

learner$unmarshal()
learner$marshaled
#> [1] FALSE
#> [1] FALSE

# Predictions are identical after roundtrip
pred <- learner$predict(tsk("iris"))

You can also use the lower-level helpers directly:

model <- ggml_model_sequential() |>
  ggml_layer_dense(16L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L,  activation = "softmax")
model <- ggml_compile(model, optimizer = "adam",
                      loss = "categorical_crossentropy")

blob <- ggml_marshal_model(model)
blob
#> <ggmlR marshaled model>
#>   api:           sequential
#>   backend:       gpu
#>   format:        ggmlR.marshal v1
#>   ggmlR version: 0.7.2
#>   R version:     4.3.3
#>   created:       2026-04-14 21:52:22
#>   payload size:  349 bytes
#>   sha256:        e2a069a601f97004...

model2 <- ggml_unmarshal_model(blob)

Feature types

Only numeric features are supported natively. Use mlr3pipelines to handle factors, characters, dates, etc.

# This will error:
# lrn("classif.ggml")$train(tsk("german_credit"))

# This works:
graph <- po("encode") %>>% lrn("classif.ggml", epochs = 20L)
as_learner(graph)$train(tsk("german_credit"))

Summary

|                   | `classif.ggml`                            | `regr.ggml` |
|-------------------|-------------------------------------------|-------------|
| Task type         | Classification                            | Regression  |
| Predict types     | `response`, `prob`                        | `response`  |
| Feature types     | numeric                                   | numeric     |
| Properties        | multiclass, twoclass, weights, marshal    | marshal     |
| Custom `model_fn` | Yes                                       | Yes         |
