ggmlR ships two mlr3 learners — classif.ggml and
regr.ggml — that let you use ggmlR neural networks inside
the mlr3 ecosystem for resampling, benchmarking, tuning, and
pipelines.
library(ggmlR)
library(mlr3)
task <- tsk("iris")
learner <- lrn("classif.ggml",
epochs = 20L,
batch_size = 16L,
predict_type = "prob")
learner$train(task)
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.
pred <- learner$predict(task)
pred$score(msr("classif.acc"))
#> classif.acc
#>        0.84

Both learners share the same parameter set:
| Parameter | Default | Description |
|---|---|---|
| `epochs` | `10L` | Training epochs |
| `batch_size` | `32L` | Mini-batch size |
| `optimizer` | `"adam"` | `"adam"` or `"sgd"` |
| `validation_split` | `0` | Fraction held out for validation |
| `verbose` | `0L` | Print training progress |
| `backend` | `"auto"` | `"auto"`, `"cpu"`, or `"gpu"` |
| `hidden_layers` | `c(128, 64)` | Hidden layer sizes |
| `activation` | `"relu"` | Activation function |
| `dropout` | `0.2` | Dropout rate |
| `callbacks` | `list()` | ggmlR callback objects |
learner <- lrn("classif.ggml")
learner$param_set$values$epochs <- 30L
learner$param_set$values$hidden_layers <- c(256L, 128L, 64L)
learner$param_set$values$dropout <- 0.3
learner$param_set$values$backend <- "gpu"

Set backend = "gpu" (or leave "auto" if a
Vulkan GPU is available):
learner <- lrn("classif.ggml",
backend = "gpu",
epochs = 100L)
learner$train(tsk("iris"))
#> Note: dropping last 22 sample(s) (150 -> 128) because batch_size=32 must divide evenly. Training metrics are computed on 128 samples only.

By default the learners build an MLP via
ggml_default_mlp(). To use a custom architecture, assign a
builder function to the model_fn field. The function
receives the task, input/output dimensions, and learner parameters:
learner <- lrn("classif.ggml",
epochs = 50L,
batch_size = 16L)
learner$model_fn <- function(task, n_features, n_out, pars) {
ggml_model_sequential() |>
ggml_layer_dense(64L, activation = "relu", input_shape = n_features) |>
ggml_layer_dropout(rate = 0.3) |>
ggml_layer_dense(32L, activation = "relu") |>
ggml_layer_dense(n_out, activation = "softmax")
}
learner$train(tsk("iris"))
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.

The pars argument gives access to all current learner
parameters, so your builder can read pars$hidden_layers,
pars$dropout, etc.
The learners work with any mlr3 resampling strategy. Marshal support ensures models survive serialization across parallel workers.
task <- tsk("iris")
learner <- lrn("classif.ggml",
epochs = 20L,
batch_size = 16L,
backend = "cpu")
rr <- resample(task, learner, rsmp("cv", folds = 5L))
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
#> Note: dropping last 8 sample(s) (120 -> 112) because batch_size=16 must divide evenly. Training metrics are computed on 112 samples only.
rr$aggregate(msr("classif.acc"))
#> classif.acc
#> 0.9533333

Benchmarking against other learners:
future::plan("sequential") # ensure rpart is visible in all workers
design <- benchmark_grid(
tasks = tsk("iris"),
learners = list(
lrn("classif.ggml", epochs = 20L, batch_size = 16L),
lrn("classif.rpart")
),
resamplings = rsmp("cv", folds = 5L)
)
bmr <- benchmark(design)
bmr$aggregate(msr("classif.acc"))

Use mlr3tuning to search over ggmlR hyperparameters:
library(mlr3tuning)
learner <- lrn("classif.ggml", backend = "gpu")
search_space <- ps(
epochs = p_int(lower = 10L, upper = 100L),
batch_size = p_int(lower = 8L, upper = 64L),
dropout = p_dbl(lower = 0, upper = 0.5)
)
instance <- ti(
task = tsk("iris"),
learner = learner,
resampling = rsmp("cv", folds = 3L),
measures = msr("classif.acc"),
terminator = trm("evals", n_evals = 20L)
)
tuner <- tnr("random_search")
tuner$optimize(instance)
instance$result

ggmlR learners accept only numeric features. Use
mlr3pipelines to encode factors automatically:
library(mlr3pipelines)
graph <- po("encode") %>>% lrn("classif.ggml", epochs = 20L)
glrn <- as_learner(graph)
task <- tsk("penguins")
glrn$train(task)
glrn$predict(task)

For tasks that need imputation and scaling:
graph <-
po("imputemedian") %>>%
po("encode") %>>%
po("scale") %>>%
lrn("classif.ggml",
epochs = 30L,
batch_size = 16L,
hidden_layers = c(64L, 32L))
glrn <- as_learner(graph)
glrn$train(tsk("penguins"))

Pass ggmlR callbacks through the callbacks
parameter:
learner <- lrn("classif.ggml",
epochs = 200L,
batch_size = 16L,
callbacks = list(
ggml_callback_early_stopping(
monitor = "val_loss",
patience = 10L
)
),
validation_split = 0.2)
learner$train(tsk("iris"))
#> Note: dropping last 6 sample(s) (150 -> 144) because batch_size=16 must divide evenly. Training metrics are computed on 144 samples only.

The classification learner honours task weights. Assign a
weights_learner column to upweight or downweight specific
observations:
d <- data.frame(
x1 = rnorm(100),
x2 = rnorm(100),
y = factor(rep(c("a", "b"), each = 50)),
w = c(rep(2.0, 50), rep(0.5, 50))
)
task <- as_task_classif(d, target = "y")
task$set_col_roles("w", roles = "weights_learner")
learner <- lrn("classif.ggml", epochs = 20L)
learner$train(task)
#> Note: dropping last 4 sample(s) (100 -> 96) because batch_size=32 must divide evenly. Training metrics are computed on 96 samples only.

The learners implement mlr3’s marshal protocol. This means models can
be serialized and deserialized for parallel execution (e.g. via
future). Marshal uses ggml_save_model() /
ggml_load_model() internally and preserves the original
backend.
learner <- lrn("classif.ggml", epochs = 10L, backend = "cpu")
learner$train(tsk("iris"))
#> Note: dropping last 22 sample(s) (150 -> 128) because batch_size=32 must divide evenly. Training metrics are computed on 128 samples only.
learner$marshal()
learner$marshaled
#> [1] TRUE
#> [1] TRUE
learner$unmarshal()
learner$marshaled
#> [1] FALSE
#> [1] FALSE
# Predictions are identical after roundtrip
pred <- learner$predict(tsk("iris"))

You can also use the lower-level helpers directly:
model <- ggml_model_sequential() |>
ggml_layer_dense(16L, activation = "relu", input_shape = 4L) |>
ggml_layer_dense(3L, activation = "softmax")
model <- ggml_compile(model, optimizer = "adam",
loss = "categorical_crossentropy")
blob <- ggml_marshal_model(model)
blob
#> <ggmlR marshaled model>
#> api: sequential
#> backend: gpu
#> format: ggmlR.marshal v1
#> ggmlR version: 0.7.2
#> R version: 4.3.3
#> created: 2026-04-14 21:52:22
#> payload size: 349 bytes
#> sha256: e2a069a601f97004...
model2 <- ggml_unmarshal_model(blob)

Only numeric features are supported natively. Use
mlr3pipelines to handle factors, characters, dates,
etc.
# This will error:
# lrn("classif.ggml")$train(tsk("german_credit"))
# This works:
graph <- po("encode") %>>% lrn("classif.ggml", epochs = 20L)
as_learner(graph)$train(tsk("german_credit"))

| | classif.ggml | regr.ggml |
|---|---|---|
| Task type | Classification | Regression |
| Predict types | response, prob | response |
| Feature types | numeric | numeric |
| Properties | multiclass, twoclass, weights, marshal | marshal |
| Custom model_fn | Yes | Yes |