tidymodels / parsnip Integration

ggmlR registers a "ggml" engine for parsnip::mlp(), giving you GPU-accelerated neural networks inside the tidymodels ecosystem — resampling, tuning, workflows, and recipes all work out of the box.

library(ggmlR)
library(parsnip)

1. Classification

spec <- mlp(
  hidden_units = c(64L, 32L),
  epochs       = 20L,
  dropout      = 0.1
) |>
  set_engine("ggml") |>
  set_mode("classification")

fit_obj <- fit(spec, Species ~ ., data = iris)
#> Note: dropping last 22 sample(s) (150 -> 128) because batch_size=32 must divide evenly. Training metrics are computed on 128 samples only.

# Class predictions
preds <- predict(fit_obj, new_data = iris)
head(preds)
#> # A tibble: 6 × 1
#>   .pred_class
#>   <fct>      
#> 1 setosa     
#> 2 setosa     
#> 3 setosa     
#> 4 setosa     
#> 5 setosa     
#> 6 setosa

# Probability predictions
probs <- predict(fit_obj, new_data = iris, type = "prob")
head(probs)
#> # A tibble: 6 × 3
#>   .pred_setosa .pred_versicolor .pred_virginica
#>          <dbl>            <dbl>           <dbl>
#> 1        0.960           0.0272          0.0129
#> 2        0.938           0.0441          0.0179
#> 3        0.946           0.0366          0.0174
#> 4        0.919           0.0588          0.0217
#> 5        0.960           0.0271          0.0130
#> 6        0.957           0.0320          0.0112

# Accuracy
cat(sprintf("Accuracy: %.4f\n", mean(preds$.pred_class == iris$Species)))
#> Accuracy: 0.6667

2. Regression

spec_reg <- mlp(
  hidden_units = c(64L, 32L),
  epochs       = 50L
) |>
  set_engine("ggml") |>
  set_mode("regression")

fit_reg <- fit(spec_reg, mpg ~ ., data = mtcars)

preds_reg <- predict(fit_reg, new_data = mtcars)
head(preds_reg)
#> # A tibble: 6 × 1
#>   .pred
#>   <dbl>
#> 1  19.2
#> 2  19.3
#> 3  17.1
#> 4  17.1
#> 5  23.2
#> 6  16.7

3. Engine parameters

The ggml engine maps standard parsnip arguments to ggmlR internals:

| parsnip        | ggmlR           | Default      |
|----------------|-----------------|--------------|
| `hidden_units` | `hidden_layers` | `c(128, 64)` |
| `epochs`       | `epochs`        | `10`         |
| `dropout`      | `dropout`       | `0.2`        |
| `activation`   | `activation`    | `"relu"`     |
| `learn_rate`   | `learn_rate`    | `0.001`      |

# Customize architecture
spec_custom <- mlp(
  hidden_units = c(128L, 64L, 32L),
  epochs       = 30L,
  dropout      = 0.3,
  activation   = "relu"
) |>
  set_engine("ggml") |>
  set_mode("classification")

4. Resampling with rsample

library(rsample)

folds <- vfold_cv(iris, v = 5L)

spec <- mlp(hidden_units = c(32L), epochs = 10L) |>
  set_engine("ggml") |>
  set_mode("classification")

library(tune)
library(yardstick)
library(workflows)

wf <- workflow() |>
  add_model(spec) |>
  add_formula(Species ~ .)

results <- fit_resamples(wf, resamples = folds)
collect_metrics(results)

5. Recipes for preprocessing

ggmlR accepts only numeric features. Use recipes to handle factors, missing values, and scaling:

library(recipes)
library(workflows)

rec <- recipe(Species ~ ., data = iris) |>
  step_normalize(all_numeric_predictors())

spec <- mlp(hidden_units = c(32L), epochs = 10L) |>
  set_engine("ggml") |>
  set_mode("classification")

wf <- workflow() |>
  add_recipe(rec) |>
  add_model(spec)

fit_obj <- fit(wf, data = iris)
predict(fit_obj, new_data = iris)

For datasets with factors:

rec <- recipe(Species ~ ., data = iris) |>
  step_dummy(all_nominal_predictors()) |>
  step_normalize(all_numeric_predictors())

6. Hyperparameter tuning

library(tune)
library(dials)
library(workflows)

spec <- mlp(
  hidden_units = tune(),
  epochs       = tune(),
  dropout      = tune()
) |>
  set_engine("ggml") |>
  set_mode("classification")

wf <- workflow() |>
  add_model(spec) |>
  add_formula(Species ~ .)

grid <- grid_regular(
  hidden_units(range = c(16L, 128L)),
  epochs(range = c(10L, 50L)),
  dropout(range = c(0, 0.4)),
  levels = 3L
)

folds <- vfold_cv(iris, v = 3L)
results <- tune_grid(wf, resamples = folds, grid = grid)
show_best(results, metric = "accuracy")

7. Comparison with other engines

library(workflows)
library(workflowsets)

specs <- workflow_set(
  preproc = list(basic = Species ~ .),
  models  = list(
    ggml  = mlp(hidden_units = c(32L), epochs = 20L) |> set_engine("ggml"),
    nnet  = mlp(hidden_units = 32L,    epochs = 200L) |> set_engine("nnet")
  )
) |>
  workflow_map("fit_resamples",
               resamples = vfold_cv(iris, v = 5L))

rank_results(specs, rank_metric = "accuracy")

Summary

| Feature                 | Supported           |
|-------------------------|---------------------|
| Classification          | Yes (class, prob)   |
| Regression              | Yes (numeric)       |
| GPU (Vulkan)            | Yes (auto-detected) |
| Recipes / preprocessing | Yes                 |
| Resampling              | Yes                 |
| Tuning                  | Yes                 |
| Workflows               | Yes                 |

mirror server hosted at Truenetwork, Russian Federation.