Introduction to the R package unifiedml

unifiedml wraps arbitrary R model-fitting functions behind one interface: Model$new() creates a model object, $fit() and $predict() train it and generate predictions, $print() summarises the fit (including numerical-derivative effects for regression), and cross_val_score() runs k-fold cross-validation. The task is detected automatically from the response: a numeric y gives regression, a factor gives classification. The examples below illustrate both tasks with glmnet, randomForest, nnet and e1071.
library(unifiedml) # this package
library(randomForest)
library(e1071)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-10
# ------------------------------------------------------------
# REGRESSION EXAMPLES
# ------------------------------------------------------------
cat("\n=== REGRESSION EXAMPLES ===\n\n")
##
## === REGRESSION EXAMPLES ===
# Example 1: Synthetic data (numeric y → automatic regression)
X <- matrix(rnorm(100), ncol = 4)
y <- 2*X[,1] - 1.5*X[,2] + rnorm(25) # numeric -> regression
mod <- Model$new(glmnet::glmnet)
mod$fit(X, y, alpha = 0, lambda = 0.1)
mod$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: regression
## Training samples: 25
## Features: 4
##
## Model Summary - Numerical Derivatives
## ======================================
## Task: regression
## Samples: 25 | Features: 4
## Step size (h): 0.01
##
## Feature Mean_Derivative Std_Error t_value p_value Significance
## X1 1.7545596 1.776357e-15 9.877292e+14 0.000000e+00 ***
## X2 -1.2065893 2.710400e-15 -4.451702e+14 0.000000e+00 ***
## X3 -0.1970950 2.279340e-15 -8.647022e+13 1.927745e-319 ***
## X4 0.4840615 1.573523e-15 3.076290e+14 0.000000e+00 ***
##
## Significance codes: 0 '***' 0.01 '**' 0.05 '*' 0.1 ' ' 1
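# The derivative summary above appears to be based on finite differences of
# the fitted predictor with the reported step size h. A minimal sketch of a
# central difference for one feature (illustrative only; the package's
# internal scheme may differ):
h <- 0.01
j <- 1                                    # feature of interest (X1)
X_up <- X; X_up[, j] <- X_up[, j] + h
X_dn <- X; X_dn[, j] <- X_dn[, j] - h
deriv_j <- (mod$predict(X_up) - mod$predict(X_dn)) / (2 * h)
mean(deriv_j)                             # compare with Mean_Derivative for X1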
print(head(mod$predict(X)))
## [1] -1.5235552 -2.6955258 2.0380043 -3.2384894 -0.9919252 0.3170660
(cv_scores <- cross_val_score(mod, X, y, cv = 5)) # auto-uses RMSE
## fold1 fold2 fold3 fold4 fold5
## 0.8246004 1.3110373 1.3896279 1.4518106 1.7828576
mean(cv_scores) # Average RMSE
## [1] 1.351987
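# Task detection is driven entirely by the type of the response: numeric y
# means regression, a factor means classification. A hypothetical one-line
# helper capturing that rule (the package's actual check may be more
# elaborate):
detect_task <- function(y) if (is.factor(y)) "classification" else "regression"
detect_task(y)             # "regression"
detect_task(iris$Species)  # "classification"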
# Example 2: Random Forest Regression
cat("\n2. Random Forest Regression - Auto-detected: Regression\n")
##
## 2. Random Forest Regression - Auto-detected: Regression
# randomForest regression
set.seed(123)
X <- MASS::Boston[, -ncol(MASS::Boston)]
y <- MASS::Boston$medv
mod2 <- Model$new(randomForest::randomForest) # No task parameter!
mod2$fit(X, y, ntree = 50)
mod2$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: regression
## Training samples: 506
## Features: 13
##
## Model Summary - Numerical Derivatives
## ======================================
## Task: regression
## Samples: 506 | Features: 13
## Step size (h): 0.01
##
## Feature Mean_Derivative Std_Error t_value p_value Significance
## crim 0.662793149 0.395133057 1.6773923 9.408457e-02 **
## zn 0.008695652 0.008695652 1.0000000 3.177894e-01
## indus 0.009298419 0.005877691 1.5819850 1.142791e-01 *
## chas 0.000000000 0.000000000 NaN NaN <NA>
## nox -7.542908523 1.404899994 -5.3690003 1.209489e-07 ***
## rm 3.872168299 0.497810491 7.7783984 4.165748e-14 ***
## age -0.078310277 0.058365068 -1.3417319 1.802859e-01 *
## dis -0.411075899 0.446098658 -0.9214910 3.572341e-01
## rad 0.031330698 0.023908458 1.3104441 1.906413e-01 *
## tax -0.028758235 0.032039105 -0.8975979 3.698277e-01
## ptratio -0.013899868 0.013598379 -1.0221709 3.071894e-01
## black 0.014163373 0.038784859 0.3651779 7.151315e-01
## lstat -0.310423225 0.109410051 -2.8372460 4.733392e-03 ***
##
## Significance codes: 0 '***' 0.01 '**' 0.05 '*' 0.1 ' ' 1
(cv2 <- cross_val_score(mod2, X, y, cv = 5L))
## fold1 fold2 fold3 fold4 fold5
## 2.732583 4.661567 3.610838 2.933593 2.780172
print(head(mod2$predict(X)))
## 1 2 3 4 5 6
## 24.90370 22.04237 34.66717 34.55993 34.98763 27.94497
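# For reference, the in-sample RMSE of the forest can be computed directly
# and compared with the cross-validated folds above; the latter are the more
# honest estimate of out-of-sample error:
sqrt(mean((y - mod2$predict(X))^2))  # training RMSE (optimistic)
mean(cv2)                            # mean cross-validated RMSE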
# ------------------------------------------------------------
# CLASSIFICATION EXAMPLES
# ------------------------------------------------------------
cat("\n\n=== CLASSIFICATION EXAMPLES ===\n\n")
##
##
## === CLASSIFICATION EXAMPLES ===
# Example: Iris dataset (factor y → automatic classification)
data(iris)
# Binary classification with factor
cat("3. Binary Classification with Factor Response\n")
## 3. Binary Classification with Factor Response
iris_binary <- iris[iris$Species %in% c("setosa", "versicolor"), ]
X_binary <- iris_binary[, 1:4]
y_binary <- as.factor(as.character(iris_binary$Species)) # factor → classification
mod4 <- Model$new(randomForest::randomForest) # No task parameter!
mod4$fit(X_binary, y_binary, ntree = 50)
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 100
## Features: 4
## Classes: setosa, versicolor
## Class distribution:
##
## setosa versicolor
## 50 50
print(head(mod4$predict(X_binary)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor
print(head(mod4$predict(X_binary, type="prob")))
## setosa versicolor
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 0
(cv4 <- cross_val_score(mod4, X_binary, y_binary, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 1 1 1 1 1
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 1
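# A perfect score is easier to trust alongside a confusion matrix; setosa
# and versicolor are linearly separable, so 100% accuracy is expected here
# rather than a sign of leakage:
table(Predicted = mod4$predict(X_binary), Actual = y_binary)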
mod4 <- Model$new(nnet::nnet)
mod4$fit(X_binary, y_binary, size=50, trace=FALSE)
print(head(mod4$predict(X_binary, type="class")))
## [1] setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor
(cv4 <- cross_val_score(mod4, X_binary, y_binary, cv = 5L,
                        fit_params = list(size = 50L, type = "class", trace = FALSE)))
## fold1 fold2 fold3 fold4 fold5
## 0.6 0.9 0.7 0.6 1.0
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0.76
mod4 <- Model$new(e1071::svm) # No task parameter!
mod4$fit(X_binary, y_binary)
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 100
## Features: 4
## Classes: setosa, versicolor
## Class distribution:
##
## setosa versicolor
## 50 50
print(head(mod4$predict(X_binary)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor
(cv4 <- cross_val_score(mod4, X_binary, y_binary, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 1 1 1 1 1
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 1
# Multi-class classification
cat("4. Multi-class Classification\n")
## 4. Multi-class Classification
X_multi <- iris[, 1:4]
y_multi <- iris$Species # factor with 3 levels → multi-class classification
mod4 <- Model$new(randomForest::randomForest) # No task parameter!
mod4$fit(X_multi, y_multi, ntree = 50)
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 150
## Features: 4
## Classes: setosa, versicolor, virginica
## Class distribution:
##
## setosa versicolor virginica
## 50 50 50
print(head(mod4$predict(X_multi)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
print(head(mod4$predict(X_multi, type="prob")))
## setosa versicolor virginica
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
(cv4 <- cross_val_score(mod4, X_multi, y_multi, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 0.9333333 1.0000000 0.9333333 0.9333333 1.0000000
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0.96
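# The cross-validated accuracy above is the usual proportion of correctly
# classified observations; the in-sample counterpart can be reproduced by hand:
mean(mod4$predict(X_multi) == y_multi)  # in-sample accuracy (optimistic)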
mod4 <- Model$new(nnet::nnet)
mod4$fit(X_multi, y_multi, size=50, trace=FALSE)
print(head(mod4$predict(X_multi, type="class")))
## [1] setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
(cv4 <- cross_val_score(mod4, X_multi, y_multi, cv = 5L,
                        fit_params = list(size = 50L, type = "class", trace = FALSE)))
## fold1 fold2 fold3 fold4 fold5
## 0 0 0 0 0
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0
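# A score of exactly 0 on every fold is implausibly low for iris (even a
# constant classifier would score about one third), which suggests the
# predicted and held-out labels may not be compared on the same encoding,
# rather than a genuinely useless model. A quick sanity check on the
# training data:
head(mod4$predict(X_multi, type = "class"))
mean(mod4$predict(X_multi, type = "class") == as.character(y_multi))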
mod4 <- Model$new(e1071::svm) # No task parameter!
mod4$fit(X_multi, y_multi, kernel="radial")
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 150
## Features: 4
## Classes: setosa, versicolor, virginica
## Class distribution:
##
## setosa versicolor virginica
## 50 50 50
print(head(mod4$predict(X_multi)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
(cv4 <- cross_val_score(mod4, X_multi, y_multi, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 0.9333333 1.0000000 0.9666667 0.9333333 1.0000000
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0.9666667
mod4 <- Model$new(e1071::svm) # No task parameter!
mod4$fit(X_multi, y_multi, kernel="polynomial", probability=TRUE)
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 150
## Features: 4
## Classes: setosa, versicolor, virginica
## Class distribution:
##
## setosa versicolor virginica
## 50 50 50
print(head(mod4$predict(X_multi)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
(cv4 <- cross_val_score(mod4, X_multi, y_multi, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 0.9333333 1.0000000 0.9666667 0.9333333 1.0000000
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0.9666667
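# This last model was fitted with probability = TRUE, but the class
# probabilities were not shown. Assuming additional predict arguments are
# forwarded to e1071's predict method (as type = "prob" was forwarded to
# randomForest above), they could be extracted along these lines:
pred <- mod4$predict(X_multi, probability = TRUE)
head(attr(pred, "probabilities"))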