Introduction to the R package unifiedml

unifiedml wraps arbitrary R model-fitting functions behind one interface: Model$new() creates a model object, $fit() and $predict() train it and generate predictions, $print() summarises the fit (including numerical-derivative effects for regression), and cross_val_score() runs k-fold cross-validation. The task is detected automatically from the response: a numeric y gives regression, a factor gives classification. The examples below illustrate both tasks with glmnet, randomForest, nnet and e1071.
library(unifiedml) # this package
library(randomForest)
library(e1071)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-10
# ------------------------------------------------------------
# REGRESSION EXAMPLES
# ------------------------------------------------------------
cat("\n=== REGRESSION EXAMPLES ===\n\n")
##
## === REGRESSION EXAMPLES ===
# Example 1: Synthetic data (numeric y → automatic regression)
X <- matrix(rnorm(100), ncol = 4)
y <- 2*X[,1] - 1.5*X[,2] + rnorm(25) # numeric -> regression
mod <- Model$new(glmnet::glmnet)
mod$fit(X, y, alpha = 0, lambda = 0.1)
mod$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: regression
## Training samples: 25
## Features: 4
##
## Model Summary - Numerical Derivatives
## ======================================
## Task: regression
## Samples: 25 | Features: 4
## Step size (h): 0.01
##
## Feature Mean_Derivative Std_Error t_value p_value Significance
## X1 1.7545596 1.776357e-15 9.877292e+14 0.000000e+00 ***
## X2 -1.2065893 2.710400e-15 -4.451702e+14 0.000000e+00 ***
## X3 -0.1970950 2.279340e-15 -8.647022e+13 1.927745e-319 ***
## X4 0.4840615 1.573523e-15 3.076290e+14 0.000000e+00 ***
##
## Significance codes: 0 '***' 0.01 '**' 0.05 '*' 0.1 ' ' 1
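# The derivative summary above appears to be based on finite differences of
# the fitted predictor with the reported step size h. A minimal sketch of a
# central difference for one feature (illustrative only; the package's
# internal scheme may differ):
h <- 0.01
j <- 1                                    # feature of interest (X1)
X_up <- X; X_up[, j] <- X_up[, j] + h
X_dn <- X; X_dn[, j] <- X_dn[, j] - h
deriv_j <- (mod$predict(X_up) - mod$predict(X_dn)) / (2 * h)
mean(deriv_j)                             # compare with Mean_Derivative for X1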
print(head(mod$predict(X)))
## [1] -1.5235552 -2.6955258 2.0380043 -3.2384894 -0.9919252 0.3170660
(cv_scores <- cross_val_score(mod, X, y, cv = 5)) # auto-uses RMSE
## fold1 fold2 fold3 fold4 fold5
## 0.8246004 1.3110373 1.3896279 1.4518106 1.7828576
mean(cv_scores) # Average RMSE
## [1] 1.351987
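# Task detection is driven entirely by the type of the response: numeric y
# means regression, a factor means classification. A hypothetical one-line
# helper capturing that rule (the package's actual check may be more
# elaborate):
detect_task <- function(y) if (is.factor(y)) "classification" else "regression"
detect_task(y)             # "regression"
detect_task(iris$Species)  # "classification"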
# Example 2: Random Forest Regression
cat("\n2. Random Forest Regression - Auto-detected: Regression\n")
##
## 2. Random Forest Regression - Auto-detected: Regression
# randomForest regression
set.seed(123)
X <- MASS::Boston[, -ncol(MASS::Boston)]
y <- MASS::Boston$medv
mod2 <- Model$new(randomForest::randomForest) # No task parameter!
mod2$fit(X, y, ntree = 50)
mod2$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: regression
## Training samples: 506
## Features: 13
##
## Model Summary - Numerical Derivatives
## ======================================
## Task: regression
## Samples: 506 | Features: 13
## Step size (h): 0.01
##
## Feature Mean_Derivative Std_Error t_value p_value Significance
## crim 0.662793149 0.395133057 1.6773923 9.408457e-02 **
## zn 0.008695652 0.008695652 1.0000000 3.177894e-01
## indus 0.009298419 0.005877691 1.5819850 1.142791e-01 *
## chas 0.000000000 0.000000000 NaN NaN <NA>
## nox -7.542908523 1.404899994 -5.3690003 1.209489e-07 ***
## rm 3.872168299 0.497810491 7.7783984 4.165748e-14 ***
## age -0.078310277 0.058365068 -1.3417319 1.802859e-01 *
## dis -0.411075899 0.446098658 -0.9214910 3.572341e-01
## rad 0.031330698 0.023908458 1.3104441 1.906413e-01 *
## tax -0.028758235 0.032039105 -0.8975979 3.698277e-01
## ptratio -0.013899868 0.013598379 -1.0221709 3.071894e-01
## black 0.014163373 0.038784859 0.3651779 7.151315e-01
## lstat -0.310423225 0.109410051 -2.8372460 4.733392e-03 ***
##
## Significance codes: 0 '***' 0.01 '**' 0.05 '*' 0.1 ' ' 1
(cv2 <- cross_val_score(mod2, X, y, cv = 5L))
## fold1 fold2 fold3 fold4 fold5
## 2.732583 4.661567 3.610838 2.933593 2.780172
print(head(mod2$predict(X)))
## 1 2 3 4 5 6
## 24.90370 22.04237 34.66717 34.55993 34.98763 27.94497
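# For reference, the in-sample RMSE of the forest can be computed directly
# and compared with the cross-validated folds above; the latter are the more
# honest estimate of out-of-sample error:
sqrt(mean((y - mod2$predict(X))^2))  # training RMSE (optimistic)
mean(cv2)                            # mean cross-validated RMSE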
# ------------------------------------------------------------
# CLASSIFICATION EXAMPLES
# ------------------------------------------------------------
cat("\n\n=== CLASSIFICATION EXAMPLES ===\n\n")
##
##
## === CLASSIFICATION EXAMPLES ===
# Example: Iris dataset (factor y → automatic classification)
data(iris)
# Binary classification with factor
cat("3. Binary Classification with Factor Response\n")
## 3. Binary Classification with Factor Response
iris_binary <- iris[iris$Species %in% c("setosa", "versicolor"), ]
X_binary <- iris_binary[, 1:4]
y_binary <- as.factor(as.character(iris_binary$Species)) # factor → classification
mod4 <- Model$new(randomForest::randomForest) # No task parameter!
mod4$fit(X_binary, y_binary, ntree = 50)
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 100
## Features: 4
## Classes: setosa, versicolor
## Class distribution:
##
## setosa versicolor
## 50 50
print(head(mod4$predict(X_binary)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor
print(head(mod4$predict(X_binary, type="prob")))
## setosa versicolor
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 0
(cv4 <- cross_val_score(mod4, X_binary, y_binary, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 1 1 1 1 1
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 1
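# A perfect score is easier to trust alongside a confusion matrix; setosa
# and versicolor are linearly separable, so 100% accuracy is expected here
# rather than a sign of leakage:
table(Predicted = mod4$predict(X_binary), Actual = y_binary)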
mod4 <- Model$new(nnet::nnet)
mod4$fit(X_binary, y_binary, size=50, trace=FALSE)
print(head(mod4$predict(X_binary, type="class")))
## [1] setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor
(cv4 <- cross_val_score(mod4, X_binary, y_binary, cv = 5L,
                        fit_params = list(size = 50L, type = "class", trace = FALSE)))
## fold1 fold2 fold3 fold4 fold5
## 0.6 0.9 0.7 0.6 1.0
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0.76
mod4 <- Model$new(e1071::svm) # No task parameter!
mod4$fit(X_binary, y_binary)
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 100
## Features: 4
## Classes: setosa, versicolor
## Class distribution:
##
## setosa versicolor
## 50 50
print(head(mod4$predict(X_binary)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor
(cv4 <- cross_val_score(mod4, X_binary, y_binary, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 1 1 1 1 1
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 1
# Multi-class classification
cat("4. Multi-class Classification\n")
## 4. Multi-class Classification
X_multi <- iris[, 1:4]
y_multi <- iris$Species # factor with 3 levels → multi-class classification
mod4 <- Model$new(randomForest::randomForest) # No task parameter!
mod4$fit(X_multi, y_multi, ntree = 50)
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 150
## Features: 4
## Classes: setosa, versicolor, virginica
## Class distribution:
##
## setosa versicolor virginica
## 50 50 50
print(head(mod4$predict(X_multi)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
print(head(mod4$predict(X_multi, type="prob")))
## setosa versicolor virginica
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
(cv4 <- cross_val_score(mod4, X_multi, y_multi, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 0.9333333 1.0000000 0.9333333 0.9333333 1.0000000
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0.96
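# The cross-validated accuracy above is the usual proportion of correctly
# classified observations; the in-sample counterpart can be reproduced by hand:
mean(mod4$predict(X_multi) == y_multi)  # in-sample accuracy (optimistic)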
mod4 <- Model$new(nnet::nnet)
mod4$fit(X_multi, y_multi, size=50, trace=FALSE)
print(head(mod4$predict(X_multi, type="class")))
## [1] setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
(cv4 <- cross_val_score(mod4, X_multi, y_multi, cv = 5L,
                        fit_params = list(size = 50L, type = "class", trace = FALSE)))
## fold1 fold2 fold3 fold4 fold5
## 0 0 0 0 0
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0
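# A score of exactly 0 on every fold is implausibly low for iris (even a
# constant classifier would score about one third), which suggests the
# predicted and held-out labels may not be compared on the same encoding,
# rather than a genuinely useless model. A quick sanity check on the
# training data:
head(mod4$predict(X_multi, type = "class"))
mean(mod4$predict(X_multi, type = "class") == as.character(y_multi))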
mod4 <- Model$new(e1071::svm) # No task parameter!
mod4$fit(X_multi, y_multi, kernel="radial")
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 150
## Features: 4
## Classes: setosa, versicolor, virginica
## Class distribution:
##
## setosa versicolor virginica
## 50 50 50
print(head(mod4$predict(X_multi)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
(cv4 <- cross_val_score(mod4, X_multi, y_multi, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 0.9333333 1.0000000 0.9666667 0.9333333 1.0000000
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0.9666667
mod4 <- Model$new(e1071::svm) # No task parameter!
mod4$fit(X_multi, y_multi, kernel="polynomial", probability=TRUE)
mod4$print()
## Model Object
## ------------
## Model function: self$model_fn
## Fitted: TRUE
## Task: classification
## Training samples: 150
## Features: 4
## Classes: setosa, versicolor, virginica
## Class distribution:
##
## setosa versicolor virginica
## 50 50 50
print(head(mod4$predict(X_multi)))
## 1 2 3 4 5 6
## setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
(cv4 <- cross_val_score(mod4, X_multi, y_multi, cv = 5L)) # Auto-uses accuracy
## fold1 fold2 fold3 fold4 fold5
## 0.9333333 1.0000000 0.9666667 0.9333333 1.0000000
cat("\nMean Accuracy:", mean(cv4), "\n")
##
## Mean Accuracy: 0.9666667
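# This last model was fitted with probability = TRUE, but the class
# probabilities were not shown. Assuming additional predict arguments are
# forwarded to e1071's predict method (as type = "prob" was forwarded to
# randomForest above), they could be extracted along these lines:
pred <- mod4$predict(X_multi, probability = TRUE)
head(attr(pred, "probabilities"))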