# Random Forest ----
# Binary classification demo: a random forest on the two easily separable
# iris species (setosa vs. versicolor).
iris_binary <- iris[iris$Species %in% c("setosa", "versicolor"), ]
X_binary <- iris_binary[, 1:4]
# Round-tripping through character drops the unused "virginica" level; a
# factor target tells the framework this is a classification task.
y_binary <- as.factor(as.character(iris_binary$Species))
datasplit <- unifiedml::train_test_split(
  X_binary, y_binary,
  test_size = 0.3, seed = 42
)
mod <- Model$new(caret::train)
mod$fit(
  datasplit$X_train, datasplit$y_train,
  method = "rf",
  # method = "none": fit once with default hyperparameters, no resampling.
  trControl = caret::trainControl(method = "none")
)
print(head(mod$predict(datasplit$X_test)))
## [1] setosa versicolor setosa versicolor setosa versicolor
## Levels: setosa versicolor
print(head(mod$predict(datasplit$X_test, type="prob")))
## setosa versicolor
## 49 1 0
## 65 0 1
## 25 1 0
## 74 0 1
## 18 1 0
## 100 0 1
# Logistic Regression with glmnet ----
# Multiclass demo: penalized multinomial regression (glmnet) on all three
# iris species.
X <- iris[, 1:4]
y <- iris$Species # factor target -> classification task
datasplit <- unifiedml::train_test_split(
  X, y,
  test_size = 0.3, seed = 42
)
mod <- Model$new(caret::train)
mod$fit(
  datasplit$X_train, datasplit$y_train,
  method = "glmnet",
  # alpha = 0 selects the pure ridge (L2) penalty; fixing lambda plus
  # trainControl(method = "none") yields a single fit with no tuning search.
  tuneGrid = data.frame(alpha = 0, lambda = 0.01),
  trControl = caret::trainControl(method = "none")
)
print(head(mod$predict(datasplit$X_test)))
## [1] setosa versicolor versicolor virginica virginica virginica
## Levels: setosa versicolor virginica
print(head(mod$predict(datasplit$X_test, type="prob")))
## setosa versicolor virginica
## 49 0.953222227 0.04646469 0.0003130799
## 65 0.173565092 0.69159670 0.1348382075
## 74 0.047769376 0.73182191 0.2204087107
## 146 0.003334732 0.16712746 0.8295378089
## 122 0.019433591 0.33431880 0.6462476134
## 150 0.028121764 0.38063759 0.5912406473
# 5-fold cross-validation of the ridge model (alpha = 0, fixed lambda).
# With a factor target, cross_val_score defaults to accuracy as the metric.
(cv <- cross_val_score(
  mod, datasplit$X_train, datasplit$y_train, cv = 5L,
  fit_params = list(
    method = "glmnet",
    tuneGrid = data.frame(alpha = 0, lambda = 0.01), # ridge, fixed lambda
    trControl = caret::trainControl(method = "none")
  )
))
## | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%
## fold1 fold2 fold3 fold4 fold5
## 0.9047619 0.9047619 1.0000000 0.9047619 1.0000000
cat("\nMean Accuracy:", mean(cv), "\n")
##
## Mean Accuracy: 0.9428571
# 5-fold cross-validation with an elastic-net penalty for comparison.
(cv <- cross_val_score(mod, datasplit$X_train, datasplit$y_train, cv = 5L,
fit_params=list(method = "glmnet",
tuneGrid = data.frame(alpha = 0.5, # elastic net (NOT ridge: alpha = 0.5 mixes L1/L2)
lambda = 0.01), # fixed lambda
trControl = caret::trainControl(method = "none")))) # Auto-uses accuracy
## | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%
## fold1 fold2 fold3 fold4 fold5
## 0.952381 0.952381 1.000000 0.952381 1.000000
cat("\nMean Accuracy:", mean(cv), "\n")
##
## Mean Accuracy: 0.9714286