# Random Forest ----
# Binary classification demo: a random forest on the two easily separable
# iris species (setosa vs. versicolor).
iris_binary <- iris[iris$Species %in% c("setosa", "versicolor"), ]
X_binary <- iris_binary[, 1:4]
# Round-tripping through character drops the unused "virginica" level; a
# factor target tells the framework this is a classification task.
y_binary <- as.factor(as.character(iris_binary$Species))
datasplit <- unifiedml::train_test_split(
  X_binary, y_binary,
  test_size = 0.3, seed = 42
)
mod <- Model$new(caret::train)
mod$fit(
  datasplit$X_train, datasplit$y_train,
  method = "rf",
  # method = "none": fit once with default hyperparameters, no resampling.
  trControl = caret::trainControl(method = "none")
)
print(head(mod$predict(datasplit$X_test)))
## [1] setosa versicolor setosa versicolor setosa versicolor
## Levels: setosa versicolor
print(head(mod$predict(datasplit$X_test, type="prob")))
## setosa versicolor
## 49 1 0
## 65 0 1
## 25 1 0
## 74 0 1
## 18 1 0
## 100 0 1
# Logistic Regression with glmnet ----
# Multiclass demo: penalized multinomial regression (glmnet) on all three
# iris species.
X <- iris[, 1:4]
y <- iris$Species # factor target -> classification task
datasplit <- unifiedml::train_test_split(
  X, y,
  test_size = 0.3, seed = 42
)
mod <- Model$new(caret::train)
mod$fit(
  datasplit$X_train, datasplit$y_train,
  method = "glmnet",
  # alpha = 0 selects the pure ridge (L2) penalty; fixing lambda plus
  # trainControl(method = "none") yields a single fit with no tuning search.
  tuneGrid = data.frame(alpha = 0, lambda = 0.01),
  trControl = caret::trainControl(method = "none")
)
print(head(mod$predict(datasplit$X_test)))
## [1] setosa versicolor versicolor virginica virginica virginica
## Levels: setosa versicolor virginica
print(head(mod$predict(datasplit$X_test, type="prob")))
## setosa versicolor virginica
## 49 0.953222227 0.04646469 0.0003130799
## 65 0.173565092 0.69159670 0.1348382075
## 74 0.047769376 0.73182191 0.2204087107
## 146 0.003334732 0.16712746 0.8295378089
## 122 0.019433591 0.33431880 0.6462476134
## 150 0.028121764 0.38063759 0.5912406473
# 5-fold cross-validation of the ridge model (alpha = 0, fixed lambda).
# With a factor target, cross_val_score defaults to accuracy as the metric.
(cv <- cross_val_score(
  mod, datasplit$X_train, datasplit$y_train, cv = 5L,
  fit_params = list(
    method = "glmnet",
    tuneGrid = data.frame(alpha = 0, lambda = 0.01), # ridge, fixed lambda
    trControl = caret::trainControl(method = "none")
  )
))
## | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%
## fold1 fold2 fold3 fold4 fold5
## 0.9047619 0.9047619 1.0000000 0.9047619 1.0000000
cat("\nMean Accuracy:", mean(cv), "\n")
##
## Mean Accuracy: 0.9428571
# 5-fold cross-validation with an elastic-net penalty for comparison.
(cv <- cross_val_score(mod, datasplit$X_train, datasplit$y_train, cv = 5L,
fit_params=list(method = "glmnet",
tuneGrid = data.frame(alpha = 0.5, # elastic net (NOT ridge: alpha = 0.5 mixes L1/L2)
lambda = 0.01), # fixed lambda
trControl = caret::trainControl(method = "none")))) # Auto-uses accuracy
## | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%
## fold1 fold2 fold3 fold4 fold5
## 0.952381 0.952381 1.000000 0.952381 1.000000
cat("\nMean Accuracy:", mean(cv), "\n")
##
## Mean Accuracy: 0.9714286