Machine Learning with caret in R
Zach Mayer
Data Scientist at DataRobot and co-author of caret
See ?preProcess for more detail.
# Generate some data with missing values
# Load the built-in mtcars data and blank out hp in 10 randomly chosen rows.
data(mtcars)
set.seed(42) # fixed seed so the same rows become NA on every run
# seq_len() is the safe form of 1:nrow(); it yields the same row indices here.
mtcars[sample(seq_len(nrow(mtcars)), 10), "hp"] <- NA
Y <- mtcars$mpg # response: the mpg column
X <- mtcars[, 2:4] # predictors: cyl, disp, hp (hp is missing at random)
# Linear model "recipe": request centering, scaling, and median imputation
# of the predictors, then fit a GLM of Y on X.
set.seed(42)
model <- train(
  x = X,
  y = Y,
  method = "glm",
  preProcess = c("center", "scale", "medianImpute")
)
# Show the lowest RMSE found in the fitted model's results table.
print(min(model[["results"]][["RMSE"]]))
3.612713
# PCA before modeling: center, scale, and median-impute the predictors,
# add a PCA step, then fit a GLM of Y on X.
set.seed(42)
model <- train(
  X, Y,
  method = "glm",
  preProcess = c("center", "scale", "medianImpute", "pca")
)
# print() explicitly so the RMSE is also shown when the script is source()d,
# where bare top-level expressions are not auto-printed.
print(min(model$results$RMSE))
3.402557
# Spatial sign transform: center, scale, and median-impute the predictors,
# add the spatial sign transformation, then fit a GLM of Y on X.
set.seed(42)
model <- train(
  X, Y,
  method = "glm",
  preProcess = c("center", "scale", "medianImpute", "spatialSign")
)
# print() explicitly so the RMSE is also shown when the script is source()d,
# where bare top-level expressions are not auto-printed.
print(min(model$results$RMSE))
4.284904
Machine Learning with caret in R