Machine Learning in the Tidyverse
Dmitriy (Dima) Gorenshteyn
Lead Data Scientist, Memorial Sloan Kettering Cancer Center
# Pair every row of cv_data with each candidate mtry value, so each
# cross-validation fold is repeated once per hyperparameter setting.
cv_tune <- crossing(cv_data, mtry = c(2, 4, 8, 16))

# Fit one ranger random forest per row: the fold's `train` data combined with
# that row's `mtry`. Attrition is modelled from all remaining columns; the
# fixed seed keeps every fit reproducible.
cv_models_rf <- mutate(
  cv_tune,
  model = map2(
    train, mtry,
    function(fold_train, n_vars) {
      ranger(
        formula = Attrition ~ .,
        data = fold_train,
        mtry = n_vars,
        num.trees = 100,
        seed = 42
      )
    }
  )
)
attrition | class |
---|---|
Yes | TRUE |
No | FALSE |
# Print the Attrition column of `validate` (the "Yes"/"No" labels).
validate$Attrition
No No No No No Yes No Yes ... No No No
# Compare each Attrition label to "Yes": the result holds TRUE where the label
# is "Yes" and FALSE otherwise.
validate_actual <- validate$Attrition == "Yes"
# Print the resulting logical values.
validate_actual
FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE ... FALSE FALSE FALSE
P(attrition) | class |
---|---|
Yes | TRUE |
No | FALSE |
# Run rf_model on rf_validate and keep the `predictions` element of the result.
# NOTE(review): the actual labels are read from `validate`, while the
# predictions here use `rf_validate` -- confirm both hold the same observations
# in the same order.
validate_classes <- predict(rf_model, rf_validate)$predictions
# Print the predicted labels.
validate_classes
No No No No No Yes No No ... No No No
# Compare each predicted label to "Yes": the result holds TRUE where the
# prediction is "Yes" and FALSE otherwise, mirroring validate_actual.
validate_predicted <- validate_classes == "Yes"
# Print the resulting logical values.
validate_predicted
FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE ... FALSE FALSE FALSE
Machine Learning in the Tidyverse