Dimensionality Reduction in R
Matt Pickard
Owner, Pickard Predictives, LLC
linear_reg(engine = "glmnet", penalty = 0.001 , mixture = 1)
scale() for the target variable (wrapped in as.vector(), because scale() returns a matrix)
step_normalize() for predictor variables
# Scale target variable
# Replace `target` with its scaled version; as.vector() is applied to the
# result of scale() before it is stored back in the column.
df <- df %>% mutate(target = as.vector(scale(target)))
...
# Scale predictor variables
# The normalization step is attached to a recipe and applied to every column
# selected by all_numeric_predictors().
recipe() %>% step_normalize(all_numeric_predictors())
tune() in tidymodels
linear_reg(engine = "glmnet", penalty = tune() , mixture = 1)
# Standardize the target so it is on the same scale as the normalized
# predictors. as.vector() flattens the result of scale() back into a plain
# numeric column.
house_sales_subset_df <- house_sales_subset_df %>%
  mutate(price = as.vector(scale(price)))

# Hold out 20% of the rows for testing.
# NOTE(review): no seed is set, so the split differs between runs.
split <- initial_split(house_sales_subset_df, prop = 0.8)
train <- split %>% training()
test <- split %>% testing()

# Normalize all numeric predictors so the lasso penalty treats every
# coefficient on a comparable scale.
lasso_recipe <-
  recipe(price ~ ., data = train) %>%
  step_normalize(all_numeric_predictors())

# mixture = 1 requests a pure lasso (L1) penalty from the glmnet engine.
lasso_model <- linear_reg(penalty = 0.01, mixture = 1, engine = "glmnet")
lasso_workflow <- workflow(preprocessor = lasso_recipe, spec = lasso_model)

# Lasso removes a predictor by shrinking its coefficient to exactly zero, so
# the retained terms are the non-zero ones. Filtering on `estimate > 0` would
# also hide every retained predictor whose coefficient is negative.
# NOTE(review): any printed listing produced with the earlier `estimate > 0`
# filter shows positive coefficients only and will have fewer rows.
lasso_workflow %>%
  fit(train) %>%
  tidy() %>%
  filter(estimate != 0)
# A tibble: 9 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 bathrooms 0.0477 0.01
2 sqft_living 0.434 0.01
3 floors 0.0262 0.01
4 waterfront 0.133 0.01
5 view 0.0510 0.01
6 condition 0.0319 0.01
... ... ...
# Mark the penalty as a hyperparameter to be tuned instead of fixing its value.
lasso_model <- linear_reg(penalty = tune(), mixture = 1, engine = "glmnet")
lasso_workflow <- workflow(preprocessor = lasso_recipe, spec = lasso_model)

# 5-fold cross-validation on the training data.
# NOTE(review): no seed is set, so fold assignment varies between runs.
train_cv <- vfold_cv(train, v = 5)

# penalty() is parameterized on the log10 scale, so range = c(-3, -1) searches
# 20 evenly spaced exponents, i.e. penalties from 0.001 to 0.1.
penalty_grid <- grid_regular(penalty(range = c(-3, -1)), levels = 20)

# Fit the workflow for every candidate penalty on every resample.
lasso_grid <- tune_grid(
  lasso_workflow,
  resamples = train_cv,
  grid = penalty_grid
)

# Visualize cross-validated RMSE against the penalty.
autoplot(lasso_grid, metric = "rmse")

# `metric` is passed by name: recent tune releases reject a positional metric,
# while the named form works on older releases as well.
best_rmse <- lasso_grid %>% select_best(metric = "rmse")

# Plug the winning penalty into the workflow and refit on the full training set.
final_lasso <-
  finalize_workflow(lasso_workflow, best_rmse) %>%
  fit(train)

# Retained predictors are the non-zero coefficients; `estimate > 0` would drop
# retained predictors with negative coefficients.
tidy(final_lasso) %>% filter(estimate != 0)
Dimensionality Reduction in R