Dimensionality Reduction in R
Matt Pickard
Owner, Pickard Predictives, LLC
# Preview the first five rows of the credit data.
head(credit_df, n = 5)
annual_income num_bank_accounts num_credit_card outstanding_debt credit_history_months
<dbl> <dbl> <dbl> <dbl> <dbl>
1 87630. 2 5 526. 286
2 16574. 2 5 NA 122
3 24931. 2 5 NA 351
4 136680. 2 5 NA 216
5 76850. 2 5 1112. 272
# Low-variance filter: collect the names of features whose variance is exactly
# zero. Variance is computed per column with missing values removed
# (na.rm = TRUE). The result is stored as `low_var_filter` so that it can be
# combined with the missing-value filter without being overwritten by it.
low_var_filter <- credit_df %>%
  summarize(across(everything(), ~ var(., na.rm = TRUE))) %>%
  pivot_longer(everything(), names_to = "feature", values_to = "variance") %>%
  filter(variance == 0) %>%
  pull(feature)
low_var_filter
"num_bank_accounts" "num_credit_card"
# Missing-value filter: count the NA entries in every column, reshape the
# one-row summary to long form (one row per feature), and keep the names of
# the features that have at least one missing value.
na_filter <- credit_df %>%
  summarise(across(everything(), function(col) sum(is.na(col)))) %>%
  pivot_longer(
    everything(),
    names_to = "feature",
    values_to = "num_missing_values"
  ) %>%
  filter(num_missing_values > 0) %>%
  pull(feature)
na_filter
"outstanding_debt"
# Concatenate the two vectors of feature names, then drop those columns from
# the data and show the first three rows of what remains.
combined_filter <- c(low_var_filter, na_filter)
credit_df %>%
  select(-all_of(combined_filter)) %>%
  head(n = 3)
annual_income credit_history_months
<dbl> <dbl>
1 87630. 286
2 16574. 122
3 24931. 351
Dimensionality Reduction in R