Fraud Detection in R
Bart Baesens
Professor Data Science at KU Leuven
## Count how many observations fall in each level of Class.
table(creditcard$Class)
0 1
24108 492
## Size of the undersampled data set: keep the 492 fraud cases and choose the
## total so that they make up the requested fraction of all rows.
n_fraud <- 492
new_frac_fraud <- 0.50
new_n_total <- n_fraud / new_frac_fraud  ## = 492 / 0.50 = 984
## Undersample creditcard down to new_n_total rows with ROSE::ovun.sample(),
## using a fixed seed so the draw is reproducible.
library(ROSE)
undersampling_result <- ovun.sample(
  formula = Class ~ .,
  data = creditcard,
  method = "under",
  N = new_n_total,
  seed = 2018
)
## The resampled data frame is stored in the `data` element of the result.
undersampled_credit <- undersampling_result$data
## Check the resulting class proportions.
prop.table(table(undersampled_credit$Class))
0 1
0.5 0.5
## Combine over- and undersampling (method = "both"): keep the original number
## of rows while requesting a fraud share of fraction_fraud_new.
n_new <- nrow(creditcard)  ## = 24600
fraction_fraud_new <- 0.50
sampling_result <- ovun.sample(
  formula = Class ~ .,
  data = creditcard,
  method = "both",
  N = n_new,
  p = fraction_fraud_new,
  seed = 2018
)
## The resampled data frame is stored in the `data` element of the result.
sampled_credit <- sampling_result$data
## Check the resulting class proportions (close to, but not exactly, 0.5 each).
prop.table(table(sampled_credit$Class))
0 1
0.5039837 0.4960163
Fraud Detection in R