Fraudedetectie in R
Bart Baesens
Professor Data Science at KU Leuven
# Show the first elements of the `timestamps` vector.
head(timestamps)
"20:27:28" "21:08:41" "01:30:16" "00:57:04" "23:12:14" "22:54:16"
# Convert the "HH:MM:SS" strings to decimal hours:
# hms() parses each string into a period, as.numeric() turns that period into
# seconds, and dividing by 3600 yields hours.
library(lubridate)
ts <- as.numeric(hms(timestamps)) / 3600
head(ts)
20.4577778 21.1447222 1.5044444 0.9511111 23.2038889 22.9044444
# Plot the timestamps as a histogram with one bin per hour (breaks 0..24),
# wrapped onto polar coordinates.
library(ggplot2)
clock <- ggplot(data.frame(ts), aes(x = ts)) +
  geom_histogram(breaks = seq(0, 24), colour = "blue", fill = "lightblue") +
  coord_polar()

# Plain arithmetic mean of the decimal hours, drawn as a dashed red line.
# It ignores that hour 24 wraps around to hour 0.
arithmetic_mean <- mean(ts)

# NOTE(review): `size` for line geoms is deprecated since ggplot2 3.4.0 in
# favour of `linewidth`; kept as-is for compatibility with older versions.
clock +
  geom_vline(
    xintercept = arithmetic_mean,
    linetype = 2,
    color = "red",
    size = 2
  )

$$D \sim \mathrm{vonMises}\left(\mu, \kappa\right)$$
# Convert the decimal-hour timestamps to class "circular",
# measured in hours on a 24-hour clock template.
library(circular)
ts <- circular(ts, units = "hours", template = "clock24")
head(ts)
Circular Data:
[1] 20.457889 21.144607 1.504422 0.950982 23.203917 4.904397
# Fit the von Mises parameters to the timestamps with mle.vonmises().
estimates <- mle.vonmises(ts)
# Estimated mean direction, wrapped by the modulo into the 24-hour range.
p_mean <- estimates[["mu"]] %% 24
# Estimated concentration parameter kappa.
concentration <- estimates[["kappa"]]

(1) Schat $\mu(S)$ en $\kappa(S)$ op $S$ met mle.vonmises():
# Fit the von Mises parameters to the timestamps with mle.vonmises().
estimates <- mle.vonmises(ts)
# Estimated mean direction, wrapped by the modulo into the 24-hour range.
p_mean <- estimates[["mu"]] %% 24
# Estimated concentration parameter kappa.
concentration <- estimates[["kappa"]]
(2) Bereken de dichtheid (= likelihood) van de timestamps met dvonmises():
# Von Mises density of every timestamp under the fitted parameters.
densities <- dvonmises(
  x = ts,
  mu = p_mean,
  kappa = concentration
)
# (3) Feature: TRUE if the timestamp falls within the confidence interval,
# otherwise FALSE.
alpha <- 0.90
# Quantile at the lower tail probability (1 - alpha) / 2, wrapped onto the
# 24-hour range.
quantile <- qvonmises(
  p = (1 - alpha) / 2,
  mu = p_mean,
  kappa = concentration
) %% 24
# Density at that quantile serves as the cutoff.
cutoff <- dvonmises(quantile, mu = p_mean, kappa = concentration)
# A timestamp is flagged TRUE when its density is at least the cutoff.
time_feature <- densities >= cutoff


$$
$$

## ts holds the timestamps 18.42, 20.45, 20.88, 0.75, 19.20, 23.65 and 6.08

# Confidence level; it does not change between iterations, so set it once.
alpha <- 0.90

# One result per timestamp, preallocated. The first two entries stay NA
# because the loop only scores from the third timestamp onwards.
time_feature <- rep(NA, length(ts))

# seq_along(ts)[-(1:2)] is empty when ts has fewer than three elements,
# whereas 3:length(ts) would count backwards in that case.
for (i in seq_along(ts)[-(1:2)]) {
  ## (1) Previous timestamps
  ts_history <- ts[seq_len(i - 1)]

  ## (2) Estimate mu and kappa on the historical timestamps
  estimates <- mle.vonmises(ts_history)
  p_mean <- estimates$mu %% 24
  concentration <- estimates$kappa

  ## (3) Estimate the density of the current timestamp
  dens_i <- dvonmises(ts[i], mu = p_mean, kappa = concentration)

  ## (4) Check whether the density is at least the cutoff at 90% confidence
  quantile <- qvonmises(
    (1 - alpha) / 2,
    mu = p_mean,
    kappa = concentration
  ) %% 24
  cutoff <- dvonmises(quantile, mu = p_mean, kappa = concentration)
  time_feature[i] <- dens_i >= cutoff
}

print(time_feature)
NA NA TRUE FALSE TRUE TRUE FALSE
Fraudedetectie in R