Mixture Models in R
Victor Medina
Researcher at The University of Edinburgh
head(data, 4)
x
1 3.294453
2 5.818586
3 2.380493
4 4.415913
Initial means
means_init <- c(1, 2)
means_init
1 2
Initial proportions
props_init <- c(0.5, 0.5)
props_init
0.5 0.5
data_with_probs <- data %>%
mutate(prob_from_red = props_init[1] * dnorm(x, mean = means_init[1]),
prob_from_blue = props_init[2] * dnorm(x, mean = means_init[2]),
prob_red = prob_from_red/(prob_from_red + prob_from_blue),
prob_blue = prob_from_blue/(prob_from_red + prob_from_blue)) %>%
select(x, prob_red, prob_blue)
head(data_with_probs, 4)
x prob_red prob_blue
1 3.294453 0.14252762 0.8574724
2 5.818586 0.01314364 0.9868564
3 2.380493 0.29307562 0.7069244
4 4.415913 0.05137250 0.9486275
means_estimates <- data_with_probs %>%
summarise(mean_red = sum(x * prob_red) / sum(prob_red),
mean_blue = sum(x * prob_blue) / sum(prob_blue)) %>%
as.numeric()
means_estimates
2.848001 4.572862
props_estimates <- data_with_probs %>%
summarise(proportion_red = mean(prob_red),
proportion_blue = 1- proportion_red) %>%
as.numeric()
props_estimates
0.1032487 0.8967513
# Expectation (known means and proportions)
expectation <- function(data, means, proportions){
# Estimate the probabilities
data <- data %>%
mutate(prob_from_red = proportions[1] * dnorm(x, mean = means[1]),
prob_from_blue = proportions[2] * dnorm(x, mean = means[2]),
prob_red = prob_from_red/(prob_from_red + prob_from_blue),
prob_blue = prob_from_blue/(prob_from_red + prob_from_blue)) %>%
select(x, prob_red, prob_blue)
# Return data with probabilities
return(data)
}
# Maximization (known probabilities)
maximization <- function(data_with_probs){
# Estimate the means
means_estimates <- data_with_probs %>%
summarise(mean_red = sum(x * prob_red) / sum(prob_red),
mean_blue = sum(x * prob_blue) / sum(prob_blue)) %>%
as.numeric()
# Estimate the proportions
proportions_estimates <- data_with_probs %>%
summarise(proportion_red = mean(prob_red),
proportion_blue = 1 - proportion_red) %>%
as.numeric()
# Return the results
list(means_estimates, proportions_estimates)
}
# Iterative process
for(i in 1:10){
# Expectation-Maximization
new_values <- maximization(expectation(data, means_init, props_init))
# New means and proportions
means_init <- new_values[[1]]
props_init <- new_values[[2]]
# Print results
cat(c(i, means_init, proportions_init),"\n")
}
1 2.848001 4.572862 0.1032487 0.8967513
2 2.469715 4.736764 0.1508531 0.8491469
3 2.411235 4.863675 0.1911983 0.8088017
4 2.455946 4.929702 0.2162419 0.7837581
5 2.511132 4.96399 0.232063 0.767937
6 2.556729 4.984427 0.2428862 0.7571138
7 2.59167 4.998099 0.2507144 0.7492856
8 2.618177 5.007884 0.2565634 0.7434366
9 2.638406 5.015153 0.261021 0.738979
10 2.653982 5.020675 0.264463 0.735537
Mixture Models in R