Support Vector Machines in R
Kailash Awati
Instructor
# Two predictors, x1 and x2, uniformly distributed in (0,1).

# Preliminaries...
# Set required number of data points
n <- 200
# Set seed to ensure reproducibility
set.seed(42)

# Generate dataframe with two predictors x1 and x2 in (0,1).
# runif() defaults to min = 0, max = 1; x1 is drawn before x2, so the
# seed above fixes both columns.
df <- data.frame(x1 = runif(n), x2 = runif(n))
x1 = x2 is the separating line:
y = -1 for points below the line and y = 1 for points above it.
# Classify points as -1 or +1
# A positive x1 - x2 (the point lies below the line x2 = x1) gives label -1;
# every other point gets +1. Levels are fixed as (-1, 1) so the factor
# ordering does not depend on which label occurs first in the data.
df[["y"]] <- with(
  df,
  factor(ifelse(x1 - x2 > 0, -1, 1), levels = c(-1, 1))
)
# The line x1 = x2 passes through (0, 0) and has slope = 1.
library(ggplot2)

# Build plot: scatter of x2 against x1, coloured by class y, with the
# line x1 = x2 drawn on top.
p <- ggplot(data = df, aes(x = x1, y = x2, color = y)) +
  geom_point() +
  # Named values tie each colour to a factor level by name, not by position.
  scale_color_manual(values = c("-1" = "red", "1" = "blue")) +
  # slope = 1, intercept = 0 is the line x2 = x1.
  geom_abline(slope = 1, intercept = 0)

# Display it
p
# Create a margin of 0.05 in dataset
delta <- 0.05

# Retain only those points that lie outside the margin, i.e. rows where
# |x1 - x2| exceeds delta.
df1 <- df[abs(df$x1 - df$x2) > delta, ]

# Check number of data points remaining
nrow(df1)
# Replot dataset with margin (same plot code as before, applied to df1)
p <- ggplot(data = df1, aes(x = x1, y = x2, color = y)) +
  geom_point() +
  # Named values keep red on "-1" and blue on "1", matching the first plot.
  scale_color_manual(values = c("-1" = "red", "1" = "blue")) +
  geom_abline(slope = 1, intercept = 0)

# Display plot
p
# Add the two margin boundaries: dashed lines of slope 1 shifted by +delta
# and -delta from the line x1 = x2. Adding a list of layers to a plot
# appends them in list order (upper boundary first, then lower).
margin_lines <- lapply(
  c(delta, -delta),
  function(offset) {
    geom_abline(slope = 1, intercept = offset, linetype = "dashed")
  }
)
p <- p + margin_lines

# Display the plot with margins
p
Support Vector Machines in R