Text Mining with Bag-of-Words in R
Ted Kwartler
Instructor
# qdap cleaning function
#
# Normalizes text by passing it through a fixed sequence of replacement
# helpers and then lower-casing the result.
#
# Args:
#   x: the text to clean (presumably a character vector; it is handed to
#      each helper as its only argument).
#
# Returns:
#   The output of the final step, tolower(), applied to the transformed text.
qdap_clean <- function(x) {
  # Order matters: every step consumes the previous step's output, and
  # lower-casing runs last.
  steps <- list(
    replace_abbreviation,
    replace_contraction,
    replace_number,
    replace_ordinal,
    replace_symbol,
    tolower
  )
  for (step in steps) {
    x <- step(x)
  }
  x
}
# tm cleaning function
#
# Applies three tm transformations to a corpus, in order: punctuation
# removal, whitespace collapsing, and removal of English stop words plus a
# few extra terms.
#
# Args:
#   corpus: the corpus object to clean; it is passed to tm_map() at each step.
#
# Returns:
#   The corpus after all three transformations have been applied.
tm_clean <- function(corpus) {
  # The result must be assigned back to `corpus`; assigning it to any other
  # name silently drops the punctuation removal from the pipeline.
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, stripWhitespace)
  # NOTE(review): the extra terms are capitalized; if the text has already
  # been lower-cased upstream they may never match -- confirm against caller.
  corpus <- tm_map(
    corpus,
    removeWords,
    c(stopwords("en"), "Google", "Amazon", "company")
  )
  corpus
}
Text Mining with Bag-of-Words in R