Text Mining with Bag-of-Words in R
Ted Kwartler
Instructor
# Build a word cloud of term frequencies from the cleaned corpus
# `clean_chardonnay` (created elsewhere).

# Convert TDM to matrix
chardonnay_tdm <- TermDocumentMatrix(clean_chardonnay)
chardonnay_m <- as.matrix(chardonnay_tdm)

# Sum rows and sort by frequency
term_frequency <- rowSums(chardonnay_m)
term_frequency <- sort(term_frequency, decreasing = TRUE)
word_freqs <- data.frame(
  term = names(term_frequency),
  num = term_frequency
)

# Make word cloud
wordcloud(word_freqs$term, word_freqs$num, max.words = 100, colors = "red")
# Apply a fixed sequence of tm cleaning transformations to a corpus.
#
# Steps, in order: strip punctuation, collapse whitespace, drop numbers,
# lower-case the text, then remove English stop words plus the token "amp".
#
# corpus: a corpus object accepted by tm_map().
# Returns the transformed corpus.
clean_corpus <- function(corpus) {
  # Words removed in the final step.
  drop_words <- c(stopwords("en"), "amp")

  cleaned <- tm_map(corpus, removePunctuation)
  cleaned <- tm_map(cleaned, stripWhitespace)
  cleaned <- tm_map(cleaned, removeNumbers)
  # tolower is a plain base function, so it is wrapped for use with tm_map().
  cleaned <- tm_map(cleaned, content_transformer(tolower))
  tm_map(cleaned, removeWords, drop_words)
}
# Clean a corpus with a fixed sequence of tm transformations, removing
# English stop words plus a caller-supplied set of extra words.
#
# corpus:          a corpus object accepted by tm_map().
# extra_stopwords: character vector of additional words to remove alongside
#                  stopwords("en"). The default reproduces the words that
#                  were previously hard-coded in this function.
# Returns the transformed corpus.
clean_corpus <- function(corpus,
                         extra_stopwords = c("amp", "chardonnay", "wine",
                                             "glass")) {
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, stripWhitespace)
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, content_transformer(tolower))
  # Runs after lower-casing, so extra_stopwords should be given in lower case.
  tm_map(corpus, removeWords, c(stopwords("en"), extra_stopwords))
}
Text Mining with Bag-of-Words in R