Analyzing Social Media Data in R
Vivek Vijayaraghavan
Data Science Coach
# Tabulate the 60 most frequent terms in the corpus, then display the table
library(qdap)
term_count <- freq_terms(text.var = twt_corpus_final, top = 60)
term_count
# Extra words to exclude from the term counts
custom_stop <- c(
  "obesity", "can", "amp", "one", "like", "will", "just",
  "many", "new", "know", "also", "need", "may", "now",
  "get", "s", "t", "m", "re"
)

# Strip the custom stop words from the corpus
twt_corpus_refined <- tm_map(
  twt_corpus_final,
  removeWords,
  words = custom_stop
)

# Recompute term frequencies on the refined corpus, keeping the top 20 terms
term_count_clean <- freq_terms(text.var = twt_corpus_refined, top = 20)
term_count_clean
# Keep only the terms that occur more than 50 times
term50 <- subset(term_count_clean, subset = FREQ > 50)

library(ggplot2)

# Bar chart of the retained terms, ordered from most to least frequent,
# with the x-axis labels tilted 45 degrees
ggplot(data = term50, mapping = aes(x = reorder(WORD, -FREQ), y = FREQ)) +
  geom_col(fill = "blue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
The wordcloud() function helps create word clouds.
# Create a word cloud based on min frequency
# Single-colour (red) cloud limited to terms with a frequency of at least 20;
# random.order = FALSE turns off random plotting order
library(wordcloud)
wordcloud(
  words = twt_corpus_refined,
  min.freq = 20,
  scale = c(3, 0.5),
  random.order = FALSE,
  colors = "red"
)
# Multi-colour cloud capped at 100 words, coloured with six shades from the
# "Dark2" ColorBrewer palette
library(RColorBrewer)
wordcloud(
  words = twt_corpus_refined,
  max.words = 100,
  scale = c(2.5, 0.5),
  random.order = FALSE,
  colors = brewer.pal(n = 6, name = "Dark2")
)
Analyzing Social Media Data in R