Text mining con Bag-of-Words in R
Ted Kwartler
Instructor

# Combine both corpora into all_tweets: each topic becomes one document.
# Tweets are joined with a space so the last word of one tweet is not fused
# with the first word of the next.
all_coffee <- paste(coffee_tweets$text, collapse = " ")
all_chardonnay <- paste(chardonnay_tweets$text, collapse = " ")
all_tweets <- c(all_coffee, all_chardonnay)

# Clean all_tweets
# NOTE(review): clean_corpus() is not defined in this section; it is assumed
# to be a helper declared elsewhere - confirm.
all_tweets <- VectorSource(all_tweets)
all_corpus <- VCorpus(all_tweets)
all_clean <- clean_corpus(all_corpus)
all_tdm <- TermDocumentMatrix(all_clean)
all_m <- as.matrix(all_tdm)

# Create the commonality cloud
commonality.cloud(all_m, colors = "steelblue1", max.words = 100)


# Combine both corpora into all_tweets: each topic becomes one document.
# Tweets are joined with a space so the last word of one tweet is not fused
# with the first word of the next.
all_coffee <- paste(coffee_tweets$text, collapse = " ")
all_chardonnay <- paste(chardonnay_tweets$text, collapse = " ")
all_tweets <- c(all_coffee, all_chardonnay)

# Clean all_tweets
# NOTE(review): clean_corpus() is not defined in this section; it is assumed
# to be a helper declared elsewhere - confirm.
all_tweets <- VectorSource(all_tweets)
all_corpus <- VCorpus(all_tweets)
all_clean <- clean_corpus(all_corpus)
all_tdm <- TermDocumentMatrix(all_clean)

# Name the two documents in the order they were combined above, so the
# cloud is labeled by topic.
colnames(all_tdm) <- c("coffee", "chardonnay")
all_m <- as.matrix(all_tdm)

# Create the comparison cloud
comparison.cloud(all_m, colors = c("orange", "blue"), max.words = 50)

# Identify the terms shared by both documents (non-zero count in each column).
# NOTE(review): all_tdm_m is not defined in this section; presumably it is
# as.matrix() of the two-document term-document matrix - confirm.
common_words <- subset(
  all_tdm_m,
  all_tdm_m[, 1] > 0 & all_tdm_m[, 2] > 0
)

# Rank the shared words by the absolute difference between their two counts,
# largest difference first. drop = FALSE keeps a matrix even if only one
# word is shared.
difference <- abs(common_words[, 1] - common_words[, 2])
common_words <- cbind(common_words, difference)
common_words <- common_words[
  order(common_words[, 3], decreasing = TRUE), ,
  drop = FALSE
]

# Keep at most the 25 top-ranked words. head() avoids the subscript error
# that 1:25 raises when fewer than 25 words are shared.
top_words <- head(common_words, 25)
top25_df <- data.frame(
  x = top_words[, 1],
  y = top_words[, 2],
  labels = rownames(top_words)
)

# Create the pyramid plot: x is drawn on the left, y on the right.
# NOTE(review): the left top label is "Chardonnay", so column 1 of all_tdm_m
# is assumed to hold the chardonnay counts - confirm the column order.
pyramid.plot(
  top25_df$x, top25_df$y,
  labels = top25_df$labels,
  main = "Parole in comune",
  gap = 8,
  laxlab = NULL,
  raxlab = NULL,
  unit = NULL,
  top.labels = c("Chardonnay", "Parole", "Coffee")
)

# Build the word network for tweets that mention the match term.
# Stopwords: the Top200Words list plus two extra terms to exclude.
network_match <- "barista"
network_stopwords <- c(Top200Words, "coffee", "amp")
network_colors <- c("gray85", "darkred")

word_associate(
  text.var = coffee_tweets$text,
  match.string = network_match,
  stopwords = network_stopwords,
  network.plot = TRUE,
  cloud.colors = network_colors
)

# Add a title to the plot that was just drawn
title(main = "Associazioni tweet su Barista Coffee")

Text mining con Bag-of-Words in R