Introduction to Text Analysis in R
Maham Faisal Khan
Senior Data Science Content Developer
# Preview the review tokens that also appear in the Loughran sentiment lexicon.
# The join key is spelled out so the match does not depend on whichever column
# names the two tables happen to share, and so dplyr does not emit its
# "Joining, by = ..." message.
tidy_review %>%
  inner_join(get_sentiments("loughran"), by = "word")
# A tibble: 3,960 x 6
id date product stars word sentiment
<int> <chr> <chr> <dbl> <chr> <chr>
1 5 12/22/15 iRobot Roomba 650 for Pets 5 slow negative
2 5 12/22/15 iRobot Roomba 650 for Pets 5 easily positive
3 5 12/22/15 iRobot Roomba 650 for Pets 5 random uncertainty
4 5 12/22/15 iRobot Roomba 650 for Pets 5 easy positive
# … with 3,956 more rows
# Keep only the review tokens found in the Loughran lexicon, tagging each with
# its sentiment. The key is named explicitly (`by = "word"`) rather than left
# for dplyr to infer from shared column names.
sentiment_review <- tidy_review %>%
  inner_join(get_sentiments("loughran"), by = "word")
# Tally how many matched words fall under each sentiment label.
count(sentiment_review, sentiment)
# A tibble: 6 x 2
sentiment n
<chr> <int>
3 negative 1795
4 positive 1568
# … with 4 more rows
# Count each (word, sentiment) pair and list the most frequent pairs first.
sentiment_review %>%
  count(word, sentiment, sort = TRUE)
# A tibble: 598 x 3
word sentiment n
<chr> <chr> <int>
1 easy positive 297
2 happy positive 107
3 trouble negative 58
# … with 595 more rows
# Restrict the matched words to the two polarity labels used in the plot.
sentiment_review2 <- sentiment_review %>%
  filter(sentiment %in% c("positive", "negative"))
# Per-sentiment word frequencies, trimmed to the highest counts in each group.
word_counts <- sentiment_review2 %>%
  count(word, sentiment) %>%
  group_by(sentiment) %>%
  # Rows with the ten largest counts per sentiment; slice_max() keeps ties by
  # default, so a group may end up with more than ten rows.
  slice_max(order_by = n, n = 10) %>%
  ungroup() %>%
  # Factor version of `word` whose levels are ordered by count, so bars are
  # drawn in frequency order instead of alphabetically.
  mutate(word2 = fct_reorder(.f = word, .x = n))
# Bar chart of word counts, one panel per sentiment.
word_counts %>%
  ggplot(aes(x = word2, y = n, fill = sentiment)) +
  # Fill colour encodes sentiment, which the facets already show, so the
  # legend is switched off.
  geom_col(show.legend = FALSE) +
  # Free scales let each panel use its own word set and count range.
  facet_wrap(~ sentiment, scales = "free") +
  # Flip the axes so the words run down the side as horizontal bars.
  coord_flip() +
  labs(
    title = "Sentiment Word Counts",
    x = "Words"
  )
Introduction to Text Analysis in R