Appending dictionaries

Introduction to Text Analysis in R

Maham Faisal Khan

Senior Data Science Content Developer

Using inner_join()

# Attach a Loughran sentiment label to every review token that appears in
# the lexicon; tokens without a lexicon entry are dropped by the inner join.
# The join key is stated explicitly so it does not depend on whichever
# column names the two tables happen to share.
tidy_review %>%
  inner_join(get_sentiments("loughran"), by = "word")

# A tibble: 3,960 x 6
      id date     product                    stars word      sentiment  
   <int> <chr>    <chr>                      <dbl> <chr>     <chr>      
 1     5 12/22/15 iRobot Roomba 650 for Pets     5 slow      negative   
 2     5 12/22/15 iRobot Roomba 650 for Pets     5 easily    positive   
 3     5 12/22/15 iRobot Roomba 650 for Pets     5 random    uncertainty
 4     5 12/22/15 iRobot Roomba 650 for Pets     5 easy      positive   
# … with 3,956 more rows

Counting sentiment

# Label review tokens with their Loughran sentiment. The key is given
# explicitly (`word`) so the join is not inferred from shared column names.
sentiment_review <- tidy_review %>%
  inner_join(get_sentiments("loughran"), by = "word")

# Number of labelled tokens in each sentiment category.
sentiment_review %>%
  count(sentiment)

# A tibble: 6 x 2
  sentiment        n
  <chr>        <int>
3 negative      1795
4 positive      1568
# … with 4 more rows

Counting sentiment

# Frequency of each (word, sentiment) pair, most frequent first.
sentiment_review %>%
  count(word, sentiment, sort = TRUE)

# A tibble: 598 x 3
   word      sentiment       n
   <chr>     <chr>       <int>
 1 easy      positive      297
 2 happy     positive      107
 3 trouble   negative       58
# … with 595 more rows

Visualizing sentiment

# Restrict the labelled tokens to the positive and negative categories.
sentiment_review2 <- filter(
  sentiment_review,
  sentiment %in% c("positive", "negative")
)

# Tally words within each sentiment, keep the 10 highest counts per
# sentiment, and add `word2`: the word as a factor ordered by its count,
# for use as an ordered plotting axis.
word_counts <- sentiment_review2 %>%
  count(word, sentiment) %>%
  group_by(sentiment) %>%
  slice_max(order_by = n, n = 10) %>%
  ungroup() %>%
  mutate(word2 = fct_reorder(word, n))

Visualizing sentiment

# Bar chart of the top words, one panel per sentiment. Axes are flipped so
# the words run down the side; each panel gets its own scales and the
# fill legend is hidden.
word_counts %>%
  ggplot(aes(word2, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ sentiment, scales = "free") +
  coord_flip() +
  labs(title = "Sentiment Word Counts", x = "Words")

Visualizing sentiment

Let's practice!

Introduction to Text Analysis in R