AFINN & NRC inner joins

Sentiment Analysis in R

Ted Kwartler

Data Dude

AFINN

library(textdata)
library(tidytext)
afinn <- get_sentiments('afinn')

Result:

tail(afinn)

# A tibble: 6 x 2 word value <chr> <dbl> 1 youthful 2 2 yucky -2 3 yummy 3 4 zealot -2 5 zealots -2 6 zealous 2
Sentiment Analysis in R

NRC

Load & Subset

library(textdata)
library(tidytext)
nrc <- get_sentiments('nrc')

Result:

tail(nrc)
# A tibble: 6 x 2
  word    sentiment   
  <chr>   <chr>       
1 zealous trust       
2 zest    anticipation
3 zest    joy         
4 zest    positive    
5 zest    trust       
6 zip     negative
Sentiment Analysis in R

Huckleberry Finn

huck funn

tidy_huck
# A tibble: 55,198 x 3
   document term         count
   <chr>    <chr>        <dbl>
 1 1        finn             1
 2 1        huckleberry     1
 3 3        ago              1
 4 3        fifty            1
 5 3        forty            1
 6 3        mississippi      1
 7 3        scene            1
 8 3        the              1
 9 3        time             1
10 3        valley           1
# … with 55,188 more rows
Sentiment Analysis in R

Huck Finn joined to AFINN

huck_finn_join <- tidy_huck %>% 
  inner_join(afinn, by = c("term" = "word"))

 huck_finn_join
# A tibble: 4,849 x 6
   document       term count     value
      <chr>      <chr> <dbl>     <int>
 1       11 adventures     1         2
 2       11     matter     1         1
 3       14       lied     1        -2
 4       17       true     1         2
 5       20        hid     1        -1
 6       20       rich     1         2
# ... with 4,843 more rows
Sentiment Analysis in R

Using summarize()

sample_df
# A tibble: 2 x 6
  document  term count  score
     <dbl> <chr> <dbl>  <dbl>
1       22 judge     1     -3
2       22  took     1      1
sample_df %>%
  group_by(document) %>%
  summarize(total_score = sum(score))
# A tibble: 1 x 2
  document total_score
     <dbl>       <dbl>
1       22          -2
Sentiment Analysis in R

Using filter()

filter(huck_finn_join, document == 20)
# A tibble: 2 x 6
  document  term count score
     <chr> <chr> <dbl>  <int>
1       20   hid     1     -1
2       20  rich     1      2
Sentiment Analysis in R

Plutchik & NRC

Plutchik Wheel

nrc <- get_sentiments("nrc")
head(nrc, 10)
# A tibble: 10 x 2
   word        sentiment
   <chr>       <chr>    
 1 abacus      trust    
 2 abandon     fear     
 3 abandon     negative 
 4 abandon     sadness  
 5 abandoned   anger    
 6 abandoned   fear     
 7 abandoned   negative 
 8 abandoned   sadness  
 9 abandonment anger    
10 abandonment fear  
Sentiment Analysis in R

The Wonderful Wizard of NRC

Oz cover

oz
# A tibble: 19,007 x 3
   document         term count
      <chr>        <chr> <dbl>
 1       1           the     1
 2       1        wizard     1
 3       1     wonderful     1
 4       6          baum     1
 5       6         frank     1
 6       10     contents     1
 7       12 introduction     1
 8       13      cyclone     1
 9       13          the     1
10       14      council     1
 # … with 18,997 more rows   1
Sentiment Analysis in R

%in% operator

x <- c("text", "mining", "python")
y <- c("text", "tm", "qdap", "R", "mining")
x %in% y
[1] TRUE TRUE FALSE
y %in% x
[1] TRUE FALSE FALSE FALSE TRUE
Sentiment Analysis in R

Let's practice!

Sentiment Analysis in R

Preparing Video For Download...