Analyzing Social Media Data in R
Vivek Vijayaraghavan
Data Science Coach
# Fetch the currently trending topics (no location argument is supplied)
trend_topics <- get_trends()
# Show the first ten entries of the `trend` column
head(trend_topics[["trend"]], n = 10)
[1] "#madebygoogle" "#????H??????"
[3] "????" "Jennifer Aniston"
[5] "#??????????" "#FelizMartes"
[7] "#G????????" "????"
[9] "????" "??????"
# List the locations for which Twitter trends can be requested
trends_avail <- trends_available()
# Preview the first six rows
head(trends_avail, n = 6L)
name url parentid country
<chr> <chr> <int> <chr>
Worldwide http://where.yahooapis.com/v1/place/1
Winnipeg http://where.yahooapis.com/v1/place/2972 23424775 Canada
Ottawa http://where.yahooapis.com/v1/place/3369 23424775 Canada
Quebec http://where.yahooapis.com/v1/place/3444 23424775 Canada
Montreal http://where.yahooapis.com/v1/place/3534 23424775 Canada
Toronto http://where.yahooapis.com/v1/place/4118 23424775 Canada
# Trending topics for the United States, looked up by place name
gt_US <- get_trends(woeid = "United States")
# Open the result in the interactive data viewer
View(gt_US)
# Trending topics for New York, looked up by place name
gt_city <- get_trends(woeid = "New York")
# Preview the first six rows
head(gt_city, n = 6L)
trend url promoted_content
<chr> <chr> <lgl>
Lions http://twitter.com/search?q=Lions NA
Green Bay http://twitter.com/search?q=%22Green+Bay%22 NA
#DETvsGB http://twitter.com/search?q=%23DETvsGB NA
LeBron http://twitter.com/search?q=LeBron NA
Aaron Rodgers http://twitter.com/search?q=%22Aaron+Rodgers%22 NA
#90DayFiance http://twitter.com/search?q=%2390DayFiance NA
tweet_volume
has the count of tweets made on a trending topic
# Aggregate trends and tweet volumes
library(dplyr)
# Per-trend mean of tweet_volume; mean() is called without na.rm, so a
# missing volume yields NA for that trend
trend_df <- summarize(
  group_by(gt_city, trend),
  tweet_vol = mean(tweet_volume)
)
# Preview the first six rows
head(trend_df, n = 6L)
trend tweet_vol
<chr> <dbl>
#90DayFiance 14375
#acefamilyisoverparty 12760
#ascendwithme NA
#bbcon2019 NA
#bookbirthday NA
#DemDebate 18928
# Order the trends from highest to lowest mean tweet volume
trend_df_sort <- trend_df %>%
  arrange(desc(tweet_vol))
# Show the most tweeted trends (top six rows)
head(trend_df_sort, n = 6L)
trend tweet_vol
<chr> <dbl>
LeBron 298302
Lions 267945
Columbus Day 135014
John Bolton 118933
#DETvsGB 67197
#TuesdayThoughts 63259
Analyzing Social Media Data in R