Analyzing Social Media Data in R
Vivek Vijayaraghavan
Data Science Coach
friends_count
and followers_count
of a user
# Search for 1000 tweets on #fitness
# n is the number of tweets requested from the search
tweet_fit <- search_tweets("#fitness", n = 1000)
# Extract the user information that accompanies the tweets in tweet_fit
user_fit <- users_data(tweet_fit)
# View column names of the user data; followers_count and friends_count
# are the columns aggregated in the next step
names(user_fit)
# dplyr supplies the pipe plus group_by() / summarize()
library(dplyr)

# Collapse user_fit to one row per screen_name, averaging that account's
# followers_count and friends_count across its rows
counts_df <- user_fit %>%
  group_by(screen_name) %>%
  summarize(
    follower = mean(followers_count),
    friend = mean(friends_count)
  )

# Preview the first rows of the aggregated data
head(counts_df)
screen_name follower friend
<chr> <dbl> <dbl>
__seokjinnie124 209 454
_Aminata 623 523
_amsvn 167 126
_arweeennn 539 801
_asof_ 1336 455
_blendac 833 195
# Create a column holding the golden ratio: followers per friend.
# Both operands come from counts_df, the aggregated per-user data frame, so
# the new column lines up row-for-row with the data it is attached to.
# Accounts with zero friends yield Inf (or NaN when followers are also zero).
counts_df$ratio <- counts_df$follower / counts_df$friend
# Preview the first few ratios
head(counts_df$ratio)
[1] 0.4603524 1.1912046 1.3253968 0.6729089 2.9362637 4.2717949
# Order the accounts from the highest follower count to the lowest
counts_sort <- counts_df %>%
  arrange(desc(follower))
# Show only the rows whose follower count exceeds 30000
counts_sort[counts_sort$follower > 30000, ]
screen_name follower friend ratio
<chr> <dbl> <dbl> <dbl>
mashable 9817699 2783 3528
MensHealthMag 4528421 1111 4076
Sophie_Choudry 2367827 157 15082
thewebmaster_ 103936 6508 16
qwikad 92932 89557 1
Rharvley 90464 19484 5
SayWhenLA 68122 6680 10
# Show only the rows whose follower count is below 2000
counts_sort[counts_sort$follower < 2000, ]
screen_name follower friend ratio
<chr> <dbl> <dbl> <dbl>
workout_ehime 1960 1027 2
SardImperium 1932 256 8
Deem_Hoops 1912 1520 1
kaykay_inem 1890 443 4
bhealhty 1855 3066 1
# Get all lists the "PlayStation" account subscribes to
lst_playstation <- lists_users("PlayStation")
# Display the first four columns of the result
lst_playstation[, 1:4]
list_id name uri subscriber_count
<chr> <chr> <chr> <int>
58505230 PS Family /PlayStation/lists/ps-family 136
4747423 GameDevelopers /PlayStation/lists/gamedevelopers 467
2490894 gaming /PlayStation/lists/gaming 658
# Extract 100 subscribers of the "gaming" list owned by "PlayStation"
list_PS_sub <- lists_subscribers(
  slug = "gaming",
  owner_user = "PlayStation",
  n = 100
)
# View the screen names of those subscribers
list_PS_sub$screen_name
# Create a character vector of four screen names
users <- c(
  "Morten83032201",
  "ndugumr",
  "WOLF210_Warrior",
  "souransb"
)
# Look up the user information for those screen names
users_PS_gaming <- lookup_users(users)
user_id status_id created_at screen_name
<chr> <chr> <S3: POSIXct> <chr>
1158299850573791233 1172604921121824769 2019-09-13 20:16:13 Morten83032201
894525207620321280 1183293767215992832 2019-10-13 08:09:53 ndugumr
325760816 1182867378293616640 2019-10-12 03:55:34 WOLF210_Warrior
469270931 511997829384904704 2014-09-16 21:59:29 souransb
Analyzing Social Media Data in R