Case Studies: Network Analysis in R
Edmund Hart
Instructor
library(igraph)
library(stringr)
## Load the tweet data set, keeping text columns as character vectors
raw_tweets <- read.csv(
  file = "datasets/rstatstweets.csv",
  stringsAsFactors = FALSE
)
Data sample, single row
user_name: Karen Millidine
screen_name: KJMillidine
tweet_text: RT @Rbloggers: RStudio v1.1 Released
https://t.co/kCMHc689nY #rstats #DataScience
favorites: 0
retweets: 96
location: None
expanded_url: https://wp.me/pMm6L-ExV
in_reply_to_tweet_id: NA
in_reply_to_user_id: NA
dt: 10/10/17
## Collect each distinct screen name that appears in the tweet data
all_sn <- unique(raw_tweets[["screen_name"]])
## Start from an empty graph and register every screen name as a vertex
retweet_graph <- graph.empty() + vertices(all_sn)
## Extract the retweeted name from each tweet and add an edge for it
for (i in seq_len(nrow(raw_tweets))) {
  # Extract retweet name. find_rt() is defined outside this section; the
  # NULL check below implies it yields NULL when no name is found
  # (NOTE(review): confirm against its definition).
  rt_name <- find_rt(raw_tweets$tweet_text[i])

  # If there is a name, add an edge
  if (!is.null(rt_name)) {
    # Make sure the vertex exists, and add it if not. The check is made
    # against the graph's current vertex names rather than the static
    # all_sn vector, so an account that is retweeted several times but
    # never tweets itself is only added as a vertex once.
    if (!rt_name %in% V(retweet_graph)$name) {
      retweet_graph <- retweet_graph + vertices(rt_name)
    }

    # Add the edge from the tweeting account to the retweeted account
    retweet_graph <- retweet_graph +
      edges(c(raw_tweets$screen_name[i], rt_name))
  }
}
## Count the vertices that have no edges at all (degree 0)
length(which(degree(retweet_graph) == 0))
## Simplify the graph, then drop every vertex left without an edge
retweet_graph <- simplify(retweet_graph)
retweet_graph <- delete.vertices(
  retweet_graph,
  which(degree(retweet_graph) == 0)
)
Case Studies: Network Analysis in R