Case Studies: Network Analysis in R
Edmund Hart
Instructor
library(igraph)
library(dplyr)
library(lubridate)
# Load the trip records; keep text columns as character vectors, not factors
bike_dat <- read.csv(
  "datasets/bike2_test3.csv",
  stringsAsFactors = FALSE
)
# Inspect the column names and types
str(bike_dat)
'data.frame': 52800 obs. of 13 variables:
$ tripduration : int 295 533 1570 2064 2257 296 412...
$ from_station_id : int 49 165 25 300 85 174 75 45 85 99 ...
$ from_station_name: chr "Dearborn St & Monroe St" ...
$ to_station_id : int 174 308 287 296 313 198 56 147 174 99 ...
$ to_station_name : chr "Canal St & Madison St" ...
$ usertype : chr "Subscriber" "Subscriber" "Customer"...
$ gender : chr "Male" "Male" "" "" ...
$ birthyear : int 1964 1972 NA NA 1963 1973 1989 1965 1983 1983 ...
$ from_latitude : num 41.9 42 41.9 41.9 41.9 ...
$ from_longitude : num -87.6 -87.7 -87.6 -87.6 -87.6 ...
$ to_latitude : num 41.9 41.9 41.9 41.9 41.9 ...
$ to_longitude : num -87.6 -87.7 -87.6 -87.6 -87.6 ...
$ geo_distance : num 859 1882 2159 288 3044 ...
# Count the trips for every (origin, destination) station pair.
# The result stays grouped by from_station_id, because summarise() drops
# only the last grouping level.
trip_df <- bike_dat %>%
  group_by(from_station_id, to_station_id) %>%
  summarise(weights = n())
# Preview the first rows of the resulting edge list
head(trip_df)
# A tibble: 6 x 3
# Groups: from_station_id [1]
from_station_id to_station_id weights
<int> <int> <int>
1 5 5 2
2 5 14 1
3 5 16 1
4 5 25 3
5 5 29 3
6 5 33 1
# Build a directed graph from the (from_station_id, to_station_id) pairs
trip_g <- graph_from_data_frame(trip_df[, 1:2])
# add edge weights
# (edges are created in row order, so trip_df$weights lines up with E(trip_g))
E(trip_g)$weight <- trip_df$weights
# Quick exploration of our graph
gsize(trip_g)
19052
# Number of vertices in the graph
gorder(trip_g)
300
# Keep only the first 12 vertices and the edges among them
sg <- induced_subgraph(trip_g, 1:12)
# Edge width follows the log of the edge weight; the + 2 offset keeps every
# width above zero for non-negative weights
plot(
  sg,
  vertex.label = NA,
  vertex.size = 6,
  margin = 0,
  edge.arrow.size = 0.6,
  edge.arrow.width = 0.8,
  edge.width = log(E(sg)$weight + 2)
)
Case Studies: Network Analysis in R