Exploring our data

Case Studies: Network Analysis in R

Edmund Hart

Instructor

library(igraph)
library(dplyr)
library(lubridate)
# Read the bike-share trip records; text columns are kept as character
# vectors rather than being converted to factors.
bike_dat <- read.csv(
  file = "datasets/bike2_test3.csv",
  stringsAsFactors = FALSE
)

# Show each column's type together with its first few values.
str(bike_dat)
'data.frame':    52800 obs. of  13 variables:
 $ tripduration     : int  295 533 1570 2064 2257 296 412...
 $ from_station_id  : int  49 165 25 300 85 174 75 45 85 99 ...
 $ from_station_name: chr  "Dearborn St & Monroe St" ...
 $ to_station_id    : int  174 308 287 296 313 198 56 147 174 99 ...
 $ to_station_name  : chr  "Canal St & Madison St" ...
 $ usertype         : chr  "Subscriber" "Subscriber" "Customer"...
 $ gender           : chr  "Male" "Male" "" "" ...
 $ birthyear        : int  1964 1972 NA NA 1963 1973 1989 1965 1983 1983 ...
 $ from_latitude    : num  41.9 42 41.9 41.9 41.9 ...
 $ from_longitude   : num  -87.6 -87.7 -87.6 -87.6 -87.6 ...
 $ to_latitude      : num  41.9 41.9 41.9 41.9 41.9 ...
 $ to_longitude     : num  -87.6 -87.7 -87.6 -87.6 -87.6 ...
 $ geo_distance     : num  859 1882 2159 288 3044 ...
Case Studies: Network Analysis in R

Creating the bike sharing graph

# Count the trips made between every (origin, destination) station pair.
# Each row of the result describes one edge, and `weights` is the number of
# trips along it. The summary remains grouped by from_station_id.
trip_df <- summarise(
  group_by(bike_dat, from_station_id, to_station_id),
  weights = n()
)

# Preview the first rows of the edge list.
head(trip_df)
# A tibble: 6 x 3
# Groups: from_station_id [1]
  from_station_id to_station_id weights
            <int>         <int>   <int>
1               5             5       2
2               5            14       1
3               5            16       1
4               5            25       3
5               5            29       3
6               5            33       1
Case Studies: Network Analysis in R

Creating the bike sharing graph

# Build the graph from the first two columns of trip_df
# (from_station_id, to_station_id); each row becomes one edge.
trip_g <- graph_from_data_frame(trip_df[, 1:2])

# add edge weights
# One trip count per edge, in the same row order used to build the graph.
E(trip_g)$weight <- trip_df$weights
# Quick exploration of our graph
# gsize() reports the number of edges, gorder() the number of vertices.
gsize(trip_g)
19052
gorder(trip_g)
300
Case Studies: Network Analysis in R

Explore the graph

# Restrict the graph to its first 12 vertices and the edges among them.
sg <- induced_subgraph(trip_g, vids = 1:12)

# Draw the subgraph without vertex labels. Edge thickness grows with the log
# of the edge's `weight` attribute (offset by 2).
# NOTE(review): assumes `weight` was assigned on trip_g before this point.
plot(
  sg,
  margin = 0,
  vertex.label = NA,
  vertex.size = 6,
  edge.width = log(E(sg)$weight + 2),
  edge.arrow.size = 0.6,
  edge.arrow.width = 0.8
)

Case Studies: Network Analysis in R

Let's practice!

Case Studies: Network Analysis in R

Preparing Video For Download...