Extract a dataset

Predictive Analytics using Networked Data in R

María Óskarsdóttir, Ph.D.

Post-doctoral researcher

# Structural (link-based) features, stored as vertex attributes on `g`.
vertex_attr(g, "degree") <- degree(g)
vertex_attr(g, "triangles") <- count_triangles(g)
# NOTE(review): the attribute name is spelled "betweeness" (single "n");
# kept as-is so any code that reads this attribute still finds it.
vertex_attr(g, "betweeness") <- betweenness(g, normalized = TRUE)
# Local (per-vertex) transitivity, requested with isolates = "zero".
vertex_attr(g, "transitivity") <- transitivity(
  g,
  type = "local",
  isolates = "zero"
)

# Adjacency matrix of `g`, used below to aggregate over each vertex's
# neighbours.
A <- as_adjacency_matrix(g)

# Node-level attributes, one entry per vertex, in vertex order.
preference <- c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
age <- c(23, 65, 33, 36, 28, 45, 41, 24, 38, 39)

# A %*% x sums x over the neighbours of every vertex.
V(g)$rNeighbors <- as.vector(A %*% preference)
# Neighbour age sum divided by the neighbour count. Relies on V(g)$degree
# having been set beforehand; a vertex with degree 0 would yield NaN here.
V(g)$averageAge <- as.vector(A %*% age / V(g)$degree)

# Global PageRank, and PageRank personalized on the first vertex only.
V(g)$pageRank <- page_rank(g)$vector
# Stored as `pPageRank` so the attribute matches the printed graph summary
# and the extracted data frame.
V(g)$pPageRank <- page_rank(
  g,
  personalized = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
)$vector

# Print the graph together with its attribute summary.
g
IGRAPH UN-- 10 19 -- 
+ attr: name (v/c), degree (v/n), triangles (v/n), transitivity
| (v/n), rNeighbors (v/n), averageAge (v/n), pageRank (v/n),
| pPageRank (v/n), label (e/c)
+ edges (vertex names):
 [1] A--B A--C A--D A--E B--C B--D C--D C--G D--E D--F D--G E--F F--G F--I G--I G--H H--I H--J I--J
Predictive Analytics using Networked Data in R
IGRAPH UN-- 10 19 -- 
+ attr: name (v/c), degree (v/n), triangles (v/n), transitivity
| (v/n), rNeighbors (v/n), averageAge (v/n), pageRank (v/n),
| pPageRank (v/n), label (e/c)
+ edges (vertex names):
 [1] A--B A--C A--D A--E B--C B--D C--D C--G D--E D--F D--G E--F F--G F--I G--I G--H H--I H--J I--J
# Extract the vertex attributes of `g` as a data frame, one row per vertex.
as_data_frame(g, what = "vertices")
  name degree triangles transitivity rNeighbors averageAge   pageRank  pPageRank
A    A      4         4    0.6666667          4   40.50000 0.10238312 0.25528911
B    B      3         3    1.0000000          3   30.66667 0.07917232 0.10363533
C    C      4         4    0.6666667          3   41.25000 0.10164910 0.12156935
D    D      6         7    0.4666667          5   39.16667 0.14693274 0.16625582
E    E      3         2    0.6666667          3   34.66667 0.07953551 0.09366836
F    F      4         3    0.5000000          2   35.75000 0.10335821 0.07466596
G    G      5         4    0.4000000          3   35.20000 0.12732387 0.08473039
H    H      3         2    0.6666667          0   39.33333 0.08675903 0.03285162
I    I      4         3    0.5000000          1   37.25000 0.10994175 0.04785657
J    J      2         1    1.0000000          0   31.00000 0.06294435 0.01947748
Predictive Analytics using Networked Data in R

Preprocessing - missing values

# Number of rows whose `degree` value is missing.
sum(is.na(dataset[["degree"]]))
2
Predictive Analytics using Networked Data in R

Preprocessing - correlated variables

library(corrplot)

# Pairwise correlations of every column except the first one.
M <- cor(dataset[, -1])

# Draw the correlation matrix using the "circle" method.
corrplot(M, method = "circle")

Predictive Analytics using Networked Data in R

Let's practice!

Predictive Analytics using Networked Data in R

Preparing Video For Download...