Measures of center

Exploratory Data Analysis in R

Andrew Bray

Assistant Professor, Reed College

County demographics

life
# A tibble: 3,142 x 4
     state          county expectancy income
     <chr>           <chr>      <dbl>  <int>
1  Alabama  Autauga County     76.060  37773
2  Alabama  Baldwin County     77.630  40121
3  Alabama  Barbour County     74.675  31443
4  Alabama     Bibb County     74.155  29075
5  Alabama   Blount County     75.880  31663
6  Alabama  Bullock County     71.790  25929
7  Alabama   Butler County     73.730  33518
8  Alabama  Calhoun County     73.300  33418
9  Alabama Chambers County     73.245  31282
10 Alabama Cherokee County     74.650  32645
# ... with 3,132 more rows
Exploratory Data Analysis in R

Center: mean

x <- head(round(life$expectancy), 11)
x
76 78 75 74 76 72 74 73 73 75 74

ch3_1.002.png

Exploratory Data Analysis in R

Center: mean

sum(x)/11
74.54545
mean(x)
74.54545

ch3_1.002.png

Exploratory Data Analysis in R

Center: mean

sum(x)/11
74.54545
mean(x)
74.54545

ch3_1.006.png

Exploratory Data Analysis in R

Center: mean, median

sort(x)
72 73 73 74 74 74 75 75 76 76 78
median(x)
74

ch3_1.007.png

Exploratory Data Analysis in R

Center: mean, median

sort(x)
72 73 73 74 74 74 75 75 76 76 78
median(x)
74

ch3_1.011.png

Exploratory Data Analysis in R

Center: mean, median, mode

table(x)
x
72 73 74 75 76 78 
 1  2  3  2  2  1 

ch3_1.014.png

Exploratory Data Analysis in R

Center: mean, median, mode

table(x)
x
72 73 74 75 76 78 
 1  2  3  2  2  1 

ch3_1.015.png

Exploratory Data Analysis in R

Groupwise means

life <- life %>%
  mutate(west_coast = state %in% c("California", "Oregon", "Washington"))
life %>%
  group_by(west_coast) %>%
  summarize(mean(expectancy),
            median(expectancy))
# A tibble: 2 x 3
  west_coast mean(expectancy) median(expectancy)
       <lgl>            <dbl>              <dbl>
1      FALSE         77.12750              77.31
2       TRUE         78.90545              78.65
Exploratory Data Analysis in R

Without group_by()

life %>%
  slice(240:247) %>%
  summarize(mean(expectancy))
# A tibble: 1 x 1
  mean(expectancy)
             <dbl>
1          79.2775

ch3_1.032.png

Exploratory Data Analysis in R

With group_by()

life %>%
  slice(240:247) %>%
  group_by(west_coast) %>%
  summarize(mean(expectancy))
# A tibble: 2 x 2
  west_coast mean(expectancy)
       <lgl>            <dbl>
1      FALSE         79.26125
2       TRUE         79.29375

ch3_1.035.png

Exploratory Data Analysis in R

Let's practice!

Exploratory Data Analysis in R

Preparing Video For Download...