Analyzing US Census Data in R
Kyle Walker
Instructor
# For every GEOID, keep the row(s) of tx_race whose estimate equals that
# GEOID's maximum estimate. Ties are all retained, because the filter is an
# equality test against max().
tx_largest <- tx_race %>%
  group_by(GEOID) %>%
  filter(estimate == max(estimate)) %>%
  # GEOID is the grouping column, so dplyr keeps it in the result even when
  # it is not selected (and prints an "Adding missing grouping variables"
  # message). Naming it explicitly yields the same four columns without
  # relying on that implicit behaviour.
  select(GEOID, NAME, variable, estimate)

# Print the result; it is still grouped by GEOID.
tx_largest
# A tibble: 254 x 4
# Groups: GEOID [254]
GEOID NAME variable estimate
<chr> <chr> <chr> <dbl>
1 48001 Anderson County, Texas White 34680
2 48003 Andrews County, Texas Hispanic 9360
3 48005 Angelina County, Texas White 54060
4 48007 Aransas County, Texas White 16836
5 48009 Archer County, Texas White 7751
# ... with 249 more rows
# Count the rows of tx_largest per value of `variable`. group_by() replaces
# the existing grouping, so the tally is by `variable` alone.
tally(group_by(tx_largest, variable))
# A tibble: 2 x 2
variable n
<chr> <int>
1 Hispanic 67
2 White 187
# Collapse the individual B19001 variables in wa_income into three labelled
# income groups per NAME and sum the estimates within each group.
wa_grouped <- wa_income %>%
  # Exclude B19001_001 so it is not summed into any group
  # (presumably the table's overall total -- confirm against the ACS table).
  filter(variable != "B19001_001") %>%
  group_by(
    NAME,
    # Variable codes are compared as strings; the first matching condition
    # determines the label.
    incgroup = case_when(
      variable < "B19001_008" ~ "below35k",
      variable < "B19001_013" ~ "35kto75k",
      TRUE ~ "above75k"
    )
  ) %>%
  summarize(group_est = sum(estimate))

# Print the grouped totals.
wa_grouped
# A tibble: 117 x 3
NAME incgroup group_est
<chr> <chr> <dbl>
1 Adams County, Washington 35kto75k 2124
2 Adams County, Washington above75k 1482
3 Adams County, Washington below35k 2127
# ... with 114 more rows
# Request variable B01003_001 (named totalpop) for places in state "MI" with
# survey = "acs1", once per year from 2012 to 2016, and row-bind the yearly
# results into a single data frame.
mi_cities <- map_df(2012:2016, function(yr) {
  # get_acs() is not defined in this file (presumably tidycensus -- confirm).
  acs_year <- get_acs(
    geography = "place",
    variables = c(totalpop = "B01003_001"),
    state = "MI",
    survey = "acs1",
    year = yr
  )
  # Tag each row with the year it was requested for, so the years remain
  # distinguishable after binding.
  mutate(acs_year, year = yr)
})

# Show the combined data ordered by place name, then by year.
arrange(mi_cities, NAME, year)
# A tibble: 80 x 6
GEOID NAME variable estimate moe year
<chr> <chr> <chr> <dbl> <dbl> <int>
1 2603000 Ann Arbor city, Michigan totalpop 116128 35 2012
2 2603000 Ann Arbor city, Michigan totalpop 117034 43 2013
3 2603000 Ann Arbor city, Michigan totalpop 117759 44 2014
4 2603000 Ann Arbor city, Michigan totalpop 117070 33 2015
5 2603000 Ann Arbor city, Michigan totalpop 120777 33 2016
6 2621000 Dearborn city, Michigan totalpop 96470 28 2012
7 2621000 Dearborn city, Michigan totalpop 95888 35 2013
8 2621000 Dearborn city, Michigan totalpop 95546 48 2014
9 2621000 Dearborn city, Michigan totalpop 95180 40 2015
10 2621000 Dearborn city, Michigan totalpop 94430 52 2016
# ... with 70 more rows
Analyzing US Census Data in R