Programming with dplyr
Dr. Chester Ismay
Educator, Data Scientist, and R/Python Consultant
# Consumer price index rows for Uruguay, restricted to years after 2010.
imf_data %>%
  select(iso, country, year, consumer_price_index) %>%
  filter(country == "Uruguay", year > 2010)
# A tibble: 9 × 4
iso country year consumer_price_index
<chr> <chr> <int> <dbl>
1 URY Uruguay 2011 105.
2 URY Uruguay 2012 114.
3 URY Uruguay 2013 123.
4 URY Uruguay 2014 134.
5 URY Uruguay 2015 146.
6 URY Uruguay 2016 160.
7 URY Uruguay 2017 170.
8 URY Uruguay 2018 183.
9 URY Uruguay 2019 197.
# Same query as before with only the country changed: Belize, years after 2010.
imf_data %>%
  select(iso, country, year, consumer_price_index) %>%
  filter(country == "Belize", year > 2010)
# A tibble: 9 × 4
iso country year consumer_price_index
<chr> <chr> <int> <dbl>
1 BLZ Belize 2011 130.
2 BLZ Belize 2012 132.
3 BLZ Belize 2013 133.
4 BLZ Belize 2014 134.
5 BLZ Belize 2015 133.
6 BLZ Belize 2016 134.
7 BLZ Belize 2017 136.
8 BLZ Belize 2018 136.
9 BLZ Belize 2019 136.
#' Consumer price index rows for one country
#'
#' Reads the `imf_data` object from the calling environment's scope, keeps the
#' `iso`, `country`, `year` and `consumer_price_index` columns, and returns
#' the rows for `country_name` with `year` greater than 2010.
#'
#' @param country_name Value compared against the `country` column with `==`.
#' @return The filtered rows of `imf_data`, limited to the four columns above.
cpi_by_country <- function(country_name) {
  imf_data %>%
    select(iso, country, year, consumer_price_index) %>%
    filter(country == country_name, year > 2010)
}
#' Consumer price index rows for one country
#'
#' @param country_name Value compared against the `country` column with `==`.
#' @return Rows of `imf_data` for that country with `year` greater than 2010,
#'   limited to `iso`, `country`, `year` and `consumer_price_index`.
cpi_by_country <- function(country_name) {
  # Narrow to the columns of interest first, then keep the matching rows.
  cpi_columns <- select(imf_data, iso, country, year, consumer_price_index)
  filter(cpi_columns, country == country_name, year > 2010)
}
cpi_by_country(country_name = "Samoa")
# A tibble: 9 × 4
iso country year consumer_price_index
<chr> <chr> <int> <dbl>
1 WSM Samoa 2011 102.
2 WSM Samoa 2012 109.
3 WSM Samoa 2013 108.
4 WSM Samoa 2014 107.
5 WSM Samoa 2015 109.
6 WSM Samoa 2016 109.
7 WSM Samoa 2017 111.
8 WSM Samoa 2018 115.
9 WSM Samoa 2019 117.
# Match IMF rows to World Bank rows on `iso` and `year`, then move
# `continent` and `region` so they sit directly after `year`.
joined <- imf_data %>%
  inner_join(world_bank_data, by = c("iso", "year")) %>%
  relocate(continent, region, .after = year)
joined
# A tibble: 299 × 22
iso country.x year continent region gdp_in_billions...
<chr> <chr> <dbl> <fct> <fct> <dbl>
1 ALB Albania 2011 Europe Southern Europe 12.9
2 ALB Albania 2012 Europe Southern Europe 12.3
3 AGO Angola 2014 Africa Middle Africa 146.
# ... with 296 more rows, and 16 more variables:
# usd_conversion_rate <dbl>, total_investment_as_perc_gdp <dbl>,
# consumer_price_index <dbl>, imports_perc_change <dbl>,
# exports_perc_change <dbl>, population_in_millions <dbl>,
# gov_revenue_as_perc_gdp <dbl>, gov_net_debt_as_perc_gdp <dbl>,
# country.y <chr>, infant_mortality_rate <dbl>, fertility_rate <dbl>,
# perc_electric_access <dbl>, perc_college_complete <dbl>, ...
# Mean of `gov_revenue_as_perc_gdp` within each continent.
joined %>%
  group_by(continent) %>%
  summarize(mean_gov_revenue = mean(gov_revenue_as_perc_gdp))
# A tibble: 5 × 2
continent mean_gov_revenue
<fct> <dbl>
1 Africa 18.2
2 Americas 25.6
3 Asia 26.7
4 Europe 41.5
5 Oceania 35.6
# First attempt at making the grouping column an argument. The argument is
# passed to group_by() as-is, so group_by() looks for a column literally named
# `group_col` in `joined`; calling this with `group_col = year` produces the
# "Column `group_col` is not found" error shown below.
grouped_mean_gov_revenue <- function(group_col) { joined %>% group_by(group_col) %>% summarize(mean_gov_revenue = mean(gov_revenue_as_perc_gdp)) }
grouped_mean_gov_revenue(group_col = year)
Error: Must group by variables found in `.data`.
* Column `group_col` is not found.
library(rlang)
#' Mean government revenue for each level of a grouping column
#'
#' @param group_col Unquoted name of a column in `joined`. It is embraced with
#'   `{{ }}` so that group_by() receives the column the caller named rather
#'   than the symbol `group_col`.
#' @return One row per group, with the group column and `mean_gov_revenue`
#'   (the mean of `gov_revenue_as_perc_gdp`).
grouped_mean_gov_revenue <- function(group_col) {
  joined %>%
    group_by({{ group_col }}) %>%
    summarize(mean_gov_revenue = mean(gov_revenue_as_perc_gdp))
}
grouped_mean_gov_revenue(group_col = year)
# A tibble: 17 × 2
year mean_gov_revenue
<dbl> <dbl>
1 2000 39.4
2 2001 30.8
...
16 2015 34.7
17 2016 32.3
Programming with dplyr