Data Manipulation in Julia
Katerina Zahradova
Instructor
# Not readable
combine(groupby(
select(wages, :year, :eff_2020),
:year),
:eff_2020 => mean)
# Easy to forget a bracket
combine(groupby(wages, :year, ... )
syntax: incomplete:
premature end of input ...
# Many intermediate variables
w_tmp = select(wages, :state, :year, :eff_2020)
year_groups = groupby(w_tmp, :year)
combine(year_groups, :eff_2020 => mean)
# Easy to overwrite something important
select!(wages, :year, :eff_2020)
groupby(wages, :state)
ArgumentError:
column name "state" not found ...
Computing average minimum wage by year
:year
@chain wages begin
select(:state, :year, :eff_2020)
groupby(:year)
combine(:eff_2020 => mean)
end
53×2 DataFrame
Row year eff_2020_mean
Int64 Float64
_________________________
1 1968 9.28529
...
@chain wages begin
select(:state, :year, :eff_2020)
groupby(:year)
combine(:eff_2020 => mean)
# Use _ to pipe in multiple places
plot(_.year, _.eff_2020_mean)
end
@chain wages begin
select(:state, :year, :eff_2020, :state_2020)
groupby(:year)
combine([:eff_2020, :state_2020] .=> mean)
# Use @aside to skip piping
@aside plot(_.year, _.eff_2020_mean)
plot!(_.year, _.state_2020_mean)
end
# Save the output of the chain macro as a variable
wages_mean_by_year = @chain wages begin
select(:state, :year, :eff_2020, :state_2020)
groupby(:year)
combine([:eff_2020, :state_2020] .=> mean)
end
# Print the first line
println(first(wages_mean_by_year))
DataFrameRow
Row | year eff_2020_mean state_2020_mean
| Int64 Float64 Float64
____|_________________________________
1| 1968 9.28529 6.21549
Data Manipulation in Julia