Introduction to Redshift
Jason Myers
Principal Architect
OVER
clauseThree main concepts
PARTITION BY
)ORDER BY
)SELECT division_id, sale_date, revenue,
-- Calculate the average revenue AVG(revenue) OVER ( -- By division for each year and month PARTITION BY division_id, DATE_PART('year', sale_date) DATE_PART('month', sale_date), ) AS month_avg_revenue
FROM orders ORDER BY division_id, sale_date DESC;
division_id | sale_date | revenue | dept_month_avg_revenue
============|============|=========|=======================
1 | 2024-01-23 | 350460 | 225500
1 | 2024-01-09 | 100540 | 225500
1 | 2023-12-15 | 231000 | 231000
1 | 2023-11-12 | 124000 | 68000
1 | 2023-11-07 | 75000 | 68000
1 | 2023-11-01 | 5000 | 68000
2 | 2024-01-10 | 500 | 500
2 | 2023-12-11 | 1000 | 16166.666666666667
2 | 2023-12-08 | 37000 | 16166.666666666667
2 | 2023-12-01 | 10500 | 16166.666666666667
ORDER BY
clauseSELECT division_id, DATE_PART('year', sale_date) AS sales_year, DATE_PART('month', sale_date) AS sales_month,
-- Count records for the window COUNT(*) AS current_month_sales,
-- Count the previous windows records LAG(COUNT(*), 1) OVER (
-- For each division PARTITION BY division_id -- Ordered by year and month ORDER BY DATE_PART('year', sale_date), DATE_PART('month', sale_date) ) AS prior_month_sales
FROM sales_data
-- Make sure to group by all the window clauses
GROUP BY division_id,
sales_year,
sales_month
ORDER BY division_id,
sales_year DESC,
sales_month DESC;
division_id | sales_year | sales_month | current_month_sales | prior_month_sales |
---|---|---|---|---|
1 | 2024 | 1 | 2 | 1 |
1 | 2023 | 12 | 1 | 3 |
1 | 2023 | 11 | 3 | null |
2 | 2024 | 1 | 1 | 3 |
2 | 2023 | 12 | 3 | null |
RANK
allows us to rank a value over the window according to the ORDER BY
clause starting with 1SELECT division_id,
sale_date,
revenue,
-- Calculate the rank for each sale in the window
RANK() OVER (
-- For each division
PARTITION BY division_id
-- Using revenue for the rank
ORDER BY revenue desc
) as division_sales_rank
FROM sales_data
-- Put them in rank order by division
ORDER BY division_id, division_sales_rank;
division_id | sale_date | revenue | division_sales_rank |
---|---|---|---|
1 | 2024-01-23 | 350460 | 1 |
1 | 2023-12-15 | 231000 | 2 |
1 | 2023-11-12 | 124000 | 3 |
1 | 2024-01-09 | 100540 | 4 |
1 | 2023-11-07 | 75000 | 5 |
1 | 2023-11-01 | 5000 | 6 |
2 | 2023-12-08 | 37000 | 1 |
2 | 2023-12-01 | 10500 | 2 |
2 | 2023-12-11 | 1000 | 3 |
2 | 2024-01-10 | 500 | 4 |
Introduction to Redshift