PostgreSQL Summary Stats and Window Functions
Michel Semaan
Data Scientist
Query
SELECT
Year, Event, Country,
ROW_NUMBER() OVER () AS Row_N
FROM Summer_Medals
WHERE
Medal = 'Gold';
Result*
| Year | Event | Country | Row_N |
|------|----------------------------|---------|-------|
| 1896 | 100M Freestyle | HUN | 1 |
| 1896 | 100M Freestyle For Sailors | GRE | 2 |
| 1896 | 1200M Freestyle | HUN | 3 |
| ... | ... | ... | ... |
ORDER BY
in OVER
orders the rows related to the current rowROW_NUMBER
's OVER
clause will assign 1 to the most recent year's rowsQuery
SELECT
Year, Event, Country,
ROW_NUMBER() OVER (ORDER BY Year DESC) AS Row_N
FROM Summer_Medals
WHERE
Medal = 'Gold';
Result
| Year | Event | Country | Row_N |
|------|---------------|---------|-------|
| 2012 | Wg 96 KG | IRI | 1 |
| 2012 | 4X100M Medley | USA | 2 |
| 2012 | Wg 84 KG | RUS | 3 |
| ... | ... | ... | ... |
| 2008 | 50M Freestyle | BRA | 637 |
| 2008 | 96 - 120KG | CUB | 638 |
| ... | ... | ... | ... |
Query
SELECT
Year, Event, Country,
ROW_NUMBER() OVER
(ORDER BY Year DESC, Event ASC) AS Row_N
FROM Summer_Medals
WHERE
Medal = 'Gold';
Result
| Year | Event | Country | Row_N |
|------|---------|---------|-------|
| 2012 | + 100KG | FRA | 1 |
| 2012 | + 67 KG | SRB | 2 |
| 2012 | + 78KG | CUB | 3 |
| ... | ... | ... | ... |
Query
SELECT
Year, Event, Country,
ROW_NUMBER() OVER
(ORDER BY Year DESC, Event ASC) AS Row_N
FROM Summer_Medals
WHERE
Medal = 'Gold'
ORDER BY Country ASC, Row_N ASC;
Result
| Year | Event | Country | Row_N |
|------|---------|---------|-------|
| 2012 | 1500M | ALG | 36 |
| 2000 | 1500M | ALG | 1998 |
| 1996 | 1500M | ALG | 2662 |
| ... | ... | ... | ... |
ORDER BY
inside OVER
takes effect before ORDER BY
outside OVER
Enter LAG
LAG(column, n) OVER (...)
returns column
's value at the row n
rows before the current rowLAG(column, 1) OVER (...)
returns the previous row's valueQuery
SELECT
Year, Country AS Champion
FROM Summer_Medals
WHERE
Year IN (1996, 2000, 2004, 2008, 2012)
AND Gender = 'Men' AND Medal = 'Gold'
AND Event = 'Discus Throw';
Result
| Year | Champion |
|------|----------|
| 1996 | GER |
| 2000 | LTU |
| 2004 | LTU |
| 2008 | EST |
| 2012 | GER |
Query
WITH Discus_Gold AS (
SELECT
Year, Country AS Champion
FROM Summer_Medals
WHERE
Year IN (1996, 2000, 2004, 2008, 2012)
AND Gender = 'Men' AND Medal = 'Gold'
AND Event = 'Discus Throw')
SELECT
Year, Champion,
LAG(Champion, 1) OVER
(ORDER BY Year ASC) AS Last_Champion
FROM Discus_Gold
ORDER BY Year ASC;
Result
| Year | Champion | Last_Champion |
|------|----------|---------------|
| 1996 | GER | null |
| 2000 | LTU | GER |
| 2004 | LTU | LTU |
| 2008 | EST | LTU |
| 2012 | GER | EST |
PostgreSQL Summary Stats and Window Functions