PostgreSQL Summary Stats and Window Functions
Michel Semaan
Data Scientist
Query
WITH Discus_Gold AS (
SELECT
Year, Event, Country AS Champion
FROM Summer_Medals
WHERE
Year IN (2004, 2008, 2012)
AND Gender = 'Men' AND Medal = 'Gold'
AND Event IN ('Discus Throw', 'Triple Jump')
AND Gender = 'Men')
SELECT
Year, Event, Champion,
LAG(Champion) OVER
(ORDER BY Event ASC, Year ASC) AS Last_Champion
FROM Discus_Gold
ORDER BY Event ASC, Year ASC;
Result
| Year | Event | Champion | Last_Champion |
|------|--------------|----------|---------------|
| 2004 | Discus Throw | LTU | null |
| 2008 | Discus Throw | EST | LTU |
| 2012 | Discus Throw | GER | EST |
| 2004 | Triple Jump | SWE | GER |
| 2008 | Triple Jump | POR | SWE |
| 2012 | Triple Jump | USA | POR |
Event
changes from Discus Throw
to Triple Jump
, LAG
fetched Discus Throw
's last champion as opposed to a null
PARTITION BY
splits the table into partitions based on a column's unique valuesROW_NUMBER
will reset for each partitionLAG
will only fetch a row's previous value if its previous row is in the same partitionQuery
WITH Discus_Gold AS (...)
SELECT
Year, Event, Champion,
LAG(Champion) OVER
(PARTITION BY Event
ORDER BY Event ASC, Year ASC) AS Last_Champion
FROM Discus_Gold
ORDER BY Event ASC, Year ASC;
Result
| Year | Event | Champion | Last_Champion |
|------|--------------|----------|---------------|
| 2004 | Discus Throw | LTU | null |
| 2008 | Discus Throw | EST | LTU |
| 2012 | Discus Throw | GER | EST |
| 2004 | Triple Jump | SWE | null |
| 2008 | Triple Jump | POR | SWE |
| 2012 | Triple Jump | USA | POR |
| Year | Country | Event | Row_N |
|------|---------|----------------------|-------|
| 2008 | CHN | + 78KG (Heavyweight) | 1 |
| 2008 | CHN | - 49 KG | 2 |
| ... | ... | ... | ... |
| 2008 | JPN | 48 - 55KG | 27 |
| 2008 | JPN | 48 - 55KG | 28 |
| ... | ... | ... | ... |
| 2012 | CHN | +75KG | 32 |
| 2012 | CHN | - 49 KG | 33 |
| ... | ... | ... | ... |
| 2012 | JPN | +75KG | 51 |
| 2012 | JPN | - 49 KG | 52 |
| ... | ... | ... | ... |
Year
and Country
Query
WITH Country_Gold AS (
SELECT
DISTINCT Year, Country, Event
FROM Summer_Medals
WHERE
Year IN (2008, 2012)
AND Country IN ('CHN', 'JPN')
AND Gender = 'Women' AND Medal = 'Gold')
SELECT
Year, Country, Event,
ROW_NUMBER() OVER (PARTITION BY Year, Country)
FROM Country_Gold;
Result
| Year | Country | Event | Row_N |
|------|---------|----------------------|-------|
| 2008 | CHN | + 78KG (Heavyweight) | 1 |
| 2008 | CHN | - 49 KG | 2 |
| ... | ... | ... | ... |
| 2008 | JPN | 48 - 55KG | 1 |
| 2008 | JPN | 48 - 55KG | 2 |
| ... | ... | ... | ... |
| 2012 | CHN | +75KG | 1 |
| 2012 | CHN | - 49 KG | 2 |
| ... | ... | ... | ... |
| 2012 | JPN | +75KG | 1 |
| 2012 | JPN | - 49 KG | 2 |
| ... | ... | ... | ... |
PostgreSQL Summary Stats and Window Functions