Time Series Analysis in PostgreSQL
Jasmin Ludolf
Content Developer, DataCamp
MIN()
, MAX()
, SUM()
, AVG()
, COUNT()
SELECT
AVG(views)::INTEGER as avg_views
title
FROM dc_news_fact
JOIN dc_news_dim USING(id)
GROUP BY title
ORDER BY avg_views DESC;
|avg_views|title |
|---------|---------------------------|
| 207|For the Wealthiest, a Pr...|
| 12|Pet surrenders on rise a...|
| 12|Argentina's New Presiden...|
| 7|These 5 charts prove tha...|
| 7|How Is the Economy Doing...|
WITH day_views AS (
SELECT id, ts::DATE AS date, SUM(views) AS views
FROM dc_news_fact
GROUP BY id, date
)
SELECT
AVG(views)::INTEGER AS avg
title
FROM day_views JOIN dc_news_dim USING(id)
GROUP BY title
ORDER BY avg DESC;
| avg|title |
|----|-----------------------------------------------------------------|
|9855|For the Wealthiest, a Private Tax System That Saves Them Billions|
| 579|Pet surrenders on rise as Fort McMurray's economy falls |
| 574|Argentina's New President Moves Swiftly to Shake Up the Economy |
| 352|How Is the Economy Doing? Politics May Decide Your Answer |
| 348|These 5 charts prove that the economy does better under ... |
SELECT
(SUM(views)/COUNT(DISTINCT ts::DATE))::INTEGER as avg,
title
FROM dc_news_fact JOIN dc_news_dim USING(id)
GROUP BY title
ORDER BY avg DESC;
|avg |title |
|----|-----------------------------------------------------------------|
|9855|For the Wealthiest, a Private Tax System That Saves Them Billions|
| 579|Pet surrenders on rise as Fort McMurray's economy falls |
| 574|Argentina's New President Moves Swiftly to Shake Up the Economy |
...
Discrete median: the first value closest to the middle value
Continuous median: a value that cuts the dataset in half
Odd number of elements
Even number of elements
PERCENTILE_DISC()
PERCENTILE_CONT()
PERCENTILE_DISC(fraction) WITHIN GROUP (ORDER BY field)
SELECT
PERCENTILE_CONT(0.5) WITHIN GROUP
(ORDER BY value) AS median_cont,
PERCENTILE_DISC(0.5) WITHIN GROUP
(ORDER BY value) AS median_disc
FROM
(
VALUES
(1,1), (1,2), (1,3), (1,4), (1,5),
(2,1), (2,5), (2,7), (2,11), (2,11)
) AS t (id, value)
GROUP BY id;
|median_cont|median_disc|
|-----------|-----------|
| 3.0| 3|
| 7.0| 7|
SELECT
PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY value) AS ptile_25,
PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY value) AS ptile_50,
PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY value) AS ptile_75
FROM
(
VALUES
(1,1), (1,2), (1,3), (1,4), (1,5)
) AS t (id, value)
GROUP BY id;
|ptile_25|ptile_50|ptile_75|
|--------|--------|--------|
| 2| 3| 4|
SELECT
PERCENTILE_DISC(ARRAY[0.25, 0.5, 0.75])
WITHIN GROUP (ORDER BY value) AS median_disc
FROM (
VALUES
(1,1), (1,2), (1,3), (1,4), (1,5) )
AS t (id, value)
GROUP BY id;
|median_disc|
|-----------|
|{2,3,4} |
Time Series Analysis in PostgreSQL