Time Series Analysis in PostgreSQL
Jasmin Ludolf
Content Developer, DataCamp
MIN(), MAX(), SUM(), AVG(), COUNT()
SELECT
  AVG(views)::INTEGER as avg_views
  title
FROM dc_news_fact
JOIN dc_news_dim USING(id)
GROUP BY title
ORDER BY avg_views DESC;
|avg_views|title                      |
|---------|---------------------------|
|      207|For the Wealthiest, a Pr...|
|       12|Pet surrenders on rise a...|
|       12|Argentina's New Presiden...|
|        7|These 5 charts prove tha...|
|        7|How Is the Economy Doing...|
WITH day_views AS (
  SELECT id, ts::DATE AS date, SUM(views) AS views
  FROM dc_news_fact
  GROUP BY id, date
)
SELECT
  AVG(views)::INTEGER AS avg
  title
FROM day_views JOIN dc_news_dim USING(id)
GROUP BY title
ORDER BY avg DESC;
| avg|title                                                            |
|----|-----------------------------------------------------------------|
|9855|For the Wealthiest, a Private Tax System That Saves Them Billions|
| 579|Pet surrenders on rise as Fort McMurray's economy falls          |
| 574|Argentina's New President Moves Swiftly to Shake Up the Economy  |
| 352|How Is the Economy Doing? Politics May Decide Your Answer        |
| 348|These 5 charts prove that the economy does better under ...      |
SELECT
  (SUM(views)/COUNT(DISTINCT ts::DATE))::INTEGER as avg,
  title
FROM dc_news_fact JOIN dc_news_dim USING(id)
GROUP BY title 
ORDER BY avg DESC;
|avg |title                                                            |
|----|-----------------------------------------------------------------|
|9855|For the Wealthiest, a Private Tax System That Saves Them Billions|
| 579|Pet surrenders on rise as Fort McMurray's economy falls          |
| 574|Argentina's New President Moves Swiftly to Shake Up the Economy  |
...
Discrete median: the first value closest to the middle value
Continuous median: a value that cuts the dataset in half
Odd number of elements
Even number of elements
PERCENTILE_DISC()PERCENTILE_CONT()PERCENTILE_DISC(fraction) WITHIN GROUP (ORDER BY field)SELECT
  PERCENTILE_CONT(0.5) WITHIN GROUP
  (ORDER BY value) AS median_cont,
  PERCENTILE_DISC(0.5) WITHIN GROUP
  (ORDER BY value) AS median_disc
FROM
(
  VALUES
  (1,1), (1,2), (1,3), (1,4), (1,5),
  (2,1), (2,5), (2,7), (2,11), (2,11)
) AS t (id, value)
GROUP BY id;
|median_cont|median_disc|
|-----------|-----------|
|        3.0|          3|
|        7.0|          7|
SELECT
  PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY value) AS ptile_25,
  PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY value) AS ptile_50,
  PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY value) AS ptile_75
FROM
(
  VALUES 
  (1,1), (1,2), (1,3), (1,4), (1,5)
) AS t (id, value)
GROUP BY id;
|ptile_25|ptile_50|ptile_75|
|--------|--------|--------|
|       2|       3|       4|
SELECT
  PERCENTILE_DISC(ARRAY[0.25, 0.5, 0.75]) 
  WITHIN GROUP (ORDER BY value) AS median_disc
FROM (
  VALUES
  (1,1), (1,2), (1,3), (1,4), (1,5) )
  AS t (id, value)
GROUP BY id;
|median_disc|
|-----------|
|{2,3,4}    |
Time Series Analysis in PostgreSQL