Aggregating with date/time series

Exploratory Data Analysis in SQL

Christina Maimone

Data Scientist

Generate series

SELECT generate_series(from, to, interval);
SELECT generate_series('2018-01-01',
                       '2018-01-15',
                       '2 days'::interval);
   generate_series   
---------------------
 2018-01-01 00:00:00
 2018-01-03 00:00:00
 2018-01-05 00:00:00
 2018-01-07 00:00:00
 2018-01-09 00:00:00
 2018-01-11 00:00:00
 2018-01-13 00:00:00
 2018-01-15 00:00:00
(8 rows)
Exploratory Data Analysis in SQL

Generate series

SELECT generate_series('2018-01-01',
                       '2018-01-02',
                       '5 hours'::interval);
    generate_series     
----------------------
 2018-01-01 00:00:00
 2018-01-01 05:00:00
 2018-01-01 10:00:00
 2018-01-01 15:00:00
 2018-01-01 20:00:00
(5 rows)
Exploratory Data Analysis in SQL

Generate series from the beginning

SELECT generate_series('2018-01-31',
                       '2018-12-31',
                       '1 month'::interval);
   generate_series     
---------------------
 2018-01-31 00:00:00
 2018-02-28 00:00:00
 2018-03-28 00:00:00
 2018-04-28 00:00:00
 2018-05-28 00:00:00
 2018-06-28 00:00:00
 2018-07-28 00:00:00
 2018-08-28 00:00:00
 2018-09-28 00:00:00
 2018-10-28 00:00:00
 2018-11-28 00:00:00
 2018-12-28 00:00:00
(12 rows)
Exploratory Data Analysis in SQL

Generate series from the beginning

-- Subtract 1 day to get end of month
SELECT generate_series('2018-02-01',  -- start 1 month late
                       '2019-01-01',
                       '1 month'::interval) - '1 day'::interval;
   generate_series
---------------------
 2018-01-31 00:00:00
 2018-02-28 00:00:00
 2018-03-31 00:00:00
 2018-04-30 00:00:00
 2018-05-31 00:00:00
 2018-06-30 00:00:00
 2018-07-31 00:00:00
 2018-08-31 00:00:00
 2018-09-30 00:00:00
 2018-10-31 00:00:00
 2018-11-30 00:00:00
 2018-12-31 00:00:00
(12 rows)
Exploratory Data Analysis in SQL

Normal aggregation

SELECT * FROM sales;
        date         | amount 
---------------------+--------
 2018-04-23 09:13:14 |     12
 2018-04-23 13:57:53 |     41
 2018-04-23 12:05:44 |     23
 2018-04-23 09:07:33 |     31
 2018-04-23 10:31:40 |      5
 2018-04-23 09:35:16 |     18
 2018-04-23 12:17:43 |     19
 2018-04-23 12:57:49 |     32
 2018-04-23 10:12:35 |     13
 2018-04-23 13:21:30 |      6
(10 rows)
SELECT date_trunc('hour', date) 
          AS hour, 
       count(*) 
  FROM sales 
 GROUP BY hour
 ORDER BY hour;
        hour         | count 
---------------------+-------
 2018-04-23 09:00:00 |     3
 2018-04-23 10:00:00 |     2
 2018-04-23 12:00:00 |     3
 2018-04-23 13:00:00 |     2
(4 rows)
Exploratory Data Analysis in SQL

Aggregation with series

-- Create the series as a table called hour_series
WITH hour_series AS (
     SELECT generate_series('2018-04-23 09:00:00',          -- 9am
                            '2018-04-23 14:00:00',          -- 2pm
                            '1 hour'::interval) AS hours)

-- Hours from series, count date (NOT *) to count non-NULL SELECT hours, count(date) -- Join series to sales data FROM hour_series LEFT JOIN sales ON hours=date_trunc('hour', date) GROUP BY hours ORDER BY hours;
Exploratory Data Analysis in SQL

Aggregation with series: result

         hours          | count 
------------------------+-------
 2018-04-23 09:00:00-05 |     3
 2018-04-23 10:00:00-05 |     2
 2018-04-23 11:00:00-05 |     0
 2018-04-23 12:00:00-05 |     3
 2018-04-23 13:00:00-05 |     2
 2018-04-23 14:00:00-05 |     0
(6 rows)
Exploratory Data Analysis in SQL

Aggregation with bins

-- Create bins
WITH bins AS (
      SELECT generate_series('2018-04-23 09:00:00', 
                             '2018-04-23 15:00:00',
                             '3 hours'::interval) AS lower,
             generate_series('2018-04-23 12:00:00', 
                             '2018-04-23 18:00:00',
                             '3 hours'::interval) AS upper)

-- Count values in each bin SELECT lower, upper, count(date) -- left join keeps all bins FROM bins LEFT JOIN sales ON date >= lower AND date < upper -- Group by bin bounds to create the groups GROUP BY lower, upper ORDER BY lower;
Exploratory Data Analysis in SQL

Bin result

        lower        |        upper        | count 
---------------------+---------------------+-------
 2018-04-23 09:00:00 | 2018-04-23 12:00:00 |     5
 2018-04-23 12:00:00 | 2018-04-23 15:00:00 |     5
 2018-04-23 15:00:00 | 2018-04-23 18:00:00 |     0
(3 rows)
Exploratory Data Analysis in SQL

Practice generating series!

Exploratory Data Analysis in SQL

Preparing Video For Download...