Aggeren met datum/tijdreeksen

Exploratory Data Analysis in SQL

Christina Maimone

Data Scientist

Reeksen genereren

SELECT generate_series(from, to, interval);
SELECT generate_series('2018-01-01',
                       '2018-01-15',
                       '2 days'::interval);
   generate_series   
---------------------
 2018-01-01 00:00:00
 2018-01-03 00:00:00
 2018-01-05 00:00:00
 2018-01-07 00:00:00
 2018-01-09 00:00:00
 2018-01-11 00:00:00
 2018-01-13 00:00:00
 2018-01-15 00:00:00
(8 rijen)
Exploratory Data Analysis in SQL

Reeksen genereren

SELECT generate_series('2018-01-01',
                       '2018-01-02',
                       '5 hours'::interval);
    generate_series     
----------------------
 2018-01-01 00:00:00
 2018-01-01 05:00:00
 2018-01-01 10:00:00
 2018-01-01 15:00:00
 2018-01-01 20:00:00
(5 rijen)
Exploratory Data Analysis in SQL

Reeksen vanaf het begin genereren

SELECT generate_series('2018-01-31',
                       '2018-12-31',
                       '1 month'::interval);
   generate_series     
---------------------
 2018-01-31 00:00:00
 2018-02-28 00:00:00
 2018-03-28 00:00:00
 2018-04-28 00:00:00
 2018-05-28 00:00:00
 2018-06-28 00:00:00
 2018-07-28 00:00:00
 2018-08-28 00:00:00
 2018-09-28 00:00:00
 2018-10-28 00:00:00
 2018-11-28 00:00:00
 2018-12-28 00:00:00
(12 rijen)
Exploratory Data Analysis in SQL

Reeksen vanaf het begin genereren

-- Trek 1 dag af voor einde maand
SELECT generate_series('2018-02-01',  -- start 1 maand later
                       '2019-01-01',
                       '1 month'::interval) - '1 day'::interval;
   generate_series
---------------------
 2018-01-31 00:00:00
 2018-02-28 00:00:00
 2018-03-31 00:00:00
 2018-04-30 00:00:00
 2018-05-31 00:00:00
 2018-06-30 00:00:00
 2018-07-31 00:00:00
 2018-08-31 00:00:00
 2018-09-30 00:00:00
 2018-10-31 00:00:00
 2018-11-30 00:00:00
 2018-12-31 00:00:00
(12 rijen)
Exploratory Data Analysis in SQL

Normale aggregatie

SELECT * FROM sales;
        date         | amount 
---------------------+--------
 2018-04-23 09:13:14 |     12
 2018-04-23 13:57:53 |     41
 2018-04-23 12:05:44 |     23
 2018-04-23 09:07:33 |     31
 2018-04-23 10:31:40 |      5
 2018-04-23 09:35:16 |     18
 2018-04-23 12:17:43 |     19
 2018-04-23 12:57:49 |     32
 2018-04-23 10:12:35 |     13
 2018-04-23 13:21:30 |      6
(10 rijen)
SELECT date_trunc('hour', date) 
          AS hour, 
       count(*) 
  FROM sales 
 GROUP BY hour
 ORDER BY hour;
        hour         | count 
---------------------+-------
 2018-04-23 09:00:00 |     3
 2018-04-23 10:00:00 |     2
 2018-04-23 12:00:00 |     3
 2018-04-23 13:00:00 |     2
(4 rijen)
Exploratory Data Analysis in SQL

Aggregatie met reeksen

-- Maak de reeks als tabel hour_series
WITH hour_series AS (
     SELECT generate_series('2018-04-23 09:00:00',          -- 9u
                            '2018-04-23 14:00:00',          -- 14u
                            '1 hour'::interval) AS hours)

-- Uren uit de reeks, tel date (NIET *) om non-NULL te tellen SELECT hours, count(date) -- Join reeks met sales-data FROM hour_series LEFT JOIN sales ON hours=date_trunc('hour', date) GROUP BY hours ORDER BY hours;
Exploratory Data Analysis in SQL

Aggregatie met reeksen: resultaat

         hours          | count 
------------------------+-------
 2018-04-23 09:00:00-05 |     3
 2018-04-23 10:00:00-05 |     2
 2018-04-23 11:00:00-05 |     0
 2018-04-23 12:00:00-05 |     3
 2018-04-23 13:00:00-05 |     2
 2018-04-23 14:00:00-05 |     0
(6 rijen)
Exploratory Data Analysis in SQL

Aggregatie met bins

-- Maak bins
WITH bins AS (
      SELECT generate_series('2018-04-23 09:00:00', 
                             '2018-04-23 15:00:00',
                             '3 hours'::interval) AS lower,
             generate_series('2018-04-23 12:00:00', 
                             '2018-04-23 18:00:00',
                             '3 hours'::interval) AS upper)

-- Tel waarden per bin SELECT lower, upper, count(date) -- left join houdt alle bins FROM bins LEFT JOIN sales ON date >= lower AND date < upper -- Groepeer op grenzen om groepen te maken GROUP BY lower, upper ORDER BY lower;
Exploratory Data Analysis in SQL

Bin-resultaat

        lower        |        upper        | count 
---------------------+---------------------+-------
 2018-04-23 09:00:00 | 2018-04-23 12:00:00 |     5
 2018-04-23 12:00:00 | 2018-04-23 15:00:00 |     5
 2018-04-23 15:00:00 | 2018-04-23 18:00:00 |     0
(3 rijen)
Exploratory Data Analysis in SQL

Oefen met reeksen genereren!

Exploratory Data Analysis in SQL

Preparing Video For Download...