Exploratory Data Analysis in SQL
Christina Maimone
Data Scientist
SELECT *
FROM sales
ORDER BY date;
date | amount
---------------------+--------
2018-04-23 09:07:33 | 31
2018-04-23 09:13:14 | 12
2018-04-23 09:35:16 | 18
2018-04-23 10:12:35 | 13
2018-04-23 10:31:40 | 5
2018-04-23 12:05:44 | 23
2018-04-23 12:17:43 | 19
2018-04-23 12:57:49 | 32
2018-04-23 13:21:30 | 6
2018-04-23 13:57:53 | 41
(10 rows)
SELECT date,
lag(date) OVER (ORDER BY date),
lead(date) OVER (ORDER BY date)
FROM sales;
date | lag | lead
---------------------+---------------------+---------------------
2018-04-23 09:07:33 | | 2018-04-23 09:13:14
2018-04-23 09:13:14 | 2018-04-23 09:07:33 | 2018-04-23 09:35:16
2018-04-23 09:35:16 | 2018-04-23 09:13:14 | 2018-04-23 10:12:35
2018-04-23 10:12:35 | 2018-04-23 09:35:16 | 2018-04-23 10:31:40
2018-04-23 10:31:40 | 2018-04-23 10:12:35 | 2018-04-23 12:05:44
2018-04-23 12:05:44 | 2018-04-23 10:31:40 | 2018-04-23 12:17:43
2018-04-23 12:17:43 | 2018-04-23 12:05:44 | 2018-04-23 12:57:49
2018-04-23 12:57:49 | 2018-04-23 12:17:43 | 2018-04-23 13:21:30
2018-04-23 13:21:30 | 2018-04-23 12:57:49 | 2018-04-23 13:57:53
2018-04-23 13:57:53 | 2018-04-23 13:21:30 |
(10 rows)
SELECT date,
lag(date) OVER (ORDER BY date),
lead(date) OVER (ORDER BY date)
FROM sales;
SELECT date,
date - lag(date) OVER (ORDER BY date) AS gap
FROM sales;
date | gap
---------------------+----------
2018-04-23 09:07:33 |
2018-04-23 09:13:14 | 00:05:41
2018-04-23 09:35:16 | 00:22:02
2018-04-23 10:12:35 | 00:37:19
2018-04-23 10:31:40 | 00:19:05
2018-04-23 12:05:44 | 01:34:04
2018-04-23 12:17:43 | 00:11:59
2018-04-23 12:57:49 | 00:40:06
2018-04-23 13:21:30 | 00:23:41
2018-04-23 13:57:53 | 00:36:23
(10 rows)
SELECT avg(gap)
FROM (SELECT date - lag(date) OVER (ORDER BY date) AS gap
FROM sales) AS gaps;
avg
-----------------
00:32:15.555556
(1 row)
SELECT date,
amount,
lag(amount) OVER (ORDER BY date),
amount - lag(amount) OVER (ORDER BY date) AS change
FROM sales;
date | amount | lag | change
---------------------+--------+-----+--------
2018-04-23 09:07:33 | 31 | |
2018-04-23 09:13:14 | 12 | 31 | -19
2018-04-23 09:35:16 | 18 | 12 | 6
2018-04-23 10:12:35 | 13 | 18 | -5
2018-04-23 10:31:40 | 5 | 13 | -8
2018-04-23 12:05:44 | 23 | 5 | 18
2018-04-23 12:17:43 | 19 | 23 | -4
2018-04-23 12:57:49 | 32 | 19 | 13
2018-04-23 13:21:30 | 6 | 32 | -26
2018-04-23 13:57:53 | 41 | 6 | 35
(10 rows)
Exploratory Data Analysis in SQL