Window Functions in Snowflake
Jake Roach
Field Data Engineer
m_id | wd | cb | num_workouts | total_cb | cb_vs_average
------ | ------------ | ----- | -------------- | ---------- | ---------------
m_192 | 2024-01-01 | 105 | 4 | 432 | -3
m_192 | 2024-01-03 | 156 | 4 | 432 | 48
m_192 | 2024-01-04 | 69 | 4 | 432 | -39
m_192 | 2024-01-10 | 102 | 4 | 432 | -6
m_74 | 2024-02-10 | 374 | 3 | 1274 | -50.67
m_74 | 2024-02-13 | 396 | 3 | 1274 | -28.67
m_74 | 2024-02-14 | 504 | 3 | 1274 | -79.33
m_233 | 2024-03-05 | 51 | 2 | 132 | -15
m_233 | 2024-03-12 | 81 | 2 | 132 | 15
SELECT <fields>,
-- No need to ORDER BY! AVG(<1>) OVER( PARTITION BY <2> ) AS <alias>
...
<1>
: column to take the average of
<2>
: field to partition data by
$$
+
, -
, *
, /
AVG
, COUNT
, SUM
ORDER BY
!SELECT member_id AS m_id, calories_burned AS cb,
-- First, the average calories burned AVG(calories_burned) OVER( PARTITION BY member_id ) AS avg_cb,
-- Then, the difference vs. the average calories_burned - AVG(calories_burned) OVER( PARTITION BY member_id ) AS cb_vs_average
FROM fitness.workouts;
m_id | cb | avg_cb | cb_vs_average
------ | ----- | --------- | ---------------
m_192 | 105 | 108 | -3
m_192 | 156 | 108 | 48
m_192 | 69 | 108 | -39
m_192 | 102 | 108 | -6
m_74 | 374 | 424.67 | -50.67
m_74 | 396 | 424.67 | -28.67
m_74 | 504 | 424.67 | 79.33
m_233 | 51 | 66 | -15
m_233 | 81 | 66 | 15
SELECT
member_id AS m_id,
workout_date AS wd,
-- Can pass '*' to COUNT
COUNT(*) OVER(
PARTITION BY member_id
) AS num_workouts
FROM fitness.workouts
ORDER BY member_id, workout_date;
Can pass a *
to COUNT
!
m_id | wd | num_workouts
------ | ------------ | --------------
m_192 | 2024-01-01 | 4
m_192 | 2024-01-03 | 4
m_192 | 2024-01-04 | 4
m_192 | 2024-01-10 | 4
m_74 | 2024-02-10 | 3
m_74 | 2024-02-13 | 3
m_74 | 2024-02-14 | 3
m_233 | 2024-03-05 | 2
m_233 | 2024-03-12 | 2
SELECT member_id AS m_id, calories_burned AS cb,
-- Total calories burned for member id SUM(calories_burned) OVER( PARTITION BY member_id ) AS total_cb,
-- Find proportion of total calories_burned / SUM(calories_burned) OVER( PARTITION BY member_id ) AS total_cb
FROM fitness.workouts;
m_id | cb | total_cb | prop_cb
------ | ----- | ---------- | ---------
m_192 | 105 | 432 | 0.2431
m_192 | 156 | 432 | 0.3611
m_192 | 69 | 432 | 0.1597
m_192 | 102 | 432 | 0.2361
m_74 | 374 | 1274 | 0.2935
m_74 | 396 | 1274 | 0.3108
m_74 | 504 | 1274 | 0.3956
m_233 | 51 | 132 | 0.3864
m_233 | 81 | 132 | 0.6136
SELECT member_id AS m_id, workout_date AS wd, calories_burned AS cb,
COUNT(*) OVER( -- Find the count of workouts PARTITION BY member_id ) AS num_workouts,
SUM(calories_burned) OVER( -- Total # of calories burned for each member PARTITION BY member_id ) AS total_cb,
calories_burned - AVG(calories_burned) OVER( -- Compare calories burned in workout to average PARTITION BY member_id ) AS cb_vs_average
FROM fitness.workouts;
m_id | wd | cb | num_workouts | total_cb | cb_vs_average
------ | ------------ | ----- | -------------- | ---------- | ---------------
m_192 | 2024-01-01 | 105 | 4 | 432 | -3
m_192 | 2024-01-03 | 156 | 4 | 432 | 48
m_192 | 2024-01-04 | 69 | 4 | 432 | -39
m_192 | 2024-01-10 | 102 | 4 | 432 | -6
m_74 | 2024-02-10 | 374 | 3 | 1274 | -50.67
m_74 | 2024-02-13 | 396 | 3 | 1274 | -28.67
m_74 | 2024-02-14 | 504 | 3 | 1274 | 79.33
m_233 | 2024-03-05 | 51 | 2 | 132 | -15
m_233 | 2024-03-12 | 81 | 2 | 132 | 15
Window Functions in Snowflake