Data Pipeline Automation in Snowflake
Emily Melhuish
Technical Curriculum Developer, Snowflake

SQL doesn't run in the order you write it
COUNT(*) / COUNT(DISTINCT col) — total rows; unique valuesSUM — column totalAVG — mean valueMIN / MAX — rangeSELECT COUNT(*) AS total_shipments,
COUNT(DISTINCT carrier_id) AS unique_carriers,
SUM(delivery_days) AS total_days,
AVG(delivery_days) AS avg_days
FROM shipments;
Return usage by region and warehouse
SELECT
region
, warehouse_name
, SUM(credits_used)
FROM usage
GROUP BY region, warehouse_name;
Return usage by region sub-totals
SELECT region
, SUM(credits_used)
FROM usage GROUP BY region;
Return usage total
SELECT
SUM(credits_used)
FROM usage;
Input
SELECT region
, warehouse_name
, SUM(credits_used) AS total
FROM usage
GROUP BY GROUPING SETS (
(region, warehouse_name)
, (region)
, ()
);
Output
| REGION | WAREHOUSE_NAME | TOTAL |
|---|---|---|
| EMEA | harbr_wh_prod | 1240.50 |
| EMEA | harbr_wh_dev | 380.00 |
| EMEA | NULL | 1620.50 |
| APAC | harbr_wh_prod | 890.25 |
| APAC | NULL | 890.25 |
| NULL | NULL | 2510.75 |
Input
SELECT
region
, warehouse_name
, SUM(credits_used) AS total
FROM usage
GROUP BY ROLLUP (region, warehouse_name);
Output
| REGION | WAREHOUSE_NAME | TOTAL |
|---|---|---|
| EMEA | harbr_wh_prod | 1240.50 |
| EMEA | harbr_wh_dev | 380.00 |
| EMEA | NULL | 1620.50 |
| APAC | harbr_wh_prod | 890.25 |
| APAC | NULL | 890.25 |
| NULL | NULL | 2510.75 |
Input
SELECT
region
, warehouse_name
, SUM(credits_used) AS total
FROM usage
GROUP BY CUBE (region, warehouse_name);
Output
| REGION | WAREHOUSE_NAME | TOTAL | |
|---|---|---|---|
| EMEA | harbr_wh_prod | 1240.50 | |
| EMEA | harbr_wh_dev | 380.00 | |
| EMEA | NULL | 1620.50 | |
| APAC | harbr_wh_prod | 890.25 | |
| APAC | NULL | 890.25 | } |
| NULL | harbr_wh_prod | 2130.75 | |
| NULL | harbr_wh_dev | 380.00 | |
| NULL | NULL | 2510.75 |
SELECT region, warehouse_name, SUM(credits_used) AS total_credits, GROUPING(region) AS is_region_agg, GROUPING(warehouse_name) AS is_wh_agg ,
GROUPING_ID(region, warehouse_name) AS grouping_level
FROM usage
GROUP BY ROLLUP (region, warehouse_name);

Data Pipeline Automation in Snowflake