Query structure and query execution

Improving Query Performance in PostgreSQL

Amy McCarty

Instructor

Subqueries and joins

-- SUBQUERY
SELECT COUNT(athlete_id)
FROM athletes
WHERE country IN 
  (SELECT country FROM climate 
    WHERE temp_annual > 22)
-- JOIN
SELECT COUNT(athlete_id)
FROM athletes a
INNER JOIN climate c
  ON a.country = c.country
  AND c.temp_annual > 22
Improving Query Performance in PostgreSQL

Query plan

Aggregate  ()
  ->  Hash Join  ()
        Hash Cond: (athletes.country = climate.country)
        ->  Seq Scan on athletes  ()
        ->  Hash  ()
              ->  Seq Scan on climate  ()
                    Filter: (temp_annual > '22'::numeric)
Improving Query Performance in PostgreSQL

Common table expressions and temporary tables

-- CTE
WITH celsius AS 
(
  SELECT country 
  FROM climate 
  WHERE temp_annual > 22 -- Celsius
)
SELECT count(athlete_id)
FROM athletes a
INNER JOIN celsius c
  ON a.country = c.country
-- TEMP TABLE
CREATE TEMPORARY TABLE celsius AS 
  SELECT country 
  FROM climate 
  WHERE temp_annual > 22; -- Celsius

SELECT count(athlete_id)
FROM athletes a
INNER JOIN celsius c
  ON a.country = c.country
Improving Query Performance in PostgreSQL

Query plan

  Aggregate  ()
  CTE celsius
    ->  Seq Scan on climate  ()
          Filter: (temp_annual > '22'::numeric)
  ->  Hash Join  ()
        Hash Cond: (a.country_code = c.country_code)
        ->  Seq Scan on athletes a  ()
        ->  Hash  ()
              ->  CTE Scan on celsius c  ()
Improving Query Performance in PostgreSQL

Limiting the data

  SELECT country_code
  , COUNT(athlete_id) as athletes
  FROM athletes
  WHERE year in (2014, 2010) -- Indexed column
  GROUP BY country_code
Improving Query Performance in PostgreSQL

Limiting the data

  SELECT country_code
  , COUNT(athlete_id) as athletes
  FROM athletes
  WHERE year in (2014, 2010) -- Indexed column
  GROUP BY country_code
No Index Index
Planning Time: 3.370 ms Planning Time: 0.163 ms
Execution Time: 0.143 ms Execution Time: 0.062 ms
Improving Query Performance in PostgreSQL

Aggregations - different granularities

 

SELECT r.country
  , COUNT(a.athlete_id) as athletes
FROM regions r -- country level
INNER JOIN athletes a -- athletes level
  ON r.country = a.country
GROUP BY r.country

 

 

  • Execution Time : 0.267 ms
Improving Query Performance in PostgreSQL

Aggregations - changing the granularity

WITH olympians AS ( -- country level
  SELECT country
  , COUNT(athlete_id) as athletes
  FROM athletes -- athletes level
  GROUP BY country
)
SELECT country, athletes
FROM regions r -- country level
INNER JOIN olympians o
  ON r.country = o.country
Execution Time
Join 1st 0.267 ms
Aggregate 1st 0.192 ms
Improving Query Performance in PostgreSQL

Let's practice!

Improving Query Performance in PostgreSQL

Preparing Video For Download...