Data Manipulation in SQL
Mona Khalil
Data Scientist, Greenhouse Software
SELECT
, FROM
, WHERE
SELECT
s.stage,
ROUND(s.avg_goals,2) AS avg_goal,
(SELECT AVG(home_goal + away_goal)
FROM match WHERE season = '2013/2014') AS overall_avg
FROM
(SELECT
stage,
AVG(home_goal + away_goal) AS avg_goals
FROM match
WHERE season = '2013/2014'
GROUP BY stage) AS s
WHERE
s.avg_goals > (SELECT AVG(home_goal + away_goal)
FROM match WHERE season = '2013/2014');
SELECT
, FROM
, WHERE
, and GROUP BY
SELECT
col1,
col2,
col3
FROM table1
WHERE col1 = 2;
/* This query filters for col1 = 2
and only selects data from table1 */
SELECT
col1,
col2,
col3
FROM table1
WHERE col1 = 2;
SELECT
col1,
col2,
col3
FROM table1 -- this table has 10,000 rows
WHERE col1 = 2; -- Filter WHERE value 2
SELECT
col1,
col2,
col3
FROM table1
WHERE col1 IN
(SELECT id
FROM table2
WHERE year = 1991);
SELECT
date,
hometeam_id,
awayteam_id,
CASE WHEN hometeam_id = 8455 AND home_goal > away_goal
THEN 'Chelsea home win'
WHEN awayteam_id = 8455 AND home_goal < away_goal
THEN 'Chelsea away win'
WHEN hometeam_id = 8455 AND home_goal < away_goal
THEN 'Chelsea home loss'
WHEN awayteam_id = 8455 AND home_goal > away_goal
THEN 'Chelsea away loss'
WHEN (hometeam_id = 8455 OR awayteam_id = 8455)
AND home_goal = away_goal THEN 'Chelsea Tie'
END AS outcome
FROM match
WHERE hometeam_id = 8455 OR awayteam_id = 8455;
Subqueries require computing power
Is the subquery actually necessary?
SELECT
s.stage,
ROUND(s.avg_goals,2) AS avg_goal,
(SELECT AVG(home_goal + away_goal)
FROM match WHERE season = '2013/2014') AS overall_avg
FROM
(SELECT
stage,
AVG(home_goal + away_goal) AS avg_goals
FROM match
WHERE season = '2013/2014'
GROUP BY stage) AS s
WHERE
s.avg_goals > (SELECT AVG(home_goal + away_goal)
FROM match WHERE season = '2013/2014');
Data Manipulation in SQL