Cleaning Data in SQL Server Databases
Miriam Antona
Software Engineer
PIVOT
: turns the unique values from one column into multiple columns.
SELECT * FROM paper_shop_monthly_sales
| product_name_units | year_of_sale | month_of_sale |
|--------------------|--------------|---------------|
| notebooks-150 | 2018 | 1 |
| notebooks-200 | 2019 | 1 |
| notebooks-30 | 2019 | 2 |
| pencils-100 | 2018 | 1 |
| pencils-50 | 2018 | 2 |
| pencils-130 | 2019 | 1 |
| crayons-80 | 2018 | 1 |
| ... | ... | ... |
Change
| product_name_units | year_of_sale | month_of_sale |
|--------------------|--------------|---------------|
| notebooks-150 | 2018 | 1 |
| notebooks-200 | 2019 | 1 |
| pencils-50 | 2018 | 2 |
| crayons-80 | 2018 | 1 |
| ... | ... | ... |
to
| year_of_sale | notebooks | pencils | crayons |
|--------------|-----------|---------|---------|
| 2018 | 150 | 150 | 80 |
| 2019 | 230 | 130 | 170 |
SELECT
year_of_sale,
notebooks,
pencils,
crayons
FROM
(SELECT
year_of_sale,
SUBSTRING(product_name_units, 1, charindex('-', product_name_units)-1) AS product_name,
CAST(SUBSTRING(product_name_units,
charindex('-', product_name_units)+1, len(product_name_units)) AS INT) units
FROM paper_shop_monthly_sales) AS sales
PIVOT (SUM(units)
FOR product_name IN (notebook, pencils, crayons))
AS paper_shop_pivot
SELECT
year_of_sale,
notebooks,
pencils,
crayons
SELECT
year_of_sale,
notebooks,
pencils,
crayons
FROM
(SELECT
year_of_sale,
SUBSTRING(product_name_units, 1, charindex('-', product_name_units)-1) AS product_name,
CAST(SUBSTRING(product_name_units,
charindex('-', product_name_units)+1, len(product_name_units)) AS INT) units
FROM paper_shop_monthly_sales) AS sales
| year_of_sale | product_name | units |
|--------------|--------------|-------|
| 2018 | notebooks | 150 |
| 2019 | notebooks | 200 |
| 2019 | notebooks | 30 |
| 2018 | pencils | 100 |
| 2018 | pencils | 50 |
| 2019 | pencils | 130 |
| 2018 | crayons | 80 |
| 2019 | crayons | 90 |
| 2019 | crayons | 80 |
SELECT
year_of_sale,
notebooks,
pencils,
crayons
FROM
(SELECT
year_of_sale,
SUBSTRING(product_name_units, 1, charindex('-', product_name_units)-1) AS product_name,
CAST(SUBSTRING(product_name_units,
charindex('-', product_name_units)+1, len(product_name_units)) AS INT) units
FROM paper_shop_monthly_sales) AS sales
PIVOT (SUM(units)
SELECT
year_of_sale,
notebooks,
pencils,
crayons
FROM
(SELECT
year_of_sale,
SUBSTRING(product_name_units, 1, charindex('-', product_name_units)-1) AS product_name,
CAST(SUBSTRING(product_name_units,
charindex('-', product_name_units)+1, len(product_name_units)) AS INT) units
FROM paper_shop_monthly_sales) AS sales
PIVOT (SUM(units)
FOR product_name IN (notebook, pencils, crayons))
SELECT
year_of_sale,
notebooks,
pencils,
crayons
FROM
(SELECT
year_of_sale,
SUBSTRING(product_name_units, 1, charindex('-', product_name_units)-1) AS product_name,
CAST(SUBSTRING(product_name_units,
charindex('-', product_name_units)+1, len(product_name_units)) AS INT) units
FROM paper_shop_monthly_sales) AS sales
PIVOT (SUM(units)
FOR product_name IN (notebook, pencils, crayons))
AS paper_shop_pivot
| year_of_sale | notebooks_units | pencils_units | crayons_units |
|--------------|-----------------|---------------|---------------|
| 2018 | 150 | 150 | 80 |
| 2019 | 230 | 130 | 170 |
UNPIVOT
: Turns columns into rows.
SELECT * FROM pivot_sales
| year_of_sale | notebooks | pencils | crayons |
|--------------|-----------|---------|---------|
| 2018 | 150 | 150 | 80 |
| 2019 | 230 | 130 | 170 |
SELECT * FROM pivot_sales
UNPIVOT
(units FOR product_name IN (notebooks, pencils, crayons)
) AS unpvt
| year_of_sale | units | product_name |
|--------------|-------|--------------|
| 2018 | 150 | notebooks |
| 2018 | 150 | pencils |
| 2018 | 80 | crayons |
| 2019 | 230 | notebooks |
| 2019 | 130 | pencils |
| 2019 | 170 | crayons |
Cleaning Data in SQL Server Databases