Cleaning Data in PostgreSQL Databases
Darryl Reeves, Ph.D.
Industry Assistant Professor, New York University
camis | inspection_date | violation | ...
---------+-----------------+-------------------------------------------------+-----
... | ... | ... | ...
50038736 | 03/29/2018 | 09B Thawing procedures | ...
50033304 | 12/18/2019 | 02B Hot food item not held at or above 140º ... | ...
50081658 | 12/13/2018 | 06F Wiping cloths soiled or not stored in sa... | ...
50033733 | 02/12/2019 | 10B Plumbing not properly installed or maint... | ...
40559634 | 08/22/2017 | 04N Filth flies or food/refuse/sewage-associ... | ...
... | ... | ... | ...
STRPOS(source_string, search_string)
SELECT
STRPOS('09B Thawing procedures', ' ');
4
SELECT
STRPOS('09B Thawing procedures', '?');
0
SELECT
STRPOS('09B Thawing procedures', ' ');
4
SUBSTRING(source_string FROM start_pos FOR num_chars)
SUBSTRING('Homerun' FROM 1 FOR 4)
→ Home
SELECT
SUBSTRING(
'09B Thawing procedures'
FROM 1
FOR STRPOS('09B Thawing procedures', ' ') - 1
);
09B
Requirements:
SELECT
STRPOS('09B Thawing procedures', ' ') + 1;
5
LENGTH(string)
→ INTEGER
SELECT LENGTH('hello')
5
LENGTH('09B Thawing procedures')
→ 22
STRPOS('09B Thawing procedures', ' ')
→ 4
LENGTH('09B Thawing procedures') - STRPOS('09B Thawing procedures', ' ')
→ 18
LENGTH('Thawing procedures')
→ 18
SELECT
LENGTH('09B Thawing procedures') -
STRPOS('09B Thawing procedures', ' ');
18
SELECT
SUBSTRING(
'09B Thawing procedures'
FROM
STRPOS('09B Thawing procedures', ' ')
+ 1
FOR
LENGTH('09B Thawing procedures')
- STRPOS('09B Thawing procedures', ' ')
);
Thawing procedures
SELECT
camis,
inspection_date,
SUBSTRING(
violation
FROM 1
FOR STRPOS(violation, ' ') - 1
) AS violation_code,
SUBSTRING(
violation
FROM STRPOS(violation, ' ') + 1
FOR LENGTH(violation) - STRPOS(violation, ' ')
) AS violation_description
FROM
restaurant_inspection;
camis | inspection_date | violation | ...
---------+-----------------+-------------------------------------------------+-----
... | ... | ... | ...
50038736 | 03/29/2018 | 09B Thawing procedures | ...
50033304 | 12/18/2019 | 02B Hot food item not held at or above 140º ... | ...
50081658 | 12/13/2018 | 06F Wiping cloths soiled or not stored in sa... | ...
50033733 | 02/12/2019 | 10B Plumbing not properly installed or maint... | ...
40559634 | 08/22/2017 | 04N Filth flies or food/refuse/sewage-associ... | ...
... | ... | ... | ...
camis | inspection_date | violation_code | violation_description | ...
---------+-----------------+----------------+---------------------------------------------+-----
... | ... | ... | ... | ...
50038736 | 03/29/2018 | 09B | Thawing procedures | ...
50033304 | 12/18/2019 | 02B | Hot food item not held at or above 140º ... | ...
50081658 | 12/13/2018 | 06F | Wiping cloths soiled or not stored in sa... | ...
50033733 | 02/12/2019 | 10B | Plumbing not properly installed or maint... | ...
40559634 | 08/22/2017 | 04N | Filth flies or food/refuse/sewage-associ... | ...
... | ... | ... | ... | ...
Cleaning Data in PostgreSQL Databases