Cleaning Data in SQL Server Databases
Miriam Antona
Software Engineer
SELECT * FROM carriers
| code | name |
|------|---------------------------------|
| YV | Mesa Airlines Inc. |
| AA | American Airlines Inc. |
| B6 | JetBlue Airways |
| DL | Delta Air Lines Inc. |
| HA | Hawaiian Airlines Inc. |
| MQ | American Eagle Airlines Inc. |
| EV | ExpressJet Airlines Inc. |
| UA | United Air Lines Inc. |
| US | US Airways Inc. |
| ... | ... |
TRIM ( [characters ] string )
SELECT TRIM(' JetBlue Airways ');
JetBlue Airways
RTRIM
and LTRIM
.-- Removes all trailing spaces
RTRIM ( character_expression )
-- Removes all leading spaces
LTRIM ( character_expression )
SELECT LTRIM(RTRIM(' JetBlue Airways '));
JetBlue Airways
SELECT code, TRIM(name) AS name FROM carriers
SELECT code, LTRIM(RTRIM(name)) AS name FROM carriers
| code | name |
|------|------------------------------|
| YV | Mesa Airlines Inc. |
| AA | American Airlines Inc. |
| B6 | JetBlue Airways |
| DL | Delta Air Lines Inc. |
| HA | Hawaiian Airlines Inc. |
| US | US Airways Inc. |
| ... | ... |
SELECT * FROM airports
ORDER BY airport_state
| airport_code | airport_name | airport_city | airport_state |
|--------------|-----------------------------------------|-----------------|---------------|
| ... | ... | ... | ... |
| MIA | Miami International | Miami | fl |
| TPA | Tampa International | Tampa | Fl |
| FLL | Fort Lauderdale-Hollywood International | Fort Lauderdale | FL |
| MCO | Orlando International | Orlando | Florida |
| ... | ... | ... | ... |
"Fl" / "fl" / "Florida" -> "Florida"
REPLACE ( string_to_replace , occurrences , string_replacement )
Replaces all occurrences of a specified string with another string
Performs comparisons based on the collation of the input
COLLATE
to perform a comparison in a specific collationSELECT
airport_code, airport_name, airport_city,
REPLACE(airport_state, 'FL', 'Florida') AS airport_state
FROM airports
ORDER BY airport_state
| airport_code | airport_name | airport_city | airport_state |
|--------------|-----------------------------------------|-----------------|---------------|
| ... | ... | ... | ... |
| MIA | Miami International | Miami | Florida |
| TPA | Tampa International | Tampa | Florida |
| FLL | Fort Lauderdale-Hollywood International | Fort Lauderdale | Florida |
| MCO | Orlando International | Orlando | Floridaorida |
| ... | ... | ... | ... |
SELECT
airport_code, airport_name, airport_city,
REPLACE
(REPLACE(airport_state, 'FL', 'Florida'),
'Floridaorida', 'Florida') AS airport_state
FROM airports
ORDER BY airport_state
| airport_code | airport_name | airport_city | airport_state |
|--------------|-----------------------------------------|-----------------|---------------|
| MCO | Orlando International | Orlando | Florida |
| TPA | Tampa International | Tampa | Florida |
| FLL | Fort Lauderdale-Hollywood International | Fort Lauderdale | Florida |
| MIA | Miami International | Miami | Florida |
| ... | ... | ... | ... |
SELECT airport_code, airport_name, airport_city,
CASE
WHEN airport_state <> 'Florida' THEN REPLACE(airport_state, 'FL', 'Florida')
ELSE airport_state
END AS airport_state
FROM airports
ORDER BY airport_state
| airport_code | airport_name | airport_city | airport_state |
|--------------|-----------------------------------------|-----------------|---------------|
| MCO | Orlando International | Orlando | Florida |
| TPA | Tampa International | Tampa | Florida |
| FLL | Fort Lauderdale-Hollywood International | Fort Lauderdale | Florida |
| MIA | Miami International | Miami | Florida |
| ... | ... | ... | ... |
"Fl" / "fl" / "Florida" -> "FL"
SELECT
airport_code, airport_name, airport_city,
REPLACE(airport_state, 'Florida', 'FL') AS airport_state
FROM airports
ORDER BY airport_state
| airport_code | airport_name | airport_city | airport_state |
|--------------|-----------------------------------------|-----------------|---------------|
| MCO | Orlando International | Orlando | FL |
| TPA | Tampa International | Tampa | Fl |
| FLL | Fort Lauderdale-Hollywood International | Fort Lauderdale | FL |
| MIA | Miami International | Miami | fl |
| ... | ... | ... | ... |
UPPER ( character_expression )
SELECT
airport_code, airport_name, airport_city,
UPPER(
REPLACE(airport_state, 'Florida', 'FL')
) AS airport_state
FROM airports
ORDER BY airport_state
| airport_code | airport_name | airport_city | airport_state |
|--------------|-----------------------------------------|-----------------|---------------|
| ... | ... | ... | ... |
| MCO | Orlando International | Orlando | FL |
| TPA | Tampa International | Tampa | FL |
| FLL | Fort Lauderdale-Hollywood International | Fort Lauderdale | FL |
| MIA | Miami International | Miami | FL |
| ... | ... | ... | ... |
Cleaning Data in SQL Server Databases