Streamlined Data Ingestion with pandas
Amany Mahfouz
Instructor
SELECT [column names] FROM [table name];
SELECT date, tavg
FROM weather;
WHERE
clause to selectively import recordsSELECT [column_names]
FROM [table_name]
WHERE [condition];
=
>
and >=
<
and <=
<>
(not equal to)SELECT *
FROM weather
WHERE tmax > 32;
=
sign and the text to match/* Get records about incidents in Brooklyn */
SELECT *
FROM hpd311calls
WHERE borough = 'BROOKLYN';
# Load libraries import pandas as pd from sqlalchemy import create_engine
# Create database engine engine = create_engine("sqlite:///data.db")
# Write query to get records from Brooklyn query = """SELECT * FROM hpd311calls WHERE borough = 'BROOKLYN';"""
# Query the database brooklyn_calls = pd.read_sql(query, engine)
print(brookyn_calls.borough.unique())
['BROOKLYN']
WHERE
clauses with AND
return records that meet all conditions# Write query to get records about plumbing in the Bronx and_query = """SELECT * FROM hpd311calls WHERE borough = 'BRONX' AND complaint_type = 'PLUMBING';"""
# Get calls about plumbing issues in the Bronx bx_plumbing_calls = pd.read_sql(and_query, engine) # Check record count print(bx_plumbing_calls.shape)
(2016, 8)
WHERE
clauses with OR
return records that meet at least one condition# Write query to get records about water leaks or plumbing or_query = """SELECT * FROM hpd311calls WHERE complaint_type = 'WATER LEAK' OR complaint_type = 'PLUMBING';"""
# Get calls that are about plumbing or water leaks leaks_or_plumbing = pd.read_sql(or_query, engine) # Check record count print(leaks_or_plumbing.shape)
(10684, 8)
Streamlined Data Ingestion with pandas