Streamlined Data Ingestion with pandas
Amany Mahfouz
Instructor
SELECT DISTINCT
SELECT DISTINCT [column names] FROM [table];
SELECT DISTINCT * FROM [table];
/* Get unique street addresses and boroughs */
SELECT DISTINCT incident_address,
borough
FROM hpd311calls;
SUM
AVG
MAX
MIN
COUNT
SUM
, AVG
, MAX
, MIN
SELECT AVG(tmax) FROM weather;
COUNT
SELECT COUNT(*) FROM [table_name];
SELECT COUNT(DISTINCT [column_names]) FROM [table_name];
GROUP BY
statements/* Get counts of plumbing calls by borough */
SELECT borough,
COUNT(*)
FROM hpd311calls
WHERE complaint_type = 'PLUMBING'
GROUP BY borough;
# Create database engine engine = create_engine("sqlite:///data.db")
# Write query to get plumbing call counts by borough query = """SELECT borough, COUNT(*) FROM hpd311calls WHERE complaint_type = 'PLUMBING' GROUP BY borough;"""
# Query databse and create dataframe plumbing_call_counts = pd.read_sql(query, engine)
print(plumbing_call_counts)
borough COUNT(*)
0 BRONX 2016
1 BROOKLYN 2702
2 MANHATTAN 1413
3 QUEENS 808
4 STATEN ISLAND 178
Streamlined Data Ingestion with pandas