Streamlined Data Ingestion with pandas
Amany Mahfouz
Instructor
SELECT DISTINCTSELECT DISTINCT [column names] FROM [table];
SELECT DISTINCT * FROM [table];
/* Get unique street addresses and boroughs */
SELECT DISTINCT incident_address,
borough
FROM hpd311calls;
SUMAVGMAXMINCOUNTSUM, AVG, MAX, MINSELECT AVG(tmax) FROM weather;
COUNTSELECT COUNT(*) FROM [table_name];
SELECT COUNT(DISTINCT [column_names]) FROM [table_name];
GROUP BY statements/* Get counts of plumbing calls by borough */
SELECT borough,
COUNT(*)
FROM hpd311calls
WHERE complaint_type = 'PLUMBING'
GROUP BY borough;
# Create database engine engine = create_engine("sqlite:///data.db")# Write query to get plumbing call counts by borough query = """SELECT borough, COUNT(*) FROM hpd311calls WHERE complaint_type = 'PLUMBING' GROUP BY borough;"""# Query databse and create dataframe plumbing_call_counts = pd.read_sql(query, engine)
print(plumbing_call_counts)
borough COUNT(*)
0 BRONX 2016
1 BROOKLYN 2702
2 MANHATTAN 1413
3 QUEENS 808
4 STATEN ISLAND 178
Streamlined Data Ingestion with pandas