Menganalisis Data Sensus AS dengan Python
Lee Hachadoorian
Asst. Professor of Instruction, Temple University

Sumber: United States Census Bureau
import requests
# Build the base URL
HOST = "https://api.census.gov/data"
year = "2012"
dataset = "acs/acs5"
base_url = "/".join([HOST, year, dataset])
# Specify the requested variables
# B01001_001E = Total population estimate
# B03002_003E = Non-Hispanic white population estimate
# B03002_004E = Non-Hispanic black population estimate
get_vars = ["NAME", "B01001_001E", "B03002_003E", "B03002_004E"]
# Build the predicates dictionary; requests sends it as the query string
predicates = {}
predicates["get"] = ",".join(get_vars)
# Requested geography (the trailing "*" is the wildcard for all areas)
predicates["for"] = \
    "metropolitan statistical area/micropolitan statistical area:*"
# Send the request and show the first five rows of the JSON response
r = requests.get(base_url, params=predicates)
print(r.json()[:5])
[['NAME', 'B01001_001E', 'B03002_003E', 'B03002_004E', 'metropolitan statistical area/micropolitan statistical area'],
['Adjuntas, PR Micro Area', '19458', '140', '0', '10260'],
['Aguadilla-Isabela-San Sebastián, PR Metro Area', '305538', '5602', '231', '10380'],
['Coamo, PR Micro Area', '71596', '228', '53', '17620'],
['Fajardo, PR Metro Area', '70633', '543', '195', '21940']]
# Build readable column names
col_names = ["name", "pop", "white", "black", "msa"]
# Load the JSON response into a DataFrame; row 0 holds the API's own
# header names, so it is skipped in favor of col_names
msa = pd.DataFrame(columns=col_names, data=r.json()[1:])
# Convert the count columns from str to int (a list of column names
# inside the indexer selects several columns at once)
msa[["pop", "white", "black"]] = msa[["pop", "white", "black"]].astype(int)
state county tract white black
0 01 001 020100 1601 217
1 01 001 020200 844 1214
2 01 001 020300 2538 647
3 01 001 020400 4030 191
4 01 001 020500 8438 1418
msa msa_name county_name state_name state county
0 10100 Aberdeen, SD Brown County South Dakota 46 013
1 10100 Aberdeen, SD Edmunds County South Dakota 46 045
2 10140 Aberdeen, WA Grays Harbor County Washington 53 027
3 10180 Abilene, TX Callahan County Texas 48 059
4 10180 Abilene, TX Jones County Texas 48 253
import pandas as pd
# Merge DataFrames on matching columns (arguments elided with `...` here)
tracts_with_msa_id = pd.merge(...)
import pandas as pd
# Merge DataFrames on matching columns: tracts is the left frame, msa_def
# the right one (the join-key arguments are still elided with `...`)
tracts_with_msa_id = pd.merge(tracts, msa_def, ...)
import pandas as pd
# Merge DataFrames on matching columns: rows are paired where both the
# "state" and "county" values agree between tracts (left) and msa_def (right)
tracts_with_msa_id = pd.merge(tracts, msa_def,
left_on = ["state", "county"], right_on = ["state", "county"])
# Alternative when the key columns have the same names in both frames
tracts_with_msa_id = pd.merge(tracts, msa_def, on = ["state", "county"])
# DataFrame with state names
st.head()
state_name
state
01 Alabama
02 Alaska
04 Arizona
05 Arkansas
06 California
# Merge the tracts and st DataFrames: the "state" column of tracts is
# matched against the index of st
tracts_st = pd.merge(tracts, st, left_on = "state", right_index = True)
tracts_st.head()
state county tract white black state_name
0 01 001 020100 1601 217 Alabama
1 01 001 020200 844 1214 Alabama
2 01 001 020300 2538 647 Alabama
3 01 001 020400 4030 191 Alabama
Menganalisis Data Sensus AS dengan Python