ETL and ELT in Python
Jake Roach
Data Engineer
Nested JSON
{
"863703000": {
"price": {
"open": 0.12187,
"close": 0.09791
},
"volume": 1443120000
},
"863789400": {
}, ...
}
Goal:
$$
[
[863703000, 0.12187, 0.09791, 1443120000],
[863789400, 0.09843, ...]
]
# Loop over keys
for key in raw_data.keys():
...
# Loop over values
for value in raw_data.values():
...
# Loop over keys and values
for key, value in raw_data.items():
...
.keys()
.values()
.items()
entry = {
"volume": 1443120000,
"price": {
"open": 0.12187,
"close": 0.09791,
}
}
# Parse data from dictionary using .get()
volume = entry.get("volume")
ticker = entry.get("ticker", "DCMP")
# Call .get() twice to return the nested "open" value
open_price = entry.get("price").get("open", 0)
Pass a list of lists to pd.DataFrame()
# Pass a list of lists to pd.DataFrame
raw_data = pd.DataFrame(flattened_rows)
Set column headers using .columns
# Create columns
raw_data.columns = ["timestamps", "open", "close", "volume"]
Set an index using .set_index()
# Set the index column to be "timestamps"
raw_data.set_index("timestamps")
parsed_stock_data = []
# Loop through each key-value pair of the raw_stock_data dictionary
for timestamp, ticker_info in raw_stock_data.items():
parsed_stock_data.append([
timestamp,
ticker_info.get("price", {}).get("open", 0), # Parse the opening price
ticker_info.get("price", {}).get("close", 0), # Parse the closing price
ticker_info.get("volume", 0) # Parse the volume
])
# Create a DataFrame, assign column names, and set an index
transformed_stock_data = pd.DataFrame(parsed_stock_data)
transformed_stock_data.columns = ["timestamps", "open", "close", "volume"]
transformed_stock_data = transformed_stock_data.set_index("timestamps")
ETL and ELT in Python