Introduction to Redshift
Jason Myers
Principal Engineer
CREATE TABLE IDAHO_SITE_ID
(
'pk_siteid' INTEGER PRIMARY KEY,
'fk_monitoringlocation' INTEGER,
'organization_id' VARCHAR(31),
'organizationformalname' VARCHAR(68),
'organization' VARCHAR(16)
);
DISTKEY
, or PRIMARY KEY
Name | Description | Usage |
---|---|---|
ALL | Entire table on every node | Small fact lookup tables we need often in joins |
KEY | Distributed by data in the DISTKEY column | When we aggregate or join by DISTKEY |
EVEN | In turn distribution across nodes by row | Large tables with that don't have keys |
AUTO | Uses ALL for small tables. Key as it grows if there are suitable DISTKEYS falling back to EVEN | default |
CREATE TABLE IDAHO_SITE_ID
(
'pk_siteid' INTEGER PRIMARY KEY,
'fk_monitoringlocation' INTEGER,
'organization_id' VARCHAR(31),
'organizationformalname' VARCHAR(68),
'organization' VARCHAR(16)
)
-- Sets the data distribution key
-- to organization_id
DISTKEY(organization_id);
CREATE TABLE IDAHO_SITE_ID
(
'pk_siteid' INTEGER PRIMARY KEY,
'fk_monitoringlocation' INTEGER,
-- Sets organization_id as the data
-- distribution key
'organization_id' VARCHAR(31) DISTKEY,
'organizationformalname' VARCHAR(68),
'organization' VARCHAR(16)
);
CREATE TABLE IDAHO_SITE_ID
(
'pk_siteid' INTEGER PRIMARY KEY,
'fk_monitoringlocation' INTEGER,
'organization_id' VARCHAR(31),
'organizationformalname' VARCHAR(68),
'organization' VARCHAR(16)
)
DISTKEY(organization_id)
-- Sets the distribution style to key
DISTSTYLE KEY;
CREATE TABLE IDAHO_SITE_ID
(
'pk_siteid' INTEGER PRIMARY KEY,
'fk_monitoringlocation' INTEGER,
'organization_id' VARCHAR(31),
'organizationformalname' VARCHAR(68),
'organization' VARCHAR(16)
)
DISTKEY(organization_id)
DISTSTYLE KEY
-- Sets the data sort key
-- to fk_monitoringlocation
SORTKEY(fk_monitoringlocation);
CREATE TABLE IDAHO_SITE_ID
(
'pk_siteid' INTEGER PRIMARY KEY,
'fk_monitoringlocation' INTEGER,
'organization_id' VARCHAR(31),
'organizationformalname' VARCHAR(68),
'organization' VARCHAR(16)
)
-- Sets fk_monitoringlocation, organization_id as compound sort keys
COMPOUND SORTKEY(fk_monitoringlocation, organization_id);
-- View the dist and sortkey
-- status of a column
SELECT column_name,
distkey,
sortkey
FROM SVV_REDSHIFT_COLUMNS
-- Only in the spectrumdb schema
WHERE schema_name = 'spectrumdb'
-- For the ecommerce_sales table
AND table_name = 'ecommerce_sales';
Results
column | distkey | sortkey
============|=========|========
year_qtr | t | 1
total_sales | f | 2
ecom_sales | f | 0
Distkey Column
SVV_TABLE_INFO
table | encoded | diststyle | sortkey1 | skew_sortkey1 | skew_rows
===============|=========|=================|==============|===============|===========
ecommerce_sales| N | KEY(year_qtr) | year_qtr | |
date | N | ALL | dateid | 1.00 |
SELECT table
diststyle
FROM SVV_TABLE_INFO
WHERE schema like 'spectrumdb';
table | diststyle
================|==============
ecommerce_sales | KEY(year_qtr)
Introduction to Redshift