Redshift クラスターを作成する
クライアント(JackDB)から接続する
- Log in | JackDB にアカウントを作成する。
- [+ Add a data source] を選択する
JackDBからRedshiftへのアクセスを許可する
チュートリアル: テーブル設計のチューニング - Amazon Redshift を試してみる
ソートキー、分散キー、圧縮を使用しないケース
- テーブルを作成
CREATE TABLE part ( p_partkey INTEGER NOT NULL, p_name VARCHAR(22) NOT NULL, p_mfgr VARCHAR(6) NOT NULL, p_category VARCHAR(7) NOT NULL, p_brand1 VARCHAR(9) NOT NULL, p_color VARCHAR(11) NOT NULL, p_type VARCHAR(25) NOT NULL, p_size INTEGER NOT NULL, p_container VARCHAR(10) NOT NULL ); CREATE TABLE supplier ( s_suppkey INTEGER NOT NULL, s_name VARCHAR(25) NOT NULL, s_address VARCHAR(25) NOT NULL, s_city VARCHAR(10) NOT NULL, s_nation VARCHAR(15) NOT NULL, s_region VARCHAR(12) NOT NULL, s_phone VARCHAR(15) NOT NULL ); CREATE TABLE customer ( c_custkey INTEGER NOT NULL, c_name VARCHAR(25) NOT NULL, c_address VARCHAR(25) NOT NULL, c_city VARCHAR(10) NOT NULL, c_nation VARCHAR(15) NOT NULL, c_region VARCHAR(12) NOT NULL, c_phone VARCHAR(15) NOT NULL, c_mktsegment VARCHAR(10) NOT NULL ); CREATE TABLE dwdate ( d_datekey INTEGER NOT NULL, d_date VARCHAR(19) NOT NULL, d_dayofweek VARCHAR(10) NOT NULL, d_month VARCHAR(10) NOT NULL, d_year INTEGER NOT NULL, d_yearmonthnum INTEGER NOT NULL, d_yearmonth VARCHAR(8) NOT NULL, d_daynuminweek INTEGER NOT NULL, d_daynuminmonth INTEGER NOT NULL, d_daynuminyear INTEGER NOT NULL, d_monthnuminyear INTEGER NOT NULL, d_weeknuminyear INTEGER NOT NULL, d_sellingseason VARCHAR(13) NOT NULL, d_lastdayinweekfl VARCHAR(1) NOT NULL, d_lastdayinmonthfl VARCHAR(1) NOT NULL, d_holidayfl VARCHAR(1) NOT NULL, d_weekdayfl VARCHAR(1) NOT NULL ); CREATE TABLE lineorder ( lo_orderkey INTEGER NOT NULL, lo_linenumber INTEGER NOT NULL, lo_custkey INTEGER NOT NULL, lo_partkey INTEGER NOT NULL, lo_suppkey INTEGER NOT NULL, lo_orderdate INTEGER NOT NULL, lo_orderpriority VARCHAR(15) NOT NULL, lo_shippriority VARCHAR(1) NOT NULL, lo_quantity INTEGER NOT NULL, lo_extendedprice INTEGER NOT NULL, lo_ordertotalprice INTEGER NOT NULL, lo_discount INTEGER NOT NULL, lo_revenue INTEGER NOT NULL, lo_supplycost INTEGER NOT NULL, lo_tax INTEGER NOT NULL, lo_commitdate INTEGER NOT NULL, lo_shipmode VARCHAR(10) NOT NULL );
- データロード(圧縮なし)
と はAWS アカウント認証情報に置き換える。 - IAM ユーザー作成時にダウンロードまたはメモしておいた情報を使う。
copy customer from 's3://awssampledbuswest2/ssbgz/customer' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip compupdate off region 'us-west-2'; copy dwdate from 's3://awssampledbuswest2/ssbgz/dwdate' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip compupdate off region 'us-west-2'; copy lineorder from 's3://awssampledbuswest2/ssbgz/lineorder' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip compupdate off region 'us-west-2'; copy part from 's3://awssampledbuswest2/ssbgz/part' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip compupdate off region 'us-west-2'; copy supplier from 's3://awssampledbuswest2/ssbgz/supplier' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip compupdate off region 'us-west-2';
- テーブルに想定通りの件数がロードされたことを確認する。
select count(*) from LINEORDER; select count(*) from PART; select count(*) from CUSTOMER; select count(*) from SUPPLIER; select count(*) from DWDATE;
- テーブルのサイズを確認する
select stv_tbl_perm.name as table, count(*) as mb from stv_blocklist, stv_tbl_perm where stv_blocklist.tbl = stv_tbl_perm.id and stv_blocklist.slice = stv_tbl_perm.slice and stv_tbl_perm.name in ('lineorder','part','customer','dwdate','supplier') group by stv_tbl_perm.name order by 1 asc;
table mb customer 224 dwdate 160 lineorder 34,346 part 120 supplier 104
- クエリパフォーマンスを計測する
- 1回目はコンパイル時間が含まれるため2回実行して2回目をとる。
-- Query 1 -- Restrictions on only one dimension. select sum(lo_extendedprice*lo_discount) as revenue from lineorder, dwdate where lo_orderdate = d_datekey and d_year = 1997 and lo_discount between 1 and 3 and lo_quantity < 24; -- Query 2 -- Restrictions on two dimensions select sum(lo_revenue), d_year, p_brand1 from lineorder, dwdate, part, supplier where lo_orderdate = d_datekey and lo_partkey = p_partkey and lo_suppkey = s_suppkey and p_category = 'MFGR#12' and s_region = 'AMERICA' group by d_year, p_brand1 order by d_year, p_brand1; -- Query 3 -- Drill down in time to just one month select c_city, s_city, d_year, sum(lo_revenue) as revenue from customer, lineorder, supplier, dwdate where lo_custkey = c_custkey and lo_suppkey = s_suppkey and lo_orderdate = d_datekey and (c_city='UNITED KI1' or c_city='UNITED KI5') and (s_city='UNITED KI1' or s_city='UNITED KI5') and d_yearmonth = 'Dec1997' group by c_city, s_city, d_year order by d_year asc, revenue desc;
ソートキー、分散キー、圧縮を使用するケース
- テーブルを削除する
drop table part cascade; drop table supplier cascade; drop table customer cascade; drop table dwdate cascade; drop table lineorder cascade;
- テーブルを再作成する(分散キー、ソートキーあり)
CREATE TABLE part ( p_partkey integer not null sortkey distkey, p_name varchar(22) not null, p_mfgr varchar(6) not null, p_category varchar(7) not null, p_brand1 varchar(9) not null, p_color varchar(11) not null, p_type varchar(25) not null, p_size integer not null, p_container varchar(10) not null ); CREATE TABLE supplier ( s_suppkey integer not null sortkey, s_name varchar(25) not null, s_address varchar(25) not null, s_city varchar(10) not null, s_nation varchar(15) not null, s_region varchar(12) not null, s_phone varchar(15) not null) diststyle all; CREATE TABLE customer ( c_custkey integer not null sortkey, c_name varchar(25) not null, c_address varchar(25) not null, c_city varchar(10) not null, c_nation varchar(15) not null, c_region varchar(12) not null, c_phone varchar(15) not null, c_mktsegment varchar(10) not null) diststyle all; CREATE TABLE dwdate ( d_datekey integer not null sortkey, d_date varchar(19) not null, d_dayofweek varchar(10) not null, d_month varchar(10) not null, d_year integer not null, d_yearmonthnum integer not null, d_yearmonth varchar(8) not null, d_daynuminweek integer not null, d_daynuminmonth integer not null, d_daynuminyear integer not null, d_monthnuminyear integer not null, d_weeknuminyear integer not null, d_sellingseason varchar(13) not null, d_lastdayinweekfl varchar(1) not null, d_lastdayinmonthfl varchar(1) not null, d_holidayfl varchar(1) not null, d_weekdayfl varchar(1) not null) diststyle all; CREATE TABLE lineorder ( lo_orderkey integer not null, lo_linenumber integer not null, lo_custkey integer not null, lo_partkey integer not null distkey, lo_suppkey integer not null, lo_orderdate integer not null sortkey, lo_orderpriority varchar(15) not null, lo_shippriority varchar(1) not null, lo_quantity integer not null, lo_extendedprice integer not null, lo_ordertotalprice integer not null, lo_discount integer not null, lo_revenue integer not null, lo_supplycost integer not null, lo_tax integer not null, lo_commitdate integer not null, lo_shipmode varchar(10) not null );
- データロード(圧縮あり)
copy customer from 's3://awssampledbuswest2/ssbgz/customer' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip region 'us-west-2'; copy dwdate from 's3://awssampledbuswest2/ssbgz/dwdate' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip region 'us-west-2'; copy lineorder from 's3://awssampledbuswest2/ssbgz/lineorder' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip region 'us-west-2'; copy part from 's3://awssampledbuswest2/ssbgz/part' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip region 'us-west-2'; copy supplier from 's3://awssampledbuswest2/ssbgz/supplier' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' gzip region 'us-west-2';
- テーブルサイズを確認する
select stv_tbl_perm.name as "table", count(*) as "blocks (mb)" from stv_blocklist, stv_tbl_perm where stv_blocklist.tbl = stv_tbl_perm.id and stv_blocklist.slice = stv_tbl_perm.slice and stv_tbl_perm.name in ('customer', 'part', 'supplier', 'dwdate', 'lineorder') group by stv_tbl_perm.name order by 1 asc;
table blocks(mb) customer 604 dwdate 160 lineorder 26,359 part 200 supplier 236
圧縮が効いてブロック数が少なくなっている
- 分散キーによる分散を確認する
select trim(name) as table, slice, sum(num_values) as rows, min(minvalue), max(maxvalue) from svv_diskusage where name in ('customer', 'part', 'supplier', 'dwdate', 'lineorder') and col =0 group by name, slice order by name, slice;
- クエリを実行する
-- Query 1 -- Restrictions on only one dimension. select sum(lo_extendedprice*lo_discount) as revenue from lineorder, dwdate where lo_orderdate = d_datekey and d_year = 1997 and lo_discount between 1 and 3 and lo_quantity < 24; -- Query 2 -- Restrictions on two dimensions select sum(lo_revenue), d_year, p_brand1 from lineorder, dwdate, part, supplier where lo_orderdate = d_datekey and lo_partkey = p_partkey and lo_suppkey = s_suppkey and p_category = 'MFGR#12' and s_region = 'AMERICA' group by d_year, p_brand1 order by d_year, p_brand1; -- Query 3 -- Drill down in time to just one month select c_city, s_city, d_year, sum(lo_revenue) as revenue from customer, lineorder, supplier, dwdate where lo_custkey = c_custkey and lo_suppkey = s_suppkey and lo_orderdate = d_datekey and (c_city='UNITED KI1' or c_city='UNITED KI5') and (s_city='UNITED KI1' or s_city='UNITED KI5') and d_yearmonth = 'Dec1997' group by c_city, s_city, d_year order by d_year asc, revenue desc;