ablog

不器用で落着きのない技術者のメモ

Amazon Redshift で Cloud Data Warehouse Benchmark Derived from TPC-H を実行してみる

Amazon Redshift で Cloud Data Warehouse Benchmark Derived from TPC-H を実行してみる。

Cloud Data Warehouse Benchmark Derived from TPC-H の 100GB のデータセットをロードする

  • データベースを作成し、分離レベルを SNAPSHOT に設定する。
psql -a "host=rs-ra3-4xl-4n.********.us-east-1.redshift.amazonaws.com user=awsuser dbname=dev port=5439"

create database tpch_100gb;
alter database tpch_100gb isolation level snapshot;
  • データベースの collate を case_insensitive(大文字小文字区別なし)に変更する。
psql -a "host=rs-ra3-4xl-4n.********.us-east-1.redshift.amazonaws.com user=awsuser dbname=tpch_100gb port=5439" 

alter database tpch_100gb collate case_insensitive;
  • データベースの設定を確認する
select datname, datconfig from pg_database_info;

   datname    |                  datconfig                   
--------------+----------------------------------------------
 tpch_100gb   | {concurrency_model=1,case_sensitive=off} ★
 dev          | {enable_query_profiler_instrumentation=true}
 padb_harvest | 
 template1    | 
 template0    | 
(5 rows)
  • ddl.sql を実行し、テーブルを作成してデータをロードする
create table customer (
  c_custkey int8 not null ,
  c_name varchar(25) not null,
  c_address varchar(40) not null,
  c_nationkey int4 not null,
  c_phone char(15) not null,
  c_acctbal numeric(12,2) not null,
  c_mktsegment char(10) not null,
  c_comment varchar(117) not null,
  Primary Key(C_CUSTKEY)
) distkey(c_custkey) sortkey(c_custkey);

create table lineitem (
  l_orderkey int8 not null ,
  l_partkey int8 not null,
  l_suppkey int4 not null,
  l_linenumber int4 not null,
  l_quantity numeric(12,2) not null,
  l_extendedprice numeric(12,2) not null,
  l_discount numeric(12,2) not null,
  l_tax numeric(12,2) not null,
  l_returnflag char(1) not null,
  l_linestatus char(1) not null,
  l_shipdate date not null ,
  l_commitdate date not null,
  l_receiptdate date not null,
  l_shipinstruct char(25) not null,
  l_shipmode char(10) not null,
  l_comment varchar(44) not null,
  Primary Key(L_ORDERKEY, L_LINENUMBER)
) distkey(l_orderkey) sortkey(l_shipdate,l_orderkey)  ;

create table nation (
  n_nationkey int4 not null,
  n_name char(25) not null ,
  n_regionkey int4 not null,
  n_comment varchar(152) not null,
  Primary Key(N_NATIONKEY)                                
) distkey(n_nationkey) sortkey(n_nationkey) ;

create table orders (
  o_orderkey int8 not null,
  o_custkey int8 not null,
  o_orderstatus char(1) not null,
  o_totalprice numeric(12,2) not null,
  o_orderdate date not null,
  o_orderpriority char(15) not null,
  o_clerk char(15) not null,
  o_shippriority int4 not null,
  o_comment varchar(79) not null,
  Primary Key(O_ORDERKEY)
) distkey(o_orderkey) sortkey(o_orderdate, o_orderkey) ;

create table part (
  p_partkey int8 not null ,
  p_name varchar(55) not null,
  p_mfgr char(25) not null,
  p_brand char(10) not null,
  p_type varchar(25) not null,
  p_size int4 not null,
  p_container char(10) not null,
  p_retailprice numeric(12,2) not null,
  p_comment varchar(23) not null,
  PRIMARY KEY (P_PARTKEY)
) distkey(p_partkey) sortkey(p_partkey);

create table partsupp (
  ps_partkey int8 not null,
  ps_suppkey int4 not null,
  ps_availqty int4 not null,
  ps_supplycost numeric(12,2) not null,
  ps_comment varchar(199) not null,
  Primary Key(PS_PARTKEY, PS_SUPPKEY)
) distkey(ps_partkey) sortkey(ps_partkey);

create table region (
  r_regionkey int4 not null,
  r_name char(25) not null ,
  r_comment varchar(152) not null,
  Primary Key(R_REGIONKEY)                             
) distkey(r_regionkey) sortkey(r_regionkey);

create table supplier (
  s_suppkey int4 not null,
  s_name char(25) not null,
  s_address varchar(40) not null,
  s_nationkey int4 not null,
  s_phone char(15) not null,
  s_acctbal numeric(12,2) not null,
  s_comment varchar(101) not null,
  Primary Key(S_SUPPKEY)
) distkey(s_suppkey) sortkey(s_suppkey)
;

copy region from 's3://redshift-downloads/TPC-H/2.18/100GB/region/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';
copy nation from 's3://redshift-downloads/TPC-H/2.18/100GB/nation/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';
copy lineitem from 's3://redshift-downloads/TPC-H/2.18/100GB/lineitem/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';
copy orders from 's3://redshift-downloads/TPC-H/2.18/100GB/orders/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';
copy part from 's3://redshift-downloads/TPC-H/2.18/100GB/part/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';
copy supplier from 's3://redshift-downloads/TPC-H/2.18/100GB/supplier/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';
copy partsupp from 's3://redshift-downloads/TPC-H/2.18/100GB/partsupp/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';
copy customer from 's3://redshift-downloads/TPC-H/2.18/100GB/customer/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';