計測
分散キー・ソートキーなし
# set search_path to ssbgz_no_sortkey;
# show search_path;
# set enable_result_cache_for_session=off;
# select * from pg_table_def where (distkey = true or sortkey <> 0);
schemaname | tablename | column | type | encoding | distkey | sortkey | notnull
(0 rows)
Time: 60.131 ms
# select count(lo_orderdate) from lineorder where lo_orderdate < '19950101' and lo_discount between 1 and 3;
count
74529670
(1 row)
Time: 432.911 ms
# select pg_last_query_id();
pg_last_query_id
1768
(1 row)
Time: 18.267 ms
# \gset
Time: 17.707 ms
# select * from svl_query_summary where query = :pg_last_query_id order by stm, seg, step;
userid | query | stm | seg | step | maxtime | avgtime | rows | bytes | rate_row | rate_byte | label | is_diskbased | workmem | is_rrscan | is_delayed_scan | rows_pre_filter
100 | 1768 | 0 | 0 | 0 | 390925 | 380732 | 74529670 | 1192474720 | | | scan tbl=105854 name=lineorder | f | 0 | t | f | 600037902
100 | 1768 | 0 | 0 | 1 | 390925 | 380732 | 74529670 | 0 | | | project | f | 0 | f | f | 0
100 | 1768 | 0 | 0 | 2 | 390925 | 380732 | 74529670 | 0 | | | project | f | 0 | f | f | 0
100 | 1768 | 0 | 0 | 3 | 390925 | 380732 | 8 | 64 | | | aggr tbl=1043 | f | 0 | f | f | 0
100 | 1768 | 1 | 1 | 0 | 1548 | 606 | 8 | 64 | | | scan tbl=1043 name=Internal Worktable | f | 0 | f | f | 0
100 | 1768 | 1 | 1 | 1 | 1548 | 606 | 8 | 64 | | | return | f | 0 | f | f | 0
100 | 1768 | 1 | 2 | 0 | 5115 | 5115 | 8 | 64 | | | scan tbl=6906 name=Internal Worktable | f | 0 | f | f | 0
100 | 1768 | 1 | 2 | 1 | 5115 | 5115 | 1 | 16 | | | aggr tbl=1048 | f | 0 | f | f | 0
100 | 1768 | 2 | 3 | 0 | 287 | 287 | 1 | 16 | | | scan tbl=1048 name=Internal Worktable | f | 0 | f | f | 0
100 | 1768 | 2 | 3 | 1 | 287 | 287 | 1 | 0 | | | project | f | 0 | f | f | 0
100 | 1768 | 2 | 3 | 2 | 287 | 287 | 1 | 0 | | | project | f | 0 | f | f | 0
100 | 1768 | 2 | 3 | 3 | 287 | 287 | 1 | 14 | | | return | f | 0 | f | f | 0
(12 rows)
Time: 2334.550 ms (00:02.335)
ソートキーあり(パターン1)
# set search_path to ssbgz_sortkey1;
# show search_path;
# set enable_result_cache_for_session=off;
# select * from pg_table_def where (distkey = true or sortkey <> 0);
schemaname | tablename | column | type | encoding | distkey | sortkey | notnull
ssbgz_sortkey1 | lineorder | lo_partkey | integer | az64 | t | 0 | t
ssbgz_sortkey1 | lineorder | lo_orderdate | integer | none | f | 1 | t
ssbgz_sortkey1 | lineorder | lo_discount | integer | none | f | 2 | t
(3 rows)
Time: 62.679 ms
=# select count(lo_orderdate) from lineorder where lo_orderdate < '19950101' and lo_discount between 1 and 3;
count
74529670
(1 row)
Time: 105.568 ms
# select pg_last_query_id();
pg_last_query_id
5147
(1 row)
Time: 18.544 ms
# \gset
Time: 20.727 ms
# select * from svl_query_summary where query = :pg_last_query_id order by stm, seg, step;
userid | query | stm | seg | step | maxtime | avgtime | rows | bytes | rate_row | rate_byte | label | is_diskbased | workmem | is_rrscan | is_delayed_scan | rows_pre_filter
100 | 5147 | 0 | 0 | 0 | 64463 | 60645 | 74529670 | 1192474720 | | | scan tbl=105866 name=lineorder | f | 0 | t | f | 274402995
100 | 5147 | 0 | 0 | 1 | 64463 | 60645 | 74529670 | 0 | | | project | f | 0 | f | f | 0
100 | 5147 | 0 | 0 | 2 | 64463 | 60645 | 74529670 | 0 | | | project | f | 0 | f | f | 0
100 | 5147 | 0 | 0 | 3 | 64463 | 60645 | 8 | 64 | | | aggr tbl=1043 | f | 0 | f | f | 0
100 | 5147 | 1 | 1 | 0 | 577 | 492 | 8 | 64 | | | scan tbl=1043 name=Internal Worktable | f | 0 | f | f | 0
100 | 5147 | 1 | 1 | 1 | 577 | 492 | 8 | 64 | | | return | f | 0 | f | f | 0
100 | 5147 | 1 | 2 | 0 | 1549 | 1549 | 8 | 64 | | | scan tbl=20031 name=Internal Worktable | f | 0 | f | f | 0
100 | 5147 | 1 | 2 | 1 | 1549 | 1549 | 1 | 16 | | | aggr tbl=1048 | f | 0 | f | f | 0
100 | 5147 | 2 | 3 | 0 | 299 | 299 | 1 | 16 | | | scan tbl=1048 name=Internal Worktable | f | 0 | f | f | 0
100 | 5147 | 2 | 3 | 1 | 299 | 299 | 1 | 0 | | | project | f | 0 | f | f | 0
100 | 5147 | 2 | 3 | 2 | 299 | 299 | 1 | 0 | | | project | f | 0 | f | f | 0
100 | 5147 | 2 | 3 | 3 | 299 | 299 | 1 | 14 | | | return | f | 0 | f | f | 0
(12 rows)
Time: 11460.543 ms (00:11.461)
ソートキーあり(パターン2)
# set search_path to ssbgz_sortkey2;
# show search_path;
# set enable_result_cache_for_session=off;
awsuser 20230402_14:53:48 =# select * from pg_table_def where (distkey = true or sortkey <> 0);
schemaname | tablename | column | type | encoding | distkey | sortkey | notnull
ssbgz_sortkey2 | lineorder | lo_partkey | integer | az64 | t | 0 | t
ssbgz_sortkey2 | lineorder | lo_orderdate | integer | none | f | 2 | t
ssbgz_sortkey2 | lineorder | lo_discount | integer | none | f | 1 | t
(3 rows)
Time: 41.423 ms
=# select count(lo_orderdate) from lineorder where lo_orderdate < '19950101' and lo_discount between 1 and 3;
count
74529670
(1 row)
Time: 74.131 ms
=# select pg_last_query_id();
pg_last_query_id
200884
(1 row)
Time: 19.078 ms
# \gset
Time: 20.848 ms
=# select * from svl_query_summary where query = :pg_last_query_id order by stm, seg, step;
userid | query | stm | seg | step | maxtime | avgtime | rows | bytes | rate_row | rate_byte | label | is_diskbased | workmem | is_rrscan | is_delayed_scan | rows_pre_filter
100 | 200884 | 0 | 0 | 0 | 32840 | 30759 | 74529670 | 1192474720 | | | scan tbl=105869 name=lineorder | f | 0 | t | f | 80198010
100 | 200884 | 0 | 0 | 1 | 32840 | 30759 | 74529670 | 0 | | | project | f | 0 | f | f | 0
100 | 200884 | 0 | 0 | 2 | 32840 | 30759 | 74529670 | 0 | | | project | f | 0 | f | f | 0
100 | 200884 | 0 | 0 | 3 | 32840 | 30759 | 8 | 64 | | | aggr tbl=1043 | f | 0 | f | f | 0
100 | 200884 | 1 | 1 | 0 | 570 | 437 | 8 | 64 | | | scan tbl=1043 name=Internal Worktable | f | 0 | f | f | 0
100 | 200884 | 1 | 1 | 1 | 570 | 437 | 8 | 64 | | | return | f | 0 | f | f | 0
100 | 200884 | 1 | 2 | 0 | 1805 | 1805 | 8 | 64 | | | scan tbl=3510 name=Internal Worktable | f | 0 | f | f | 0
100 | 200884 | 1 | 2 | 1 | 1805 | 1805 | 1 | 16 | | | aggr tbl=1048 | f | 0 | f | f | 0
100 | 200884 | 2 | 3 | 0 | 272 | 272 | 1 | 16 | | | scan tbl=1048 name=Internal Worktable | f | 0 | f | f | 0
100 | 200884 | 2 | 3 | 1 | 272 | 272 | 1 | 0 | | | project | f | 0 | f | f | 0
100 | 200884 | 2 | 3 | 2 | 272 | 272 | 1 | 0 | | | project | f | 0 | f | f | 0
100 | 200884 | 2 | 3 | 3 | 272 | 272 | 1 | 14 | | | return | f | 0 | f | f | 0
(12 rows)
Time: 2472.368 ms (00:02.472)
準備: スキーマ・テーブル作成、データロード
分散キー・ソートキーなし
create schema if not exists ssbgz_no_sortkey;
set search_path to ssbgz_no_sortkey;
CREATE TABLE lineorder
(
lo_orderkey INTEGER NOT NULL,
lo_linenumber INTEGER NOT NULL,
lo_custkey INTEGER NOT NULL,
lo_partkey INTEGER NOT NULL,
lo_suppkey INTEGER NOT NULL,
lo_orderdate INTEGER NOT NULL,
lo_orderpriority VARCHAR(15) NOT NULL,
lo_shippriority VARCHAR(1) NOT NULL,
lo_quantity INTEGER NOT NULL,
lo_extendedprice INTEGER NOT NULL,
lo_ordertotalprice INTEGER NOT NULL,
lo_discount INTEGER NOT NULL,
lo_revenue INTEGER NOT NULL,
lo_supplycost INTEGER NOT NULL,
lo_tax INTEGER NOT NULL,
lo_commitdate INTEGER NOT NULL,
lo_shipmode VARCHAR(10) NOT NULL
);
copy lineorder from 's3://awssampledb/ssbgz/lineorder'
iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess'
gzip compupdate off region 'ap-northeast-1';
ソートキーあり(パターン1)
create schema if not exists ssbgz_sortkey1;
set search_path to ssbgz_sortkey1;
CREATE TABLE lineorder (
lo_orderkey integer not null,
lo_linenumber integer not null,
lo_custkey integer not null,
lo_partkey integer not null,
lo_suppkey integer not null,
lo_orderdate integer not null,
lo_orderpriority varchar(15) not null,
lo_shippriority varchar(1) not null,
lo_quantity integer not null,
lo_extendedprice integer not null,
lo_ordertotalprice integer not null,
lo_discount integer not null,
lo_revenue integer not null,
lo_supplycost integer not null,
lo_tax integer not null,
lo_commitdate integer not null,
lo_shipmode varchar(10) not null
)
diststyle key distkey(lo_partkey)
sortkey (lo_orderdate,lo_discount);
copy lineorder from 's3://awssampledb/ssbgz/lineorder'
iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess'
gzip compupdate off region 'ap-northeast-1';
ソートキーありパターン2
create schema if not exists ssbgz_sortkey2;
set search_path to ssbgz_sortkey2;
CREATE TABLE lineorder (
lo_orderkey integer not null,
lo_linenumber integer not null,
lo_custkey integer not null,
lo_partkey integer not null,
lo_suppkey integer not null,
lo_orderdate integer not null,
lo_orderpriority varchar(15) not null,
lo_shippriority varchar(1) not null,
lo_quantity integer not null,
lo_extendedprice integer not null,
lo_ordertotalprice integer not null,
lo_discount integer not null,
lo_revenue integer not null,
lo_supplycost integer not null,
lo_tax integer not null,
lo_commitdate integer not null,
lo_shipmode varchar(10) not null
)
diststyle key distkey(lo_partkey)
sortkey (lo_orderdate,lo_discount);
copy lineorder from 's3://awssampledb/ssbgz/lineorder'
iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess'
gzip compupdate off region 'ap-northeast-1';
データの分布
=# select lo_discount, count(lo_discount) from lineorder group by lo_discount order by lo_discount;
lo_discount | count
0 | 54553438
1 | 54552507
2 | 54545850
3 | 54552145
4 | 54539920
5 | 54555445
6 | 54540338
7 | 54545023
8 | 54558312
9 | 54546828
10 | 54548096
(11 rows)
=# select substring(lo_orderdate, 1, 4), count(substring(lo_orderdate, 1, 4)) from lineorder group by substring(lo_orderdate, 1, 4) order by substring(lo_orderdate, 1, 4);
substring | count
1992 | 91248844
1993 | 91007488
1994 | 91044214
1995 | 91016436
1996 | 91301792
1997 | 91050840
1998 | 53368288
(7 rows)