前提
計測結果
効率的なソートキー
- 実行時間(クライアント側): 2.8 秒
=# select min(l_shipdate) min_shipdate , max(l_shipdate) max_shipdate from lineitem where l_shipdate between '1996-01-01' and '1997-12-31'; min_shipdate | max_shipdate ---------------+-------------- 1996-01-01 | 1997-12-31 (1 row) Time: 2856.924 ms (00:02.857)
- クエリIDを取得
=# select pg_last_query_id(); =# \gset
- SVL_QUERY_SUMMARY でボトルネックを確認
- Segment 0 で2.6秒要しており、ここで最も時間を使っている。
- rows_pre_filter(5,473,383,140)- rows(5,468,882,659) = 4,500,481、ストレージから不要な行の読み込みは少ない
=# select * from svl_query_summary where query = :pg_last_query_id order by stm, seg, step; userid | query | stm | seg | step | maxtime | avgtime | rows | bytes | rate_row | rate_byte | label | is_diskbased | workmem | is_rrscan | is_delayed_scan | rows_pre_filter --------+--------+-----+-----+------+---------+---------+------------+-------------+------------+-------------+-----------------------------------------+--------------+---------+-----------+-----------------+----------------- 100 | 600351 | 0 | 0 | 0 | ★2645774 | 2069041 | ★5468882659 | 65626591908 | 2734441329 | 32813295954 | scan tbl=107017 name=lineitem | f | 0 | t | f | ★5473383140 100 | 600351 | 0 | 0 | 1 | 2645774 | 2069041 | 5468882659 | 0 | 2734441329 | 0 | project | f | 0 | f | f | 0 100 | 600351 | 0 | 0 | 2 | 2645774 | 2069041 | 5468882659 | 0 | 2734441329 | 0 | project | f | 0 | f | f | 0 100 | 600351 | 0 | 0 | 3 | 2645774 | 2069041 | 16 | 192 | 8 | 96 | aggr tbl=1043 | f | 0 | f | f | 0 100 | 600351 | 1 | 1 | 0 | 689 | 467 | 16 | 192 | | | scan tbl=1043 name=Internal Worktable | f | 0 | f | f | 0 100 | 600351 | 1 | 1 | 1 | 689 | 467 | 16 | 192 | | | return | f | 0 | f | f | 0 100 | 600351 | 1 | 2 | 0 | 3253 | 3253 | 16 | 192 | | | scan tbl=1330 name=Internal Worktable | f | 0 | f | f | 0 100 | 600351 | 1 | 2 | 1 | 3253 | 3253 | 1 | 12 | | | aggr tbl=1048 | f | 0 | f | f | 0 100 | 600351 | 2 | 3 | 0 | 356 | 356 | 1 | 12 | | | scan tbl=1048 name=Internal Worktable | f | 0 | f | f | 0 100 | 600351 | 2 | 3 | 1 | 356 | 356 | 1 | 0 | | | project | f | 0 | f | f | 0 100 | 600351 | 2 | 3 | 2 | 356 | 356 | 1 | 0 | | | project | f | 0 | f | f | 0 100 | 600351 | 2 | 3 | 3 | 356 | 356 | 1 | 30 | | | return | f | 0 | f | f | 0
- SVL_QUERY_METRICS でクエリのメトリクスを確認する
- ストレージからの読込んだブロック数は 43,259(=43,259MB=42.2GB)。
=# select * from svl_query_metrics where query = :pg_last_query_id order by dimension; userid | query | service_class | dimension | segment | step | step_label | query_cpu_time | query_blocks_read | query_execution_time | query_cpu_usage_percent | query_temp_blocks_to_disk | segment_execution_time | cpu_skew | io_skew | scan_row_count | join_row_count | nested_loop_join_row_count | return_row_count | spectrum_scan_row_count | spectrum_scan_size_mb | query_queue_time | service_class_name --------+--------+---------------+-----------+---------+------+------------+----------------+-------------------+----------------------+-------------------------+---------------------------+------------------------+----------+---------+----------------+----------------+----------------------------+------------------+-------------------------+-----------------------+------------------+------------------------------------------------------------------ 100 | 600351 | 100 | query | | | | 9 | 43259★ | 3 | 36.51 | | | | | | | | | | | | Default queue 100 | 600351 | 100 | segment | 0 | | | | | 3 | | | 2 | 1.31 | 1.33 | | | | | | | | Default queue 100 | 600351 | 100 | step | 0 | 2 | project | | | 3 | | | | | | | | | | | | | Default queue 100 | 600351 | 100 | step | 0 | 1 | project | | | 3 | | | | | | | | | | | | | Default queue 100 | 600351 | 100 | step | 0 | 0 | scan | | | 3 | | | | | | 3650849308 | | | | | | | Default queue
非効率なソートキー
- 実行時間(クライアント側): 6.8秒
=# select min(l_shipdate) min_shipdate , max(l_shipdate) max_shipdate from lineitem where l_shipdate between '1996-01-01' and '1997-12-31'; min_shipdate | max_shipdate ---------------+-------------- 1996-01-01 | 1997-12-31 (1 row) Time: 6891.637 ms (00:06.892)
- クエリIDを取得
=# select pg_last_query_id(); =# \gset
- SVL_QUERY_SUMMARY でボトルネックを確認
- Segment 0 で6.6秒要しており、ここで最も時間を使っている。
- rows_pre_filter(18,000,048,306)- rows(5,468,882,659) = 12,531,165,647、ストレージから不要な行の読み込みが多い。
=# select * from svl_query_summary where query = :pg_last_query_id order by stm, seg, step; userid | query | stm | seg | step | maxtime | avgtime | rows | bytes | rate_row | rate_byte | label | is_diskbased | workmem | is_rrscan | is_delayed_scan | rows_pre_filter --------+--------+-----+-----+------+---------+---------+------------+-------------+-----------+-------------+-----------------------------------------+--------------+---------+-----------+-----------------+----------------- 100 | 600342 | 0 | 0 | 0 | ★6665755 | 6195903 | ★5468882659 | 65626591908 | 911480443 | 10937765318 | scan tbl=108285 name=lineitem | f | 0 | t | f | ★18000048306 100 | 600342 | 0 | 0 | 1 | 6665755 | 6195903 | 5468882659 | 0 | 911480443 | 0 | project | f | 0 | f | f | 0 100 | 600342 | 0 | 0 | 2 | 6665755 | 6195903 | 5468882659 | 0 | 911480443 | 0 | project | f | 0 | f | f | 0 100 | 600342 | 0 | 0 | 3 | 6665755 | 6195903 | 16 | 192 | 2 | 32 | aggr tbl=1043 | f | 0 | f | f | 0 100 | 600342 | 1 | 1 | 0 | 2024 | 574 | 16 | 192 | | | scan tbl=1043 name=Internal Worktable | f | 0 | f | f | 0 100 | 600342 | 1 | 1 | 1 | 2024 | 574 | 16 | 192 | | | return | f | 0 | f | f | 0 100 | 600342 | 1 | 2 | 0 | 6880 | 6880 | 16 | 192 | | | scan tbl=1321 name=Internal Worktable | f | 0 | f | f | 0 100 | 600342 | 1 | 2 | 1 | 6880 | 6880 | 1 | 12 | | | aggr tbl=1048 | f | 0 | f | f | 0 100 | 600342 | 2 | 3 | 0 | 382 | 382 | 1 | 12 | | | scan tbl=1048 name=Internal Worktable | f | 0 | f | f | 0 100 | 600342 | 2 | 3 | 1 | 382 | 382 | 1 | 0 | | | project | f | 0 | f | f | 0 100 | 600342 | 2 | 3 | 2 | 382 | 382 | 1 | 0 | | | project | f | 0 | f | f | 0 100 | 600342 | 2 | 3 | 3 | 382 | 382 | 1 | 30 | | | return | f | 0 | f | f | 0
- SVL_QUERY_METRICS でクエリのメトリクスを確認する
- ストレージからの読込んだブロック数は 199,492(=199,492MB=194.8GB)。
=# select * from svl_query_metrics where query = :pg_last_query_id order by dimension; userid | query | service_class | dimension | segment | step | step_label | query_cpu_time | query_blocks_read | query_execution_time | query_cpu_usage_percent | query_temp_blocks_to_disk | segment_execution_time | cpu_skew | io_skew | scan_row_count | join_row_count | nested_loop_join_row_count | return_row_count | spectrum_scan_row_count | spectrum_scan_size_mb | query_queue_time | service_class_name --------+--------+---------------+-----------+---------+------+------------+----------------+-------------------+----------------------+-------------------------+---------------------------+------------------------+----------+---------+----------------+----------------+----------------------------+------------------+-------------------------+-----------------------+------------------+------------------------------------------------------------------ 100 | 600342 | 100 | query | | | | 23 | ★199492 | 7 | 24.56 | | | | | | | | | | | | Default queue 100 | 600342 | 100 | segment | 0 | | | | | 7 | | | 6 | 1.22 | 1.09 | | | | | | | | Default queue 100 | 600342 | 100 | step | 0 | 0 | scan | | | 7 | | | | | | 16857732179 | | | | | | | Default queue 100 | 600342 | 100 | step | 0 | 1 | project | | | 7 | | | | | | | | | | | | | Default queue 100 | 600342 | 100 | step | 0 | 2 | project | | | 7 | | | | | | | | | |
事前準備
共通
- ユーザーレベルで結果キャッシュと AutoMV を無効化
alter user awsuser set enable_result_cache_for_session = off; alter user awsuser set mv_enable_aqmv_for_session=off;
TPC-H 3TB のデータセットそのまま(速いパターン)
- データベース作成
create database tpch_3tb collate case_insensitive isolation level snapshot;
TPC-H 3TB のデータセットで、lineitem と orders のソートキーを変更したもの(遅いパターン)
- データベース作成
create database tpch_3tb_other_key collate case_insensitive isolation level snapshot;
- テーブル作成・データロード
create table lineitem ( l_orderkey int8 not null , l_partkey int8 not null, l_suppkey int4 not null, l_linenumber int4 not null, l_quantity numeric(12,2) not null, l_extendedprice numeric(12,2) not null, l_discount numeric(12,2) not null, l_tax numeric(12,2) not null, l_returnflag char(1) not null, l_linestatus char(1) not null, l_shipdate date not null , l_commitdate date not null, l_receiptdate date not null, l_shipinstruct char(25) not null, l_shipmode char(10) not null, l_comment varchar(44) not null, Primary Key(L_ORDERKEY, L_LINENUMBER) ) distkey(l_orderkey) sortkey(l_orderkey, l_shipdate) ; create table orders ( o_orderkey int8 not null, o_custkey int8 not null, o_orderstatus char(1) not null, o_totalprice numeric(12,2) not null, o_orderdate date not null, o_orderpriority char(15) not null, o_clerk char(15) not null, o_shippriority int4 not null, o_comment varchar(79) not null, Primary Key(O_ORDERKEY) ) distkey(o_orderkey) sortkey(o_orderkey, o_orderdate) ; copy lineitem from 's3://redshift-downloads/TPC-H/2.18/3TB/lineitem/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1'; copy orders from 's3://redshift-downloads/TPC-H/2.18/3TB/orders/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';