ablog

不器用で落着きのない技術者のメモ

Amazon Redshift で2列で内部結合しているクエリで分散キー指定列の違いによる比較

Amazon Redshift で2列で内部結合しているクエリで、分散キーが2列のうち偏りのある列にした場合と偏りのない列にした場合の比較。

検証結果

偏りのある列(l_shipdate)が分散キー
select sum(a.l_quantity), count(a.l_orderkey), min(a.l_shipdate), max(a.l_shipdate)
from lineitem_dk_shipdate1 a
join lineitem_dk_shipdate2 b 
    on a.l_orderkey = b.l_orderkey 
        and a.l_shipdate = b.l_shipdate 
        and a.l_quantity > 1;
  • クエリIDを取得する
=# select pg_last_query_id();
=# \gset
  • SVL_QUERY_SUMMARY でボトルネックを確認
    • Segment 0 の avgtime が約64秒、maxtime が約71秒と偏りは小さい。
=# select * from svl_query_summary where query = :pg_last_query_id order by stm, seg, step;
 userid | query  | stm | seg | step | maxtime  | avgtime  |    rows     |    bytes     | rate_row  | rate_byte  |                    label                     | is_diskbased | workmem | is_rrscan | is_delayed_scan | rows_pre_filter 
--------+--------+-----+-----+------+----------+----------+-------------+--------------+-----------+------------+----------------------------------------------+--------------+---------+-----------+-----------------+-----------------
    100 | 414954 |   0 |   0 |    0 | ★70917830 | ★64208271 | 19992553974 | 399851079480 | 285607913 | 5712158278 | scan   tbl=120466 name=lineitem_dk_orderkey2 | f            |       0 | f         | f               |     19992553974
    100 | 414954 |   0 |   0 |    1 | 70917830 | 64208271 | 19992553974 |            0 | 285607913 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   0 |   0 |    2 | 70917830 | 64208271 | 19992553974 |            0 | 285607913 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   0 |   0 |    3 | 70917830 | 64208271 | 19992553974 |            0 | 285607913 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   0 |   0 |    4 | 70917830 | 64208271 | 23857145165 |            0 | 340816359 |          0 | mjoin  tbl=1043                              | f            |       0 | f         | f               |               0
    100 | 414954 |   0 |   0 |    5 | 70917830 | 64208271 | 23857145165 |            0 | 340816359 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   0 |   0 |    6 | 70917830 | 64208271 | 23857145165 |            0 | 340816359 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   0 |   0 |    7 | 70917830 | 64208271 |          16 |          768 |         0 |         10 | aggr   tbl=1051                              | f            |       0 | f         | f               |               0
    100 | 414954 |   0 |   0 |    8 | 70917830 | 64208271 | 19592725531 | 456025823120 | 279896079 | 6514654616 | scan   tbl=120428 name=lineitem_dk_orderkey1 | f            |       0 | t         | f               |     19992553974
    100 | 414954 |   0 |   0 |    9 | 70917830 | 64208271 | 19592725531 |            0 | 279896079 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   0 |   0 |   10 | 70917830 | 64208271 | 19592725531 |            0 | 279896079 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   1 |   1 |    0 |      485 |      339 |          16 |          768 |           |            | scan   tbl=1051 name=Internal Worktable      | f            |       0 | f         | f               |               0
    100 | 414954 |   1 |   1 |    1 |      485 |      339 |          16 |          768 |           |            | return                                       | f            |       0 | f         | f               |               0
    100 | 414954 |   1 |   2 |    0 |     1451 |     1451 |          16 |          768 |           |            | scan   tbl=60806 name=Internal Worktable     | f            |       0 | f         | f               |               0
    100 | 414954 |   1 |   2 |    1 |     1451 |     1451 |           1 |           48 |           |            | aggr   tbl=1056                              | f            |       0 | f         | f               |               0
    100 | 414954 |   2 |   3 |    0 |      331 |      331 |           1 |           48 |           |            | scan   tbl=1056 name=Internal Worktable      | f            |       0 | f         | f               |               0
    100 | 414954 |   2 |   3 |    1 |      331 |      331 |           1 |            0 |           |            | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   2 |   3 |    2 |      331 |      331 |           1 |            0 |           |            | project                                      | f            |       0 | f         | f               |               0
    100 | 414954 |   2 |   3 |    3 |      331 |      331 |           1 |           64 |           |            | return                                       | f            |       0 | f         | f               |               0
  • SVL_QUERY_METRICS でクエリのメトリクスを確認する
    • Segment 0 の segment_execution_time が 71秒。
    • cpu_skew、io_skew が1程度とほぼ偏りはない。
=# select * from svl_query_metrics where query = :pg_last_query_id order by dimension;
 userid | query  | service_class | dimension | segment | step | step_label | query_cpu_time | query_blocks_read | query_execution_time | query_cpu_usage_percent | query_temp_blocks_to_disk | segment_execution_time | cpu_skew | io_skew | scan_row_count | join_row_count | nested_loop_join_row_count | return_row_count | spectrum_scan_row_count | spectrum_scan_size_mb | query_queue_time |                        service_class_name                        
--------+--------+---------------+-----------+---------+------+------------+----------------+-------------------+----------------------+-------------------------+---------------------------+------------------------+----------+---------+----------------+----------------+----------------------------+------------------+-------------------------+-----------------------+------------------+------------------------------------------------------------------
    100 | 414954 |           100 | query     |         |      |            |            552 |            517348 |                   71 |                   54.01 |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | segment   |       0 |      |            |                |                   |                   71 |                         |                           |                     71 |     1.02 |    1.01 |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    0 | scan       |                |                   |                   71 |                         |                           |                        |          |         |    19844669888 |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |   10 | project    |                |                   |                   71 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    6 | project    |                |                   |                   71 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    1 | project    |                |                   |                   71 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    2 | project    |                |                   |                   71 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    5 | project    |                |                   |                   71 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    8 | scan       |                |                   |                   71 |                         |                           |                        |          |         |    19844873920 |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    9 | project    |                |                   |                   71 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    4 | mjoin      |                |                   |                   71 |                         |                           |                        |          |         |                |    23707130858 |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414954 |           100 | step      |       0 |    3 | project    |                |                   |                   71 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
偏りのない列(l_orderkey)が分散キー
select sum(a.l_quantity), count(a.l_orderkey), min(a.l_shipdate), max(a.l_shipdate)
from lineitem_dk_orderkey1 a
join lineitem_dk_orderkey2 b 
    on a.l_orderkey = b.l_orderkey 
        and a.l_shipdate = b.l_shipdate 
        and a.l_quantity > 1;
  • クエリIDを取得する
=# select pg_last_query_id();
=# \gset
  • SVL_QUERY_SUMMARY でボトルネックを確認
    • Segment 0 の avgtime が約1分26秒に対して、maxtime が約3分とスライスの処理時間に偏りがある。
=# select * from svl_query_summary where query = :pg_last_query_id order by stm, seg, step;
 userid | query  | stm | seg | step |  maxtime  | avgtime  |    rows     |    bytes     | rate_row  | rate_byte  |                    label                     | is_diskbased | workmem | is_rrscan | is_delayed_scan | rows_pre_filter 
--------+--------+-----+-----+------+-----------+----------+-------------+--------------+-----------+------------+----------------------------------------------+--------------+---------+-----------+-----------------+-----------------
    100 | 414994 |   0 |   0 |    0 | ★175359065 | ★86312217 | 19992553974 | 399851079480 | 114243165 | 2284863311 | scan   tbl=120462 name=lineitem_dk_shipdate2 | f            |       0 | f         | f               |     19992553974
    100 | 414994 |   0 |   0 |    1 | 175359065 | 86312217 | 19992553974 |            0 | 114243165 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   0 |   0 |    2 | 175359065 | 86312217 | 19992553974 |            0 | 114243165 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   0 |   0 |    3 | 175359065 | 86312217 | 19992553974 |            0 | 114243165 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   0 |   0 |    4 | 175359065 | 86312217 | 23857145165 |            0 | 136326543 |          0 | mjoin  tbl=1061                              | f            |       0 | f         | f               |               0
    100 | 414994 |   0 |   0 |    5 | 175359065 | 86312217 | 23857145165 |            0 | 136326543 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   0 |   0 |    6 | 175359065 | 86312217 | 23857145165 |            0 | 136326543 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   0 |   0 |    7 | 175359065 | 86312217 |          16 |          768 |         0 |          4 | aggr   tbl=1069                              | f            |       0 | f         | f               |               0
    100 | 414994 |   0 |   0 |    8 | 175359065 | 86312217 | 19592725531 | 456032030664 | 111958431 | 2605897318 | scan   tbl=111347 name=lineitem_dk_shipdate1 | f            |       0 | t         | f               |     19992553974
    100 | 414994 |   0 |   0 |    9 | 175359065 | 86312217 | 19592725531 |            0 | 111958431 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   0 |   0 |   10 | 175359065 | 86312217 | 19592725531 |            0 | 111958431 |          0 | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   1 |   1 |    0 |       538 |      364 |          16 |          768 |           |            | scan   tbl=1069 name=Internal Worktable      | f            |       0 | f         | f               |               0
    100 | 414994 |   1 |   1 |    1 |       538 |      364 |          16 |          768 |           |            | return                                       | f            |       0 | f         | f               |               0
    100 | 414994 |   1 |   2 |    0 |      1450 |     1450 |          16 |          768 |           |            | scan   tbl=61172 name=Internal Worktable     | f            |       0 | f         | f               |               0
    100 | 414994 |   1 |   2 |    1 |      1450 |     1450 |           1 |           48 |           |            | aggr   tbl=1074                              | f            |       0 | f         | f               |               0
    100 | 414994 |   2 |   3 |    0 |       358 |      358 |           1 |           48 |           |            | scan   tbl=1074 name=Internal Worktable      | f            |       0 | f         | f               |               0
    100 | 414994 |   2 |   3 |    1 |       358 |      358 |           1 |            0 |           |            | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   2 |   3 |    2 |       358 |      358 |           1 |            0 |           |            | project                                      | f            |       0 | f         | f               |               0
    100 | 414994 |   2 |   3 |    3 |       358 |      358 |           1 |           64 |           |            | return                                       | f            |       0 | f         | f               |               0
  • SVL_QUERY_METRICS でクエリのメトリクスを確認する
    • segment 0 の segment_execution_time が約3分(175秒)。
    • cpu_skew が 3.16、io_skew が 2.79 と偏りが大きい。
=# select * from svl_query_metrics where query = :pg_last_query_id order by dimension;
 userid | query  | service_class | dimension | segment | step | step_label | query_cpu_time | query_blocks_read | query_execution_time | query_cpu_usage_percent | query_temp_blocks_to_disk | segment_execution_time | cpu_skew | io_skew | scan_row_count | join_row_count | nested_loop_join_row_count | return_row_count | spectrum_scan_row_count | spectrum_scan_size_mb | query_queue_time |                        service_class_name                        
--------+--------+---------------+-----------+---------+------+------------+----------------+-------------------+----------------------+-------------------------+---------------------------+------------------------+----------+---------+----------------+----------------+----------------------------+------------------+-------------------------+-----------------------+------------------+------------------------------------------------------------------
    100 | 414994 |           100 | query     |         |      |            |            553 |            525403 |                  176 |                   40.30 |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | segment   |       0 |      |            |                |                   |                  176 |                         |                           |                    ★175 |     ★3.16 |    ★2.79 |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    1 | project    |                |                   |                  176 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    6 | project    |                |                   |                  176 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    0 | scan       |                |                   |                  176 |                         |                           |                        |          |         |    19722842048 |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    3 | project    |                |                   |                  176 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    5 | project    |                |                   |                  176 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    4 | mjoin      |                |                   |                  176 |                         |                           |                        |          |         |                |    23583795169 |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |   10 | project    |                |                   |                  176 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    9 | project    |                |                   |                  176 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    2 | project    |                |                   |                  176 |                         |                           |                        |          |         |                |                |                            |                  |                         |                       |                  | Default queue                                                   
    100 | 414994 |           100 | step      |       0 |    8 | scan       |                |                   |                  176 |                         |                           |                        |          |         |    19723004800 |                |                            |                  |                         |                       |                  | Default queue                                                   

事前準備

  • ユーザーレベルで結果キャッシュを無効化
alter user awsuser set enable_result_cache_for_session = off;
  • データベース作成
create database tpch_3tb_skew_distkey
collate case_insensitive 
isolation level snapshot;
  • テーブルを作成
    • 分散キーに l_shipdate を指定
create table lineitem_dk_shipdate1 (
  l_orderkey int8 not null ,
  l_partkey int8 not null,
  l_suppkey int4 not null,
  l_linenumber int4 not null,
  l_quantity numeric(12,2) not null,
  l_extendedprice numeric(12,2) not null,
  l_discount numeric(12,2) not null,
  l_tax numeric(12,2) not null,
  l_returnflag char(1) not null,
  l_linestatus char(1) not null,
  l_shipdate date not null ,
  l_commitdate date not null,
  l_receiptdate date not null,
  l_shipinstruct char(25) not null,
  l_shipmode char(10) not null,
  l_comment varchar(44) not null,
  Primary Key(L_ORDERKEY, L_LINENUMBER)
) distkey(l_shipdate) sortkey(l_shipdate,l_orderkey)  ;

create table lineitem_dk_shipdate2 (
  l_orderkey int8 not null ,
  l_partkey int8 not null,
  l_suppkey int4 not null,
  l_linenumber int4 not null,
  l_quantity numeric(12,2) not null,
  l_extendedprice numeric(12,2) not null,
  l_discount numeric(12,2) not null,
  l_tax numeric(12,2) not null,
  l_returnflag char(1) not null,
  l_linestatus char(1) not null,
  l_shipdate date not null ,
  l_commitdate date not null,
  l_receiptdate date not null,
  l_shipinstruct char(25) not null,
  l_shipmode char(10) not null,
  l_comment varchar(44) not null,
  Primary Key(L_ORDERKEY, L_LINENUMBER)
) distkey(l_shipdate) sortkey(l_shipdate,l_orderkey)  ;
  • データをロードする
copy lineitem_dk_shipdate1 from 's3://redshift-downloads/TPC-H/2.18/3TB/orders/' iam_role 'arn:aws:iam::123456789012:role/redshift-spectrum-s3-fullaccess' delimiter '|' region 'us-east-1';
  • lineitem_dk_shipdate1.l_shipdate を偏らせる
insert into lineitem_dk_shipdate1 (
    select 
        l_orderkey,
        l_partkey,
        l_suppkey,
        l_linenumber,
        l_quantity,
        l_extendedprice,
        l_discount,
        l_tax,
        l_returnflag,
        l_linestatus,
        '1997-07-03',
        l_commitdate,
        l_receiptdate,
        l_shipinstruct,
        l_shipmode,
        l_comment
    from lineitem_dk_shipdate1
    limit 1992505668
);
  • スライス毎の行数を確認する
    • スライス4だけ約3倍ほど行数が多い。
 =# select *  from stv_tbl_perm where name = 'lineitem_dk_shipdate1' order by slice;
 slice |   id   |                                                               name                                                               |    rows    | sorted_rows | temp | db_id  | insert_pristine | delete_pristine | backup | dist_style | block_count 
-------+--------+----------------------------------------------------------------------------------------------------------------------------------+------------+-------------+------+--------+-----------------+-----------------+--------+------------+-------------
     0 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1105831627 |  1105831627 |    0 | 110787 |               0 |               1 |      1 |          1 |       58332
     1 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1205274176 |  1205274176 |    0 | 110787 |               0 |               1 |      1 |          1 |       63588
     2 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1122205096 |  1122205096 |    0 | 110787 |               0 |               1 |      1 |          1 |       59151
     3 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1318458166 |  1318458166 |    0 | 110787 |               0 |               1 |      1 |          1 |       69562
     4 | 111347 | lineitem_dk_shipdate1                                                                                                            | ★3103552138 |  3103552138 |    0 | 110787 |               0 |               1 |      1 |          1 |      176590
     5 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1107365471 |  1107365471 |    0 | 110787 |               0 |               1 |      1 |          1 |       58409
     6 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1145335102 |  1145335102 |    0 | 110787 |               0 |               1 |      1 |          1 |       60410
     7 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1023442529 |  1023442529 |    0 | 110787 |               0 |               1 |      1 |          1 |       53997
     8 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1090088206 |  1090088206 |    0 | 110787 |               0 |               1 |      1 |          1 |       57473
     9 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1140357965 |  1140357965 |    0 | 110787 |               0 |               1 |      1 |          1 |       60123
    10 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1128260563 |  1128260563 |    0 | 110787 |               0 |               1 |      1 |          1 |       59512
    11 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1044099754 |  1044099754 |    0 | 110787 |               0 |               1 |      1 |          1 |       55074
    12 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1071680574 |  1071680574 |    0 | 110787 |               0 |               1 |      1 |          1 |       56517
    13 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1014295796 |  1014295796 |    0 | 110787 |               0 |               1 |      1 |          1 |       53462
    14 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1183561831 |  1183561831 |    0 | 110787 |               0 |               1 |      1 |          1 |       62405
    15 | 111347 | lineitem_dk_shipdate1                                                                                                            | 1188744980 |  1188744980 |    0 | 110787 |               0 |               1 |      1 |          1 |       62672
 12816 | 111347 | lineitem_dk_shipdate1                                                                                                            |          0 |           0 |    0 | 110787 |               3 |               1 |      1 | 
  • lineitem_dk_shipdate1 のデータを lineitem_dk_shipdate2 にコピーする。
insert into lineitem_dk_shipdate2 (select * from lineitem_dk_shipdate1);
  • 分散キーが l_orderkey の ineitem テーブルを作成
create table lineitem_dk_orderkey1 (
  l_orderkey int8 not null ,
  l_partkey int8 not null,
  l_suppkey int4 not null,
  l_linenumber int4 not null,
  l_quantity numeric(12,2) not null,
  l_extendedprice numeric(12,2) not null,
  l_discount numeric(12,2) not null,
  l_tax numeric(12,2) not null,
  l_returnflag char(1) not null,
  l_linestatus char(1) not null,
  l_shipdate date not null ,
  l_commitdate date not null,
  l_receiptdate date not null,
  l_shipinstruct char(25) not null,
  l_shipmode char(10) not null,
  l_comment varchar(44) not null,
  Primary Key(L_ORDERKEY, L_LINENUMBER)
) distkey(l_orderkey) sortkey(l_shipdate,l_orderkey) ;

create table lineitem_dk_orderkey2 (
  l_orderkey int8 not null ,
  l_partkey int8 not null,
  l_suppkey int4 not null,
  l_linenumber int4 not null,
  l_quantity numeric(12,2) not null,
  l_extendedprice numeric(12,2) not null,
  l_discount numeric(12,2) not null,
  l_tax numeric(12,2) not null,
  l_returnflag char(1) not null,
  l_linestatus char(1) not null,
  l_shipdate date not null ,
  l_commitdate date not null,
  l_receiptdate date not null,
  l_shipinstruct char(25) not null,
  l_shipmode char(10) not null,
  l_comment varchar(44) not null,
  Primary Key(L_ORDERKEY, L_LINENUMBER)
) distkey(l_orderkey) sortkey(l_shipdate,l_orderkey) ;
insert into lineitem_dk_orderkey1 (select * from lineitem_dk_shipdate1);
insert into lineitem_dk_orderkey2 (select * from lineitem_dk_orderkey1);