ablog

不器用で落着きのない技術者のメモ

プロセスのランキュー待ち時間とI/O待ち時間を調べる

cat file|awk では実行時間 < CPU時間となっていますが、cat が I/O wait していないとは限りません。実行時間は単純に終了時間 - 開始時間で算出しますが、CPU時間はプロセスのCPU時間を getrusage システムコールで取得します。catのプロセスと awk のプロセスが並列実行されている期間があるため、実行時間 < CPU時間となっています。例えば、CPUバウンドな2プロセスがほぼ完全に並列実行されると、実行時間 * 2 ≒ CPU時間 となったりします。
(中略)

大きなテキストファイルをawkで処理するときにcatで投げ込むと速い理由 - ablog

と書きましたが、プロセスの

  • ランキュー待ち時間は /proc//sched の2列目(sched_info.run_delay)
  • I/O待ち時間は /proc//schedstat の se.statistics.iowait_sum

で調べることができます(たぶん)。


Brendan Gregg の USE Method: Linux Performance Checklist にも書かれていました。

Physical Resources

component type metric
CPU saturation system-wide: vmstat 1, "r" > CPU count [2]; sar -q, "runq-sz" > CPU count; dstat -p, "run" > CPU count; per-process: /proc/PID/schedstat 2nd field (sched_info.run_delay); perf sched latency (shows "Average" and "Maximum" delay per-schedule); dynamic tracing, eg, SystemTap schedtimes.stp "queued(us)" [3]
Storage device I/O utilization system-wide: iostat -xz 1, "%util"; sar -d, "%util"; per-process: iotop; pidstat -d; /proc/PID/sched "se.statistics.iowait_sum"
USE Method: Linux Performance Checklist

プロセスのランキュー待ち時間を調べる

分析結果

/proc//sched の2列目(sched_info.run_delay) を1秒間隔でサンプリングしたもののデルタ値をグラフ化したものです。秒間で最大30ミリ秒程度ランキュー待ちしていることがわかります。

手順
$ time { cat sample.txt | awk '$5 ~ /00$/ {print}' > result2.txt; } &
[2] 7125
$ pstree -alp $$
bash,4227
  ├─bash,6906
  ├─bash,7125
  │   ├─awk,7127 $5\040~\040/00$/\040{print}
  │   └─cat,7126 sample.txt
  └─pstree,7128 -alp 4227
$ while :; do cat /proc/7126/schedstat|awk '{print strftime("%y-%m-%d %H:%M:%S"), $0}'; sleep 1; done
15-08-06 21:36:45 383345672 191046611 110195
15-08-06 21:36:46 423034353 219229112 123027
15-08-06 21:36:47 462241056 244457968 136128
15-08-06 21:36:48 500990500 256206129 149262
15-08-06 21:36:49 539925919 274300181 162458
15-08-06 21:36:50 578372401 299073669 175502
15-08-06 21:36:51 616229970 321905887 188106
15-08-06 21:36:52 657995074 344450537 199886
15-08-06 21:36:53 695856409 360521384 213114
15-08-06 21:36:54 733568047 379649740 225926
15-08-06 21:36:55 772121747 410237735 238805
15-08-06 21:36:56 811170984 439155374 251822
15-08-06 21:36:57 850370696 465749478 264767
15-08-06 21:36:58 890058031 484898263 278059
15-08-06 21:36:59 929623665 502559894 291335
15-08-06 21:37:00 967534110 521142065 304264
15-08-06 21:37:01 1005369475 548146247 317000
15-08-06 21:37:02 1045520743 570541563 330048
15-08-06 21:37:03 1084174346 595538881 342781
15-08-06 21:37:04 1122857147 624379948 355558
15-08-06 21:37:05 1161376812 644308940 368760
15-08-06 21:37:06 1199607045 662688976 381716
15-08-06 21:37:07 1238034816 684981820 394697
15-08-06 21:37:08 1276644529 700901178 407911
15-08-06 21:37:09 1314893392 725301702 420697
15-08-06 21:37:10 1353172176 751330360 433661
15-08-06 21:37:11 1391506701 781730973 446465
15-08-06 21:37:12 1430111400 808695732 459453
15-08-06 21:37:13 1467890931 824451306 472343
15-08-06 21:37:14 1506399258 843147547 485447
15-08-06 21:37:15 1544417093 873616546 498517
15-08-06 21:37:16 1583421820 894680563 509600
15-08-06 21:37:17 1622831983 919412658 522427
15-08-06 21:37:18 1661565247 943010058 535545
15-08-06 21:37:19 1699449046 965567060 548420
15-08-06 21:37:20 1737512755 990405342 561547
15-08-06 21:37:21 1775750584 1009670772 574741
15-08-06 21:37:22 1813066160 1031995672 587582
15-08-06 21:37:23 1851382288 1058140829 600612
15-08-06 21:37:24 1890265617 1082606489 613648
15-08-06 21:37:25 1927966940 1101233300 626576
15-08-06 21:37:26 1965556572 1122902677 639485
15-08-06 21:37:27 2004574469 1135810609 652729
15-08-06 21:37:28 2043342617 1154135252 665729

real	0m52.387s
user	0m51.237s
sys	0m2.828s
[2]-  Done                    time { cat sample.txt | awk '$5 ~ /00$/ {print}' > result2.txt; }
cat: /proc/7126/schedstat: No such file or directory

プロセスのI/O待ち時間を調べる

分析結果

/proc//schedstat の se.statistics.iowait_sum を1秒間隔でサンプリングしたもののデルタ値をグラフ化したものです。単位はナノ秒のようです。

手順
$ time { cat sample.txt | awk '$5 ~ /00$/ {print}' > result2.txt; } &
$ pstree -alp $$
bash,4227
  ├─bash,6906
  ├─bash,8290
  │   ├─awk,8292 $5\040~\040/00$/\040{print}
  │   └─cat,8291 sample.txt
  └─pstree,8296 -alp 4227
$ while :; do cat /proc/8291/sched|awk '{print strftime("%y-%m-%d %H:%M:%S"), $0}'; sleep 1; done
15-08-06 22:11:45 cat (8291, #threads: 1)
15-08-06 22:11:45 ---------------------------------------------------------
15-08-06 22:11:45 se.exec_start                      :       5789181.672512
15-08-06 22:11:45 se.vruntime                        :      17852208.688334
15-08-06 22:11:45 se.sum_exec_runtime                :           898.581661
15-08-06 22:11:45 se.statistics.wait_start           :       5789181.856209
15-08-06 22:11:45 se.statistics.sleep_start          :             0.000000
15-08-06 22:11:45 se.statistics.block_start          :             0.000000
15-08-06 22:11:45 se.statistics.sleep_max            :            19.405457
15-08-06 22:11:45 se.statistics.block_max            :             8.517829
15-08-06 22:11:45 se.statistics.exec_max             :             0.315179
15-08-06 22:11:45 se.statistics.slice_max            :             0.303532
15-08-06 22:11:45 se.statistics.wait_max             :             3.450454
15-08-06 22:11:45 se.statistics.wait_sum             :           176.897778
15-08-06 22:11:45 se.statistics.wait_count           :               220924
15-08-06 22:11:45 se.statistics.iowait_sum           :            79.213880
15-08-06 22:11:45 se.statistics.iowait_count         :                   31
15-08-06 22:11:45 se.nr_migrations                   :                14756
15-08-06 22:11:45 se.statistics.nr_migrations_cold   :                    0
15-08-06 22:11:45 se.statistics.nr_failed_migrations_affine:                    0
15-08-06 22:11:45 se.statistics.nr_failed_migrations_running:                  100
15-08-06 22:11:45 se.statistics.nr_failed_migrations_hot:                  169
15-08-06 22:11:45 se.statistics.nr_forced_migrations :                    1
15-08-06 22:11:45 se.statistics.nr_wakeups           :               220832
15-08-06 22:11:45 se.statistics.nr_wakeups_sync      :               213774
15-08-06 22:11:45 se.statistics.nr_wakeups_migrate   :                14730
15-08-06 22:11:45 se.statistics.nr_wakeups_local     :                29500
15-08-06 22:11:45 se.statistics.nr_wakeups_remote    :               191332
15-08-06 22:11:45 se.statistics.nr_wakeups_affine    :                  538
15-08-06 22:11:45 se.statistics.nr_wakeups_affine_attempts:                  855
15-08-06 22:11:45 se.statistics.nr_wakeups_passive   :                    0
15-08-06 22:11:45 se.statistics.nr_wakeups_idle      :                    0
15-08-06 22:11:45 avg_atom                           :             0.004067
15-08-06 22:11:45 avg_per_cpu                        :             0.060896
15-08-06 22:11:45 nr_switches                        :               220898
15-08-06 22:11:45 nr_voluntary_switches              :               220833
15-08-06 22:11:45 nr_involuntary_switches            :                   66
15-08-06 22:11:45 se.load.weight                     :                 1024
15-08-06 22:11:45 policy                             :                    0
15-08-06 22:11:45 prio                               :                  120
15-08-06 22:11:45 clock-delta                        :                   56
15-08-06 22:11:46 cat (8291, #threads: 1)
15-08-06 22:11:46 ---------------------------------------------------------
15-08-06 22:11:46 se.exec_start                      :       5790193.560503
15-08-06 22:11:46 se.vruntime                        :      17853207.587196
15-08-06 22:11:46 se.sum_exec_runtime                :           946.724633
15-08-06 22:11:46 se.statistics.wait_start           :             0.000000
15-08-06 22:11:46 se.statistics.sleep_start          :       5790193.560503
15-08-06 22:11:46 se.statistics.block_start          :             0.000000
15-08-06 22:11:46 se.statistics.sleep_max            :            19.405457
15-08-06 22:11:46 se.statistics.block_max            :             8.517829
15-08-06 22:11:46 se.statistics.exec_max             :             0.315179
15-08-06 22:11:46 se.statistics.slice_max            :             0.303532
15-08-06 22:11:46 se.statistics.wait_max             :             3.450454
15-08-06 22:11:46 se.statistics.wait_sum             :           178.163357
15-08-06 22:11:46 se.statistics.wait_count           :               232283
15-08-06 22:11:46 se.statistics.iowait_sum           :            89.090687
15-08-06 22:11:46 se.statistics.iowait_count         :                   33
15-08-06 22:11:46 se.nr_migrations                   :                15553
15-08-06 22:11:46 se.statistics.nr_migrations_cold   :                    0
15-08-06 22:11:46 se.statistics.nr_failed_migrations_affine:                    0
15-08-06 22:11:46 se.statistics.nr_failed_migrations_running:                  104
15-08-06 22:11:46 se.statistics.nr_failed_migrations_hot:                  169
15-08-06 22:11:46 se.statistics.nr_forced_migrations :                    1
15-08-06 22:11:46 se.statistics.nr_wakeups           :               232190
15-08-06 22:11:46 se.statistics.nr_wakeups_sync      :               224776
15-08-06 22:11:46 se.statistics.nr_wakeups_migrate   :                15526
15-08-06 22:11:46 se.statistics.nr_wakeups_local     :                31523
15-08-06 22:11:46 se.statistics.nr_wakeups_remote    :               200667
15-08-06 22:11:46 se.statistics.nr_wakeups_affine    :                  538
15-08-06 22:11:46 se.statistics.nr_wakeups_affine_attempts:                  863
15-08-06 22:11:46 se.statistics.nr_wakeups_passive   :                    0
15-08-06 22:11:46 se.statistics.nr_wakeups_idle      :                    0
15-08-06 22:11:46 avg_atom                           :             0.004076
15-08-06 22:11:46 avg_per_cpu                        :             0.060870
15-08-06 22:11:46 nr_switches                        :               232257
15-08-06 22:11:46 nr_voluntary_switches              :               232190
15-08-06 22:11:46 nr_involuntary_switches            :                   67
15-08-06 22:11:46 se.load.weight                     :                 1024
15-08-06 22:11:46 policy                             :                    0
15-08-06 22:11:46 prio                               :                  120
15-08-06 22:11:46 clock-delta                        :                   52
15-08-06 22:12:23 se.load.weight                     :                 1024
15-08-06 22:12:23 policy                             :                    0
15-08-06 22:12:23 prio                               :                  120
15-08-06 22:12:23 clock-delta                        :                   60

...

real	0m57.677s
user	0m55.928s
sys	0m3.728s
[2]-  Done                    time { cat sample.txt | awk '$5 ~ /00$/ {print}' > result2.txt; }
cat: /proc/8291/sched: No such file or directory

補足

we have how long we wait. we do not have how long we iowait afaik... at least not in nanosecond granularity. (We do have the sampled data, but that is milisecond sampled data, not very useful for making charts based on time to show the sequence of events)

Linux Kernel - [PATCH] sched: Provide iowait counters
  • kernel-2.6.39/linux-2.6.39.x86_64/fs/proc/base.c
#ifdef CONFIG_SCHEDSTATS
/*
 * Provides /proc/PID/schedstat
 */
static int proc_pid_schedstat(struct task_struct *task, char *buffer)
{
        return sprintf(buffer, "%llu %llu %lu\n",
                        (unsigned long long)task->se.sum_exec_runtime,
                        (unsigned long long)task->sched_info.run_delay,
                        task->sched_info.pcount);
}
#endif
  • kernel-2.6.39/linux-2.6.39.x86_64/kernel/sched_fair.c
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHEDSTATS
        struct task_struct *tsk = NULL;

        if (entity_is_task(se))
                tsk = task_of(se);

        if (se->statistics.sleep_start) {
                u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start;

                if ((s64)delta < 0)
                        delta = 0;

                if (unlikely(delta > se->statistics.sleep_max))
                        se->statistics.sleep_max = delta;

                se->statistics.sleep_start = 0;
                se->statistics.sum_sleep_runtime += delta;

                if (tsk) {
                        account_scheduler_latency(tsk, delta >> 10, 1);
                        trace_sched_stat_sleep(tsk, delta);
                }
        }
        if (se->statistics.block_start) {
                u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start;

                if ((s64)delta < 0)
                        delta = 0;
                if (unlikely(delta > se->statistics.block_max))
                        se->statistics.block_max = delta;

                se->statistics.block_start = 0;
                se->statistics.sum_sleep_runtime += delta;

                if (tsk) {
                        if (tsk->in_iowait) {
                                se->statistics.iowait_sum += delta;
                                se->statistics.iowait_count++;
                                trace_sched_stat_iowait(tsk, delta);
                        }

                        /*
                         * Blocking time is in units of nanosecs, so shift by
                         * 20 to get a milliseconds-range estimation of the
                         * amount of time that the task spent sleeping:
                         */
                        if (unlikely(prof_on == SLEEP_PROFILING)) {
                                profile_hits(SLEEP_PROFILING,
                                                (void *)get_wchan(tsk),
                                                delta >> 20);
                        }
                        account_scheduler_latency(tsk, delta >> 10, 0);
                }
        }
#end