ablog

不器用で落着きのない技術者のメモ

sar を Jupyter Notebook でグラフ化してみる

  • sar をテキストに出力して加工する。
# Convert every sar binary data file (*.dat) in the current directory to
# tab-separated text, appending to sar-u.tsv (sar -u), sar-r.tsv (sar -r)
# and sar-d.tsv (sar -d).
#
# Each output row is: <node> <date> <whitespace-separated fields of the line>
#   node - the part of the data file name before the first "_"
#   date - the most recent NN/NN/NN date seen earlier in the same sar output
#
# Rows are appended (>>): remove stale *.tsv files before re-running.

# Force the C locale for sar. LC_ALL is set as well because it takes
# precedence over LANG; the date regex and the field-count checks below
# assume a NN/NN/NN date and a single-field timestamp (no AM/PM column).
export LANG=C
export LC_ALL=C

# Glob directly instead of parsing `ls`, so unusual file names survive intact.
for sar_file in *.dat; do
    # With no matching files the pattern stays literal; skip it.
    [[ -e "${sar_file}" ]] || continue

    node=${sar_file%%_*}

    # In each perl filter:
    #   - remember the latest date found on any line in $d
    #   - keep only lines with the expected number of fields ($#F is the last index)
    #   - drop the column-header line and sar's "Average:" summary line, which
    #     has the same field count as a sample but is not a timestamped sample
    sar -u -f "${sar_file}" \
        | perl -slane '/(\d{2}\/\d{2}\/\d{2})/ and $d=$1;if($#F==7 and !/%user/ and !/^Average:/){printf(qq/%s\t%s\t%s\n/,$node,$d,join(qq/\t/,@F))}' -- -node="${node}" \
        >> sar-u.tsv
    sar -r -f "${sar_file}" \
        | perl -slane '/(\d{2}\/\d{2}\/\d{2})/ and $d=$1;if($#F==7 and !/kbmemfree/ and !/^Average:/){printf(qq/%s\t%s\t%s\n/,$node,$d,join(qq/\t/,@F))}' -- -node="${node}" \
        >> sar-r.tsv
    sar -d -f "${sar_file}" \
        | perl -slane '/(\d{2}\/\d{2}\/\d{2})/ and $d=$1;if($#F==9 and !/DEV/ and !/^Average:/){printf(qq/%s\t%s\t%s\n/,$node,$d,join(qq/\t/,@F))}' -- -node="${node}" \
        >> sar-d.tsv
done
  • sar-u.tsv の先頭行に次のヘッダ(タブ区切り)を追加する。
hostname	date	timestamp	CPU	%user	%nice	%system	%iowait	%steal	%idle
  • sar-r.tsv の先頭行に次のヘッダ(タブ区切り)を追加する。
hostname	date	timestamp	kbmemfree	kbmemused	%memused	kbbuffers	kbcached	kbcommit	%commit
  • sar-d.tsv の先頭行に次のヘッダ(タブ区切り)を追加する。
hostname	date	timestamp	DEV	tps	rd_sec/s	wr_sec/s	avgrq-sz	avgqu-sz	await	svctm	%util
  • ファイルを S3 にアップロードする。
  • sar -u をグラフ化
import pandas as pd
import matplotlib.pyplot as plt

# Read the tab-separated sar -u table from S3.
# NOTE(review): the bucket here is "yohei-a-file", while the sar -r / sar -d
# cells read from "yohei-a-files" — confirm which name is correct.
df_sar_u = pd.read_csv("s3n://yohei-a-file/sar/sar-u.tsv", sep="\t")

# Restrict to the rows for host01 on 05/01/20.
df_sar_u_host01 = df_sar_u.query('hostname == "host01" & date == "05/01/20"')

# Stacked area chart of the CPU percentage columns against the timestamp.
cpu_columns = ['%user', '%nice', '%system', '%iowait', '%steal', '%idle']
df_sar_u_host01.plot(
    x='timestamp',
    y=cpu_columns,
    kind='area',
    stacked=True,
    alpha=0.4,
    figsize=(20, 10),
)
plt.show()

f:id:yohei-a:20200524082207p:plain

  • sar -r をグラフ化
import pandas as pd
import matplotlib.pyplot as plt

# Read the tab-separated sar -r table from S3.
df_sar_r = pd.read_csv("s3n://yohei-a-files/sar/sar-r.tsv", sep="\t")

# Restrict to the rows for host01 on 05/01/20.
df_sar_r_host01 = df_sar_r.query('hostname == "host01" & date == "05/01/20"')

# Stacked area chart of the memory columns against the timestamp.
memory_columns = ['kbmemfree', 'kbmemused', 'kbbuffers', 'kbcached']
df_sar_r_host01.plot(
    x='timestamp',
    y=memory_columns,
    kind='area',
    stacked=True,
    alpha=0.4,
    figsize=(20, 10),
)
plt.show()

f:id:yohei-a:20200524083326p:plain

  • sar -d をグラフ化
import pandas as pd
import matplotlib.pyplot as plt

# Read the tab-separated sar -d table from S3.
# The frames are named df_sar_d* (not df_sar_r*) so this cell no longer
# overwrites the sar -r DataFrames when run in the same notebook.
df_sar_d = pd.read_csv("s3n://yohei-a-files/sar/sar-d.tsv", sep="\t")

# Restrict to the rows for host01 on 05/15/20.
df_sar_d_host01 = df_sar_d.query('hostname == "host01" & date == "05/15/20"')

# Line plot of await and svctm against the timestamp.
# NOTE(review): stacked=True on a line plot draws svctm cumulatively on top
# of await rather than at its own value — confirm this is intended.
# NOTE(review): rows are not filtered by DEV, so every device in the file is
# plotted on the same series — confirm.
df_sar_d_host01.plot(x='timestamp', y=['await', 'svctm'], stacked=True, alpha=0.4, figsize=(20,10))
plt.show()

f:id:yohei-a:20200524102501p:plain

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the tab-separated sar -d table from S3.
df_sar_d = pd.read_csv("s3n://yohei-a-files/sar/sar-d.tsv", sep="\t")

# Restrict to the rows for host01 on 05/15/20.
df_sar_d_host01 = df_sar_d.query('hostname == "host01" & date == "05/15/20"')

# Joint plot of tps (x) against await (y) for the selected rows.
sns.jointplot(
    data=df_sar_d_host01,
    x="tps",
    y="await",
)

f:id:yohei-a:20200524110159p:plain

  • まとめ
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# --- sar -u: stacked area chart of the CPU percentage columns ---
df_sar_u = pd.read_csv("s3n://aws-yohei-a-files/sar/sar-u.tsv", sep="\t")
df_sar_u_host01 = df_sar_u.query('hostname == "host01" & date == "05/15/20"')
cpu_columns = ['%user', '%nice', '%system', '%iowait', '%steal', '%idle']
df_sar_u_host01.plot(
    x='timestamp',
    y=cpu_columns,
    kind='area',
    stacked=True,
    alpha=0.4,
    figsize=(20, 10),
)
plt.show()

# --- sar -r: stacked area chart of the memory columns ---
df_sar_r = pd.read_csv("s3n://aws-yohei-a-files/sar/sar-r.tsv", sep="\t")
df_sar_r_host01 = df_sar_r.query('hostname == "host01" & date == "05/15/20"')
memory_columns = ['kbmemfree', 'kbmemused', 'kbbuffers', 'kbcached']
df_sar_r_host01.plot(
    x='timestamp',
    y=memory_columns,
    kind='area',
    stacked=True,
    alpha=0.4,
    figsize=(20, 10),
)
plt.show()

# --- sar -d: line plot of await against the timestamp ---
df_sar_d = pd.read_csv("s3n://aws-yohei-a-files/sar/sar-d.tsv", sep="\t")
df_sar_d_host01 = df_sar_d.query('hostname == "host01" & date == "05/15/20"')
df_sar_d_host01.plot(
    x='timestamp',
    y='await',
    stacked=True,
    alpha=0.4,
    figsize=(20, 10),
)
plt.show()

# --- sar -d: joint plot of tps (x) against await (y) ---
sns.jointplot(
    data=df_sar_d_host01,
    x="tps",
    y="await",
)

f:id:yohei-a:20200524115331p:plain