# HG changeset patch # User Masataka Kohagura # Date 1455607406 -32400 # Node ID 6af8c277a662fc2284776972e9c780fa7f10bf07 # Parent 2525c1e8899d071570a133dfd1bcb3aa87551d25 fix diff -r 2525c1e8899d -r 6af8c277a662 paper/c5.tex --- a/paper/c5.tex Tue Feb 16 15:44:01 2016 +0900 +++ b/paper/c5.tex Tue Feb 16 16:23:26 2016 +0900 @@ -88,21 +88,22 @@ 表\ref{table:AZaz} '[A-Z][A-Za-z0-9]*s' ファイルサイズの側のかっこ書き内は与えられた正規表現にマッチした数 +1GBのファイルには約1.7 億の単語が存在する。 \begin{tiny} \begin{table}[ht] \begin{center} \begin{tabular}[t]{c|r|r|r} \hline - 実行方式/File Size(Match Num) & 100MB(100万) & 500MB(500万) & 1GB(1000万) \\ + 実行方式/File Size(Match Num) & 500MB(500万) & 1GB(1000万) \\ \hline - DFAの状態遷移での逐次実行 & 6.53 & 20.62 & 40.10\\ + DFAの状態遷移での逐次実行 & 20.62 & 40.10\\ \hline - CeriumGrep(CPU 12) mmap & 6.41 & 18.00 & 26.96\\ + CeriumGrep(CPU 12) mmap & 18.00 & 26.96\\ \hline - CeriumGrep(CPU 12) bread & 6.32 & 12.48 & 21.14\\ + CeriumGrep(CPU 12) bread & 12.48 & 21.14\\ \hline - egrep & 6.31 & 59.51 & 119.23\\ + egrep & 59.51 & 119.23\\ \hline \end{tabular} \caption{[A-Z][A-Za-z0-9]*s のマッチング} @@ -111,25 +112,53 @@ \end{table} \end{tiny} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +表\ref{table:abab} +約500 MB 、単語数約2300万 abab がめちゃくちゃ含まれているファイル + +\begin{tiny} + \begin{table}[ht] + \begin{center} + \begin{tabular}[t]{l|r|r|r} + \hline + 正規表現 & マッチ数 & CeriumGrep time (s) & egrep time(s)\\ + \hline + '(a|b)*a(a|b)(a|b)' & 約1950万 & 38.67 & 86.66 \\ + \hline + '(a|b)*a(a|b)(a|b)(a|b)' & 約1640万 & 38.72 & 94.25 \\ + \hline + '(a|b)*a(a|b)(a|b)(a|b)(a|b)' & 約1640万 & 39.59 & 100.98 \\ + \hline + '(a|b)*a(a|b)(a|b)(a|b)(a|b)(a|b)' & 約1550万 & 38.68 & 104.82 \\ + \hline + \end{tabular} + \caption{abab} + \label{table:abab} + \end{center} + \end{table} +\end{tiny} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 表\ref{table:nomatch} ab の文字列がならんでいるところに (W|w)ord の正規表現 全くマッチしないパターン +Filesize:500MB \begin{tiny} \begin{table}[ht] \begin{center} \begin{tabular}[t]{c|r|r|r} \hline - 実行方式/File Size(Match Num) & 1GB(0) & & \\ + 実行方式/File Size(Match Num) & I/O含む & I/O 含まない & \\ \hline - & & & \\ + DFAの状態遷移での逐次実行& 27.130 & 14.763 & \\ \hline - & & & \\ + CeriumGrep(CPU 12) mmap & 21.576 & 1.873 & \\ \hline - CeriumGrep(CPU 12) bread & 15.12 & & \\ + CeriumGrep(CPU 12) bread & 19.986 & & \\ \hline - & & & \\ + egrep & 28.332 & & \\ \hline \end{tabular} \caption{(W|w)ork のマッチング} @@ -138,7 +167,7 @@ \end{table} \end{tiny} - +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 表\ref{table:abab} @@ -164,6 +193,7 @@ \end{table} \end{tiny} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 表\ref{table:metachar} diff -r 2525c1e8899d -r 6af8c277a662 paper/master_paper.pdf Binary file paper/master_paper.pdf has changed diff -r 2525c1e8899d -r 6af8c277a662 paper/memo/data.txt --- a/paper/memo/data.txt Tue Feb 16 15:44:01 2016 +0900 +++ b/paper/memo/data.txt Tue Feb 16 16:23:26 2016 +0900 @@ -1,3 +1,6 @@ + + + +firefly+one ./time.pl './cerium/ceriumGrep -br -cpu 12 -subset -regex '\''(a|b)*a(a|b)(a|b)(a|b)(a|b)(a|b)'\'' -file file/+firefly+one ./time.pl './cerium/ceriumGrep -br -cpu 12 -subset -regex '\''(a|b)*a(a|b)(a|b)'\'' -file file/ab500MB.txt' 10 ------setting------ command = ./cerium/ceriumGrep -br -cpu 12 -subset -regex '(a|b)*a(a|b)(a|b)' -file file/ab500MB.txt