changeset 65:6af8c277a662

fix
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Tue, 16 Feb 2016 16:23:26 +0900
parents 2525c1e8899d
children 5defec0399f9
files paper/c5.tex paper/master_paper.pdf paper/memo/data.txt
diffstat 3 files changed, 44 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/paper/c5.tex	Tue Feb 16 15:44:01 2016 +0900
+++ b/paper/c5.tex	Tue Feb 16 16:23:26 2016 +0900
@@ -88,21 +88,22 @@
 
 表\ref{table:AZaz} '[A-Z][A-Za-z0-9]*s'
 ファイルサイズの側のかっこ書き内は与えられた正規表現にマッチした数
+1GBのファイルには約1.7億の単語が存在する。
 
 \begin{tiny}
   \begin{table}[ht]
     \begin{center}
       \begin{tabular}[t]{c|r|r}
         \hline
-        実行方式/File Size(Match Num) & 100MB(100万) & 500MB(500万) & 1GB(1000万) \\
+        実行方式/File Size(Match Num) & 500MB(500万) & 1GB(1000万) \\
         \hline
-        DFAの状態遷移での逐次実行 & 6.53 & 20.62 & 40.10\\
+        DFAの状態遷移での逐次実行 & 20.62 & 40.10\\
         \hline
-        CeriumGrep(CPU 12) mmap  & 6.41 & 18.00 & 26.96\\
+        CeriumGrep(CPU 12) mmap  & 18.00 & 26.96\\
         \hline
-        CeriumGrep(CPU 12) bread & 6.32 & 12.48 & 21.14\\
+        CeriumGrep(CPU 12) bread & 12.48 & 21.14\\
         \hline
-        egrep & 6.31 & 59.51 & 119.23\\
+        egrep & 59.51 & 119.23\\
         \hline
       \end{tabular}
   \caption{[A-Z][A-Za-z0-9]*s のマッチング}
@@ -111,25 +112,53 @@
   \end{table}
 \end{tiny}
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+表\ref{table:abab}
+約500MB、単語数約2300万で、abab が大量に含まれているファイル
+
+\begin{tiny}
+  \begin{table}[ht]
+    \begin{center}
+      \begin{tabular}[t]{l|r|r|r}
+        \hline
+        正規表現 & マッチ数 & CeriumGrep time (s) & egrep time (s)\\
+        \hline
+        '(a|b)*a(a|b)(a|b)'                  & 約1950万  & 38.67 &  86.66 \\
+        \hline
+        '(a|b)*a(a|b)(a|b)(a|b)'             & 約1640万  & 38.72 &  94.25 \\
+        \hline
+        '(a|b)*a(a|b)(a|b)(a|b)(a|b)'        & 約1640万  & 39.59 & 100.98 \\
+        \hline
+        '(a|b)*a(a|b)(a|b)(a|b)(a|b)(a|b)'   & 約1550万 & 38.68 & 104.82 \\
+        \hline
+      \end{tabular}
+  \caption{abab}
+  \label{table:abab}
+    \end{center}
+  \end{table}
+\end{tiny}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 表\ref{table:nomatch} ab の文字列がならんでいるところに (W|w)ord の正規表現
 
 全くマッチしないパターン
+Filesize:500MB
 
 \begin{tiny}
   \begin{table}[ht]
     \begin{center}
       \begin{tabular}[t]{c|r|r|r}
         \hline
-        実行方式/File Size(Match Num) & 1GB(0) & & \\
+        実行方式 & I/O 含む & I/O 含まない & \\
         \hline
-        & & & \\
+        DFAの状態遷移での逐次実行& 27.130 & 14.763 & \\
         \hline
-        & & & \\
+        CeriumGrep(CPU 12) mmap  & 21.576 & 1.873 & \\
         \hline
-        CeriumGrep(CPU 12) bread & 15.12 & & \\
+        CeriumGrep(CPU 12) bread & 19.986 & & \\
         \hline
-        & & & \\
+        egrep & 28.332 & & \\
         \hline
       \end{tabular}
   \caption{(W|w)ord のマッチング}
@@ -138,7 +167,7 @@
   \end{table}
 \end{tiny}
 
-
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 表\ref{table:abab}
 
@@ -164,6 +193,7 @@
   \end{table}
 \end{tiny}
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 表\ref{table:metachar}
 
Binary file paper/master_paper.pdf has changed
--- a/paper/memo/data.txt	Tue Feb 16 15:44:01 2016 +0900
+++ b/paper/memo/data.txt	Tue Feb 16 16:23:26 2016 +0900
@@ -1,3 +1,6 @@
+
+
+
 +firefly+one ./time.pl './cerium/ceriumGrep -br -cpu 12 -subset -regex '\''(a|b)*a(a|b)(a|b)'\'' -file  file/ab500MB.txt' 10 
 ------setting------
     command  = ./cerium/ceriumGrep -br -cpu 12 -subset -regex '(a|b)*a(a|b)(a|b)' -file  file/ab500MB.txt