# HG changeset patch # User Masataka Kohagura # Date 1438685157 -32400 # Node ID 1b92285a767a2bc60844f259c5f4b30e6c275c9f # Parent 5789a323629579c3fd0c0c5a8ae8c4d8267b18f2 add 0804 diff -r 5789a3236295 -r 1b92285a767a 2015/0804.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2015/0804.html Tue Aug 04 19:45:57 2015 +0900 @@ -0,0 +1,276 @@ + + + + + seminar + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+

Cerium 上での正規表現の実装

+
+
+ Masataka Kohagura 14th, July , 2015 +
+
+ +
+

研究目的

+ 正規表現はオートマトンに変換することができ、 そしてオートマトンの受理の問題は Class NC と呼ばれる問題でもある。
+ この問題は計算機の台数が多ければ多いほど高速化できるという特徴を持ち、並列化に向いている問題といえる。
+ コンピュータの動作やゲームの動作などの多くの問題はオートマトンの受理問題に落としこむことができるので、この問題を解決すれば様々な問題に対応できるようになる。
+ 本研究では正規表現を並列処理で実装することによってこの問題を解決し、Class NC に対応するライブラリを作成する。 + +
+ +
+

したこと

+ characterClass 以外を実装 + +
+ +
+

BNF記法で正規表現の文法規則を表記してみる

+
    +
  • +<literal> ::= [a-z][A-Z][0-9] +
  • +
  • +<characterClass> ::= '['<literal>'-'<literal>']' +
  • +
  • +<string> :: = <literal> | <literal>* +
  • +
  • +<or> ::= '('<regex>'|'<regex>')' +
  • +
  • +<*> ::= <regex>'*' +
  • +
  • +<regex> ::= <literal>|<string>|<or> +
  • +
+<or> が '|' とグループ化の '('')' とまだ分解できるので、<or> を <or> と <group> に分割 +
+ +
+

BNF記法で正規表現の文法規則を表記してみる(修正後)

+
    +
  • +<literal> ::= [a-z][A-Z][0-9] +
  • +
  • +<characterClass> ::= '['<literal>'-'<literal>']' +
  • +
  • +<string> :: = <literal> | <literal>* +
  • +
  • +<group> ::= '('<regex>')' <- 追加 +
  • +
  • +<or> ::= <regex>'|'<regex> <- 修正 +
  • +
  • +<*> ::= <regex>'*' +
  • +
  • +<regex> ::= <literal>|<string>|<or> +
  • +
+
+ +
+

問題点

+

正規表現 a*b の tree 構造(本当はこうなってほしい)

+ +
+

正規表現 a*b の tree 構造(現状)

+
+
+ +
+

問題点

+

正規表現 a tree 構造(現状)

+
+

原因は string()

+ +
+    
+NodePtr string() {
+    char c = *ptr;
+    NodePtr n = NULL;
+    if (isLiteral(c)) {
+        n = createNode(0,literal(),string());
+    } else {
+        n = createNode(0,0,0);
+    }
+    return n;
+}
+    
+    
+

string なのか literal なのか判断しないで createNode をしてる

+
+ +
+

これからすること

+
    +
  • + tree 生成の修正 +
  • +
  • + tree を見やすく表示 +
  • +
+
+ + + + + + +
+ + diff -r 5789a3236295 -r 1b92285a767a 2015/images/omni/parser.graffle --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2015/images/omni/parser.graffle Tue Aug 04 19:45:57 2015 +0900 @@ -0,0 +1,713 @@ + + + + + ActiveLayerIndex + 0 + ApplicationVersion + + com.omnigroup.OmniGraffle + 139.18.0.187838 + + AutoAdjust + + BackgroundGraphic + + Bounds + {{0, 0}, {559, 783}} + Class + SolidGraphic + ID + 2 + Style + + shadow + + Draws + NO + + stroke + + Draws + NO + + + + BaseZoom + 0 + CanvasOrigin + {0, 0} + ColumnAlign + 1 + ColumnSpacing + 36 + CreationDate + 2015-06-09 04:23:13 +0000 + Creator + MasaKoha + DisplayScale + 1 0/72 in = 1 0/72 in + GraphDocumentVersion + 8 + GraphicsList + + + Bounds + {{72.000005521060515, 172.76470310826437}, {255.9999944789395, 14}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 19 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 or} + VerticalPad + 0 + + + + Class + LineGraphic + ID + 18 + Points + + {71.999996417430879, 167.01470435733228} + {195.8823366324645, 166.51470317400478} + {327.99999089637038, 167.01470435733228} + + Style + + stroke + + HeadArrow + FilledArrow + HeadScale + 0.5 + Legacy + + LineType + 1 + TailArrow + FilledArrow + TailScale + 0.5 + + + + + Bounds + {{104.00000341386111, 147.26470431231868}, {63.999996586138849, 14}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 17 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 concrete} + VerticalPad + 0 + + + + Class + LineGraphic + ID + 16 + Points + + {104.00000113795377, 141.51470556138659} + {134.97058520762377, 141.01470437805909} + {167.99999772409265, 141.51470556138659} + + Style + + stroke + + HeadArrow + FilledArrow + HeadScale + 0.5 + Legacy + + LineType + 1 + TailArrow + FilledArrow + TailScale + 0.5 + + + + + Bounds + {{104.00000113795379, 118.74999875093209}, {32, 14}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 15 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 literal} + VerticalPad + 0 + + + + Class + LineGraphic + ID + 14 + Points + + {104, 113} + {119.48529286084498, 112.4999988166725} + {136, 113} + + Style + + stroke + + HeadArrow + FilledArrow + HeadScale + 0.5 + Legacy + + LineType + 1 + TailArrow + FilledArrow + TailScale + 0.5 + + + + + Bounds + {{360, 56}, {32, 32}} + Class + ShapedGraphic + ID + 12 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 b} + + + + Bounds + {{328, 56}, {32, 32}} + Class + ShapedGraphic + ID + 11 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 *} + + + + Bounds + {{296, 56}, {32, 32}} + Class + ShapedGraphic + ID + 10 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 )} + + + + Bounds + {{264, 56}, {32, 32}} + Class + ShapedGraphic + ID + 9 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 a} + + + + Bounds + {{232, 56}, {32, 32}} + Class + ShapedGraphic + ID + 8 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 a} + + + + Bounds + {{200, 56}, {32, 32}} + Class + ShapedGraphic + ID + 7 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 a} + + + + Bounds + {{168, 56}, {32, 32}} + Class + ShapedGraphic + ID + 6 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 |} + + + + Bounds + {{136, 56}, {32, 32}} + Class + ShapedGraphic + ID + 5 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 a} + + + + Bounds + {{104, 56}, {32, 32}} + Class + ShapedGraphic + ID + 4 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 a} + + + + Bounds + {{72, 56}, {32, 32}} + Class + ShapedGraphic + ID + 3 + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Text + {\rtf1\ansi\ansicpg932\cocoartf1347\cocoasubrtf570 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 (} + + + + GridInfo + + GuidesLocked + NO + GuidesVisible + YES + HPages + 1 + ImageCounter + 1 + KeepToScale + + Layers + + + Lock + NO + Name + レイヤー 1 + Print + YES + View + YES + + + LayoutInfo + + Animate + NO + circoMinDist + 18 + circoSeparation + 0.0 + layoutEngine + dot + neatoSeparation + 0.0 + twopiSeparation + 0.0 + + LinksVisible + NO + MagnetsVisible + NO + MasterSheets + + ModificationDate + 2015-06-09 04:29:53 +0000 + Modifier + MasaKoha + NotesVisible + NO + Orientation + 2 + OriginVisible + NO + PageBreaks + YES + PrintInfo + + NSBottomMargin + + float + 41 + + NSHorizonalPagination + + coded + BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG + + NSLeftMargin + + float + 18 + + NSPaperSize + + size + {595, 842} + + NSPrintReverseOrientation + + int + 0 + + NSRightMargin + + float + 18 + + NSTopMargin + + float + 18 + + + PrintOnePage + + ReadOnly + NO + RowAlign + 1 + RowSpacing + 36 + SheetTitle + キャンバス 1 + SmartAlignmentGuidesActive + YES + SmartDistanceGuidesActive + YES + UniqueID + 1 + UseEntirePage + + VPages + 1 + WindowInfo + + CurrentSheet + 0 + ExpandedCanvases + + Frame + {{56, 135}, {945, 1042}} + ListView + + OutlineWidth + 142 + RightSidebar + + ShowRuler + + Sidebar + + SidebarWidth + 120 + VisibleRegion + {{31.56681916665568, 0}, {373.27187627724231, 414.74652919693591}} + Zoom + 2.1700000762939453 + ZoomValues + + + キャンバス 1 + 2.1700000762939453 + 2.25 + + + + + diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/aastabfalse.pdf Binary file 2015/images/vector/aastabfalse.pdf has changed diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/aastabfalse.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2015/images/vector/aastabfalse.svg Tue Aug 04 19:45:57 2015 +0900 @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/aastabtrue.pdf Binary file 2015/images/vector/aastabtrue.pdf has changed diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/aastabtrue.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2015/images/vector/aastabtrue.svg Tue Aug 04 19:45:57 2015 +0900 @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/aonly.pdf Binary file 2015/images/vector/aonly.pdf has changed diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/aonly.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2015/images/vector/aonly.svg Tue Aug 04 19:45:57 2015 +0900 @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/bitvectorTable.pdf Binary file 2015/images/vector/bitvectorTable.pdf has changed diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/bitvectorTable.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2015/images/vector/bitvectorTable.svg Tue Aug 04 19:45:57 2015 +0900 @@ -0,0 +1,303 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 5789a3236295 -r 1b92285a767a 2015/images/vector/grepstate.pdf Binary file 2015/images/vector/grepstate.pdf has changed