annotate regexParser/CeriumGrep.cc @ 314:a4484c02cba5

add wordMode in regexParser
author mir3636
date Sat, 07 May 2016 18:38:54 +0900
parents c9ac6f06e706
children a1b65d39b947
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
2 #include <stdlib.h>
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
3 #include <string.h>
235
4aab1e93a971 fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
4 #include <unistd.h>
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
5 #include "CeriumGrep.h"
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
6 #include "subsetConstruction.h"
190
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 189
diff changeset
7 #include "node.h"
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
8 #include "grepWalk.h"
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
9 #include "fileread.h"
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
10 #include "threadedSearch.h"
312
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
11 #include "generateSequentialSearch.h"
76
d98a036441e2 add createNode comment
masa
parents: 75
diff changeset
12
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
13 Search grep(int argc,char **argv,bool parallel)
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
14 {
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
15 bool generate = true;
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
16 bool subset = false;
312
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
17 void (*generateSequentialSearch)(TransitionGeneratorPtr) = NULL;
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
18 bool ts = false;
269
3a95be4a5bb0 filename=NULL initialized
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 268
diff changeset
19 char *filename = NULL;
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
20 Search s;
269
3a95be4a5bb0 filename=NULL initialized
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 268
diff changeset
21 s.filename = 0;
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
22 s.tg = NULL;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
23
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
24 RegexInfo ri;
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
25 ri.stateNumber = 1;
314
a4484c02cba5 add wordMode in regexParser
mir3636
parents: 312
diff changeset
26 ri.wordMode = true;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
27 for (int i = 1; i < argc; i++) {
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
28 if (strcmp(argv[i],"-regex") == 0) {
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
29 ri.ptr = (unsigned char*)argv[i+1]; i++;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
30 } else if (strcmp(argv[i],"-noGeneration") == 0) {
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
31 generate = false;
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
32 } else if (strcmp(argv[i],"-subset") == 0) {
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
33 subset = true;
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
34 } else if (strcmp(argv[i],"-seq") == 0) {
312
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
35 generateSequentialSearch = exportState;
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
36 } else if (strcmp(argv[i],"-loop") == 0) {
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
37 generateSequentialSearch = exportStateLoop;
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
38 } else if (strcmp(argv[i],"-cbc") == 0) {
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
39 generateSequentialSearch = exportStateCbC;
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
40 } else if (strcmp(argv[i],"-file") == 0) {
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
41 s.filename = filename = argv[i+1]; i++;
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
42 } else if (strcmp(argv[i],"-ts") == 0) {
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
43 ts = true;
314
a4484c02cba5 add wordMode in regexParser
mir3636
parents: 312
diff changeset
44 } else if (strcmp(argv[i],"+word") == 0) {
a4484c02cba5 add wordMode in regexParser
mir3636
parents: 312
diff changeset
45 ri.wordMode = false;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
46 }
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
47 }
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
48 if (!ri.ptr) return s;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
49
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
50 printf("regex : %s\n",ri.ptr);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
51 NodePtr n = regex(&ri); // parse only
214
a94f57af1600 remove allocateCCstate createCCState
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 195
diff changeset
52 printTree(n);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
53
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
54 TGValue tgv;
219
a8e477f1352d add an example for subset construction
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 217
diff changeset
55 if (generate && !subset) { // NFA generation
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
56 tgv = generateTransitionList(n);
217
a9e3512120e2 NFA generation end
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 216
diff changeset
57 printTree(n);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
58 printState(tgv.tg);
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
59 if (generateSequentialSearch) {
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
60 exportState(tgv.tg);
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
61 }
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
62 } else if (subset) {
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
63 tgv = generateTransitionList(n);
249
9493800265a8 remove SCValue(not all remove)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 235
diff changeset
64 subsetConstruction(tgv.tg); // Determinization
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
65 printState(tgv.tg);
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
66 if (generateSequentialSearch) {
312
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
67 generateSequentialSearch(tgv.tg);
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
68 }
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
69 }
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
70
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
71 if (filename != NULL && !parallel) {
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
72 int fd = 0;
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
73 st_mmap_t st_mmap = createSt_mmap(filename,fd);
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
74 Buffer buff = createBuffer(st_mmap);
262
157f6886ba55 write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 260
diff changeset
75 if (ts) threadedSearch(tgv.tg,buff);
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
76 else grepWalk(tgv.tg,buff);
235
4aab1e93a971 fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
77 close(fd);
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
78 }
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
79
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
80 s.tg = tgv.tg;
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
81 return s;
45
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 }