annotate regexParser/CeriumGrep.cc @ 324:879dc5d1cb6a default tip

fix
author mir3636
date Fri, 27 May 2016 21:21:09 +0900
parents a1b65d39b947
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
2 #include <stdlib.h>
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
3 #include <string.h>
235
4aab1e93a971 fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
4 #include <unistd.h>
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
5 #include "CeriumGrep.h"
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
6 #include "subsetConstruction.h"
190
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 189
diff changeset
7 #include "node.h"
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
8 #include "grepWalk.h"
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
9 #include "fileread.h"
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
10 #include "threadedSearch.h"
312
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
11 #include "generateSequentialSearch.h"
76
d98a036441e2 add createNode comment
masa
parents: 75
diff changeset
12
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
13 Search grep(int argc,char **argv,bool parallel)
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
14 {
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
15 bool generate = true;
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
16 bool subset = false;
312
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
17 void (*generateSequentialSearch)(TransitionGeneratorPtr) = NULL;
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
18 bool ts = false;
269
3a95be4a5bb0 filename=NULL initialized
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 268
diff changeset
19 char *filename = NULL;
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
20 Search s;
269
3a95be4a5bb0 filename=NULL initialized
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 268
diff changeset
21 s.filename = 0;
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
22 s.tg = NULL;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
23
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
24 RegexInfo ri;
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
25 ri.stateNumber = 1;
314
a4484c02cba5 add wordMode in regexParser
mir3636
parents: 312
diff changeset
26 ri.wordMode = true;
321
a1b65d39b947 bmSearch fix
mir3636
parents: 314
diff changeset
27 ri.maxWordLen = 0;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
28 for (int i = 1; i < argc; i++) {
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
29 if (strcmp(argv[i],"-regex") == 0) {
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
30 ri.ptr = (unsigned char*)argv[i+1]; i++;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
31 } else if (strcmp(argv[i],"-noGeneration") == 0) {
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
32 generate = false;
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
33 } else if (strcmp(argv[i],"-subset") == 0) {
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
34 subset = true;
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
35 } else if (strcmp(argv[i],"-seq") == 0) {
312
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
36 generateSequentialSearch = exportState;
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
37 } else if (strcmp(argv[i],"-loop") == 0) {
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
38 generateSequentialSearch = exportStateLoop;
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
39 } else if (strcmp(argv[i],"-cbc") == 0) {
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
40 generateSequentialSearch = exportStateCbC;
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
41 } else if (strcmp(argv[i],"-file") == 0) {
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
42 s.filename = filename = argv[i+1]; i++;
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
43 } else if (strcmp(argv[i],"-ts") == 0) {
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
44 ts = true;
314
a4484c02cba5 add wordMode in regexParser
mir3636
parents: 312
diff changeset
45 } else if (strcmp(argv[i],"+word") == 0) {
a4484c02cba5 add wordMode in regexParser
mir3636
parents: 312
diff changeset
46 ri.wordMode = false;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
47 }
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
48 }
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
49 if (!ri.ptr) return s;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
50
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
51 printf("regex : %s\n",ri.ptr);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
52 NodePtr n = regex(&ri); // parse only
214
a94f57af1600 remove allocateCCstate createCCState
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 195
diff changeset
53 printTree(n);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
54
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
55 TGValue tgv;
219
a8e477f1352d add an example for subset construction
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 217
diff changeset
56 if (generate && !subset) { // NFA generation
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
57 tgv = generateTransitionList(n);
217
a9e3512120e2 NFA generation end
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 216
diff changeset
58 printTree(n);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
59 printState(tgv.tg);
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
60 if (generateSequentialSearch) {
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
61 exportState(tgv.tg);
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
62 }
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
63 } else if (subset) {
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
64 tgv = generateTransitionList(n);
249
9493800265a8 remove SCValue(not all remove)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 235
diff changeset
65 subsetConstruction(tgv.tg); // Determinization
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
66 printState(tgv.tg);
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
67 if (generateSequentialSearch) {
312
c9ac6f06e706 add loop
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
68 generateSequentialSearch(tgv.tg);
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
69 }
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
70 }
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
71
321
a1b65d39b947 bmSearch fix
mir3636
parents: 314
diff changeset
72 tgv.tg->maxWordLen = ri.maxWordLen;
a1b65d39b947 bmSearch fix
mir3636
parents: 314
diff changeset
73
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
74 if (filename != NULL && !parallel) {
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
75 int fd = 0;
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
76 st_mmap_t st_mmap = createSt_mmap(filename,fd);
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
77 Buffer buff = createBuffer(st_mmap);
262
157f6886ba55 write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 260
diff changeset
78 if (ts) threadedSearch(tgv.tg,buff);
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
79 else grepWalk(tgv.tg,buff);
235
4aab1e93a971 fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
80 close(fd);
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
81 }
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
82
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
83 s.tg = tgv.tg;
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
84 return s;
45
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 }