Mercurial > hg > Applications > Grep
annotate regexParser/CeriumGrep.cc @ 314:a4484c02cba5
add wordMode in regexParser
author | mir3636 |
---|---|
date | Sat, 07 May 2016 18:38:54 +0900 |
parents | c9ac6f06e706 |
children | a1b65d39b947 |
rev | line source |
---|---|
45 | 1 #include <stdio.h> |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
2 #include <stdlib.h> |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
3 #include <string.h> |
235
4aab1e93a971
fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
232
diff
changeset
|
4 #include <unistd.h> |
266 | 5 #include "CeriumGrep.h" |
216 | 6 #include "subsetConstruction.h" |
190
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
189
diff
changeset
|
7 #include "node.h" |
231
d67649929e96
add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
228
diff
changeset
|
8 #include "grepWalk.h" |
232
1a34e702776a
add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
231
diff
changeset
|
9 #include "fileread.h" |
260
3f3c9902bb6d
initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
259
diff
changeset
|
10 #include "threadedSearch.h" |
312 | 11 #include "generateSequentialSearch.h" |
76 | 12 |
266 | 13 Search grep(int argc,char **argv,bool parallel) |
55 | 14 { |
216 | 15 bool generate = true; |
16 bool subset = false; | |
312 | 17 void (*generateSequentialSearch)(TransitionGeneratorPtr) = NULL; |
260
3f3c9902bb6d
initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
259
diff
changeset
|
18 bool ts = false; |
269
3a95be4a5bb0
filename=NULL initialized
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
268
diff
changeset
|
19 char *filename = NULL; |
266 | 20 Search s; |
269
3a95be4a5bb0
filename=NULL initialized
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
268
diff
changeset
|
21 s.filename = 0; |
266 | 22 s.tg = NULL; |
216 | 23 |
178
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
167
diff
changeset
|
24 RegexInfo ri; |
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
167
diff
changeset
|
25 ri.stateNumber = 1; |
314 | 26 ri.wordMode = true; |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
27 for (int i = 1; i < argc; i++) { |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
28 if (strcmp(argv[i],"-regex") == 0) { |
178
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
167
diff
changeset
|
29 ri.ptr = (unsigned char*)argv[i+1]; i++; |
216 | 30 } else if (strcmp(argv[i],"-noGeneration") == 0) { |
31 generate = false; | |
32 } else if (strcmp(argv[i],"-subset") == 0) { | |
33 subset = true; | |
228
399380ad95b7
fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
222
diff
changeset
|
34 } else if (strcmp(argv[i],"-seq") == 0) { |
312 | 35 generateSequentialSearch = exportState; |
36 } else if (strcmp(argv[i],"-loop") == 0) { | |
37 generateSequentialSearch = exportStateLoop; | |
38 } else if (strcmp(argv[i],"-cbc") == 0) { | |
39 generateSequentialSearch = exportStateCbC; | |
232
1a34e702776a
add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
231
diff
changeset
|
40 } else if (strcmp(argv[i],"-file") == 0) { |
266 | 41 s.filename = filename = argv[i+1]; i++; |
260
3f3c9902bb6d
initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
259
diff
changeset
|
42 } else if (strcmp(argv[i],"-ts") == 0) { |
3f3c9902bb6d
initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
259
diff
changeset
|
43 ts = true; |
314 | 44 } else if (strcmp(argv[i],"+word") == 0) { |
45 ri.wordMode = false; | |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
46 } |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
47 } |
266 | 48 if (!ri.ptr) return s; |
216 | 49 |
178
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
167
diff
changeset
|
50 printf("regex : %s\n",ri.ptr); |
216 | 51 NodePtr n = regex(&ri); // parse only |
214
a94f57af1600
remove allocateCCstate createCCState
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
195
diff
changeset
|
52 printTree(n); |
216 | 53 |
231
d67649929e96
add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
228
diff
changeset
|
54 TGValue tgv; |
219
a8e477f1352d
add an example for subset construction
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
217
diff
changeset
|
55 if (generate && !subset) { // NFA generation |
231
d67649929e96
add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
228
diff
changeset
|
56 tgv = generateTransitionList(n); |
217 | 57 printTree(n); |
216 | 58 printState(tgv.tg); |
228
399380ad95b7
fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
222
diff
changeset
|
59 if (generateSequentialSearch) { |
399380ad95b7
fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
222
diff
changeset
|
60 exportState(tgv.tg); |
399380ad95b7
fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
222
diff
changeset
|
61 } |
216 | 62 } else if (subset) { |
231
d67649929e96
add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
228
diff
changeset
|
63 tgv = generateTransitionList(n); |
249
9493800265a8
remove SCValue(not all remove)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
235
diff
changeset
|
64 subsetConstruction(tgv.tg); // Determinization |
216 | 65 printState(tgv.tg); |
228
399380ad95b7
fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
222
diff
changeset
|
66 if (generateSequentialSearch) { |
312 | 67 generateSequentialSearch(tgv.tg); |
228
399380ad95b7
fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
222
diff
changeset
|
68 } |
216 | 69 } |
231
d67649929e96
add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
228
diff
changeset
|
70 |
266 | 71 if (filename != NULL && !parallel) { |
232
1a34e702776a
add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
231
diff
changeset
|
72 int fd = 0; |
1a34e702776a
add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
231
diff
changeset
|
73 st_mmap_t st_mmap = createSt_mmap(filename,fd); |
1a34e702776a
add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
231
diff
changeset
|
74 Buffer buff = createBuffer(st_mmap); |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
75 if (ts) threadedSearch(tgv.tg,buff); |
299 | 76 else grepWalk(tgv.tg,buff); |
235
4aab1e93a971
fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
232
diff
changeset
|
77 close(fd); |
232
1a34e702776a
add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
231
diff
changeset
|
78 } |
266 | 79 |
80 s.tg = tgv.tg; | |
81 return s; | |
45 | 82 } |