annotate regexParser/CeriumGrep.cc @ 266:e51cac73e42a

CeriumGrep start
author masa
date Thu, 28 Jan 2016 21:14:34 +0900
parents regexParser/main.cc@157f6886ba55
children 5d4afe6f6d00
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
2 #include <stdlib.h>
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
3 #include <string.h>
235
4aab1e93a971 fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
4 #include <unistd.h>
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
5 #include "CeriumGrep.h"
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
6 #include "subsetConstruction.h"
190
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 189
diff changeset
7 #include "node.h"
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
8 #include "grepWalk.h"
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
9 #include "fileread.h"
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
10 #include "threadedSearch.h"
76
d98a036441e2 add createNode comment
masa
parents: 75
diff changeset
11
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
12 Search grep(int argc,char **argv,bool parallel)
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
13 {
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
14 bool generate = true;
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
15 bool subset = false;
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
16 bool generateSequentialSearch = false;
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
17 bool ts = false;
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
18 char *filename;
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
19 Search s;
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
20 s.filename = "";
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
21 s.tg = NULL;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
22
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
23 RegexInfo ri;
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
24 ri.stateNumber = 1;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
25 for (int i = 1; i < argc; i++) {
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
26 if (strcmp(argv[i],"-regex") == 0) {
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
27 ri.ptr = (unsigned char*)argv[i+1]; i++;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
28 } else if (strcmp(argv[i],"-noGeneration") == 0) {
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
29 generate = false;
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
30 } else if (strcmp(argv[i],"-subset") == 0) {
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
31 subset = true;
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
32 } else if (strcmp(argv[i],"-seq") == 0) {
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
33 generateSequentialSearch = true;
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
34 } else if (strcmp(argv[i],"-file") == 0) {
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
35 s.filename = filename = argv[i+1]; i++;
260
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
36 } else if (strcmp(argv[i],"-ts") == 0) {
3f3c9902bb6d initialize node in allocateNode()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 259
diff changeset
37 ts = true;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
38 }
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
39 }
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
40 if (!ri.ptr) return s;
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
41
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 167
diff changeset
42 printf("regex : %s\n",ri.ptr);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
43 NodePtr n = regex(&ri); // parse only
214
a94f57af1600 remove allocateCCstate createCCState
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 195
diff changeset
44 printTree(n);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
45
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
46 TGValue tgv;
219
a8e477f1352d add an example for subset construction
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 217
diff changeset
47 if (generate && !subset) { // NFA generation
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
48 tgv = generateTransitionList(n);
217
a9e3512120e2 NFA generation end
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 216
diff changeset
49 printTree(n);
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
50 printState(tgv.tg);
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
51 if (generateSequentialSearch) {
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
52 exportState(tgv.tg);
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
53 }
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
54 } else if (subset) {
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
55 tgv = generateTransitionList(n);
249
9493800265a8 remove SCValue(not all remove)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 235
diff changeset
56 subsetConstruction(tgv.tg); // Determinization
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
57 printState(tgv.tg);
228
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
58 if (generateSequentialSearch) {
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
59 exportState(tgv.tg);
399380ad95b7 fix generateTransitionGenerator
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 222
diff changeset
60 }
216
4852bfa85db4 spell fix
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
61 }
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 228
diff changeset
62
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
63 if (filename != NULL && !parallel) {
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
64 int fd = 0;
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
65 st_mmap_t st_mmap = createSt_mmap(filename,fd);
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
66 Buffer buff = createBuffer(st_mmap);
262
157f6886ba55 write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 260
diff changeset
67 if (ts) threadedSearch(tgv.tg,buff);
259
6a6546a753cf tSearch of driver in main.cc (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 249
diff changeset
68 else grepWalk(tgv.tg,buff);
235
4aab1e93a971 fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
69 close(fd);
232
1a34e702776a add fileread.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 231
diff changeset
70 }
266
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
71
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
72 if (parallel) {
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
73 createAllPossibleState(tgv.tg);
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
74 }
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
75
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
76 s.tg = tgv.tg;
e51cac73e42a CeriumGrep start
masa
parents: 262
diff changeset
77 return s;
45
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 }