annotate regexParser/bmSearch.cc @ 319:7b8234c090f7

bmSearch
author mir3636
date Sun, 08 May 2016 22:53:20 +0900
parents
children da02a7258d54
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
319
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
1 #include "regexPaser.h"
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
2 #include "CharClass.h"
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
3
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
4 /**
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
5 * if start node contains only words, Boyer-Moore Search can be used.
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
6 * if so skip table is created for each word.
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
7 */
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
8
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
9 static void
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
10 create_BMskiptable(BMPtr bm,unsigned char *word,int len)
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
11 {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
12 bm->skip_table = (int*)malloc(sizeof(int)*256);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
13 for (int i = 0; i < 256; ++i) {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
14 bm->skip_table[i] = len;
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
15 }
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
16
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
17 for (int j = 0; j < len - 1; ++j) {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
18 bm->skip_table[word[j]] = len - j - 1;
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
19 }
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
20 }
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
21
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
22 void checkBMSearch(CharaClassPtr cc) {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
23
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
24 // first check there is no Chareclass range
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
25 CharClassWalkerPtr cw = createCharClassWalker(st->cc);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
26 while (hasNext(cw)) {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
27 CharClassPtr cc = getNext(cw);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
28 if (cc->cond.w.word == NULL) {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
29 free(cw);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
30 return;
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
31 }
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
32 }
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
33 free(cw);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
34
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
35 // make skip table for each word
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
36 cw = createCharClassWalker(st->cc);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
37 while (hasNext(cw)) {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
38 CharClassPtr cc = getNext(cw);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
39 if (cc->cond.w.word) {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
40 WordPtr w = &cc->cond.w;
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
41 while (w) {
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
42 BMPtr bm = NEW(BM);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
43 cc->cond.w.bm = bm;
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
44 create_BMskiptable(bm,cc->cond.w.word,cc->cond.length);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
45 w = w->next;
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
46 }
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
47 }
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
48 }
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
49 free(cw);
7b8234c090f7 bmSearch
mir3636
parents:
diff changeset
50 }