annotate regex/main.cc @ 40:c25b75f764a7

add Regex.h
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Mon, 02 Mar 2015 22:18:37 +0900
parents 120c8116e831
children e1c5ecbf8836
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include <stdlib.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include <unistd.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 #include <math.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include <string.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 #include <sys/mman.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 #include <sys/stat.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 #include <sys/types.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 #include <fcntl.h>
40
c25b75f764a7 add Regex.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 39
diff changeset
10 #include "Regex.h"
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n";
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 //Boyer Moore法に使用するテーブルを作成
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
15 int *
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
16 createBMskiptable(BMDataPtr bmdata)
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 {
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
18 int searchWordLen = bmdata->searchWordLen;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
19 char *searchWord = bmdata->searchWord;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
20 int* skipTable = (int*)malloc(sizeof(int)*256);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
21
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 for(int i = 0; i < 256; ++i){
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
23 bmdata->skipTable[i] = searchWordLen;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
26 for(int j = 0; j < searchWordLen - 1; ++j){
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
27 bmdata->skipTable[(u_char)searchWord[j]] = searchWordLen - j - 1;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 }
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
29 return skipTable;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
31
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 //Boyer Moore法による文字列検索アルゴリズム
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
33 static void* BMmethod(BMDataPtr bmdata,ResultPtr result)
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 {
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
35 int i = bmdata->searchWordLen - 1;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 int matchCounter = 0;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
38 while ( i < bmdata->readTextLen){
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
39 int j = bmdata->searchWordLen - 1;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
40 while (bmdata->readText[i] == bmdata->searchWord[j]){
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 if (j == 0){
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 matchCounter++;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 --i;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 --j;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 }
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
47 i = i + fmax(bmdata->skipTable[(u_char)bmdata->readText[i]],bmdata->searchWordLen - j);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 }
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
49
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
50 result->matchNum = matchCounter;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
51 return result;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 int main(int argc, char* argv[]) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 char *filename = 0;
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
57 char *searchWord = 0;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 // check argument
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 for (int i = 0; argv[i]; i++) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 if (strcmp(argv[i], "-file") == 0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 filename = (char*)argv[i+1]; i++;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 }else if (strcmp(argv[i], "-sw") == 0) {
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
64 searchWord = (char*)argv[i+1]; i++;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
67
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 // prepare file read
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 if (filename == 0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 puts(usr_help_str);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 exit(1);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
73
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 struct stat sb;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 long fd = 0;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 char *textfile;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
77
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 if ((fd=open(filename,O_RDONLY,0666))==0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 fprintf(stderr,"can't open %s\n",filename);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
81
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 if (fstat(fd,&sb)) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 fprintf(stderr,"can't fstat %s\n",filename);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
85
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
86 BMDataPtr bmdata = (BMDataPtr)malloc(sizeof(BMData));
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
87
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 textfile = (char*)malloc(sb.st_size);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 read(fd,textfile,sb.st_size);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
90
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
91 bmdata->readText = textfile;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
92 bmdata->readTextLen = sb.st_size;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
93 bmdata->skipTable = (int*)malloc(sizeof(int)*256);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
94
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 if (textfile == (char*)-1) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 fprintf(stderr,"Can't mmap file\n");
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 perror(NULL);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 exit(0);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
100
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 // prepare Boyer Moore Search
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
102 bmdata->searchWord = searchWord;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
103 bmdata->searchWordLen = strlen((const char*)bmdata->searchWord);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
104 bmdata->skipTable = createBMskiptable(bmdata);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
105
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
106 ResultPtr result = (ResultPtr)malloc(sizeof(Result));
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
107
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
108 BMmethod(bmdata,result);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
109
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
110 printf("sword: %s len: %d\n",bmdata->searchWord,bmdata->searchWordLen);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
111 printf("Match : %d\n",result->matchNum);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
112
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
113 free(result);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
114 free(bmdata);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
115 return 0;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
116 }