annotate regex/main.cc @ 39:120c8116e831

refactoring
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Mon, 02 Mar 2015 22:15:55 +0900
parents d15b9d342421
children c25b75f764a7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include <stdlib.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include <unistd.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 #include <math.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include <string.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 #include <sys/mman.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 #include <sys/stat.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 #include <sys/types.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 #include <fcntl.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n";
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
13 typedef struct result {
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
14 int matchNum;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
15 int matchLineNum;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
16 char* matchLine;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
17 } Result, *ResultPtr;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
18
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
19 typedef struct bmData {
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
20 int* skipTable;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
21 char* readText;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
22 int readTextLen;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
23 char* searchWord;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
24 int searchWordLen;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
25 } BMData, *BMDataPtr;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
26
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 //Boyer Moore法に使用するテーブルを作成
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
28 int *
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
29 createBMskiptable(BMDataPtr bmdata)
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 {
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
31 int searchWordLen = bmdata->searchWordLen;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
32 char *searchWord = bmdata->searchWord;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
33 int* skipTable = (int*)malloc(sizeof(int)*256);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
34
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 for(int i = 0; i < 256; ++i){
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
36 bmdata->skipTable[i] = searchWordLen;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
39 for(int j = 0; j < searchWordLen - 1; ++j){
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
40 bmdata->skipTable[(u_char)searchWord[j]] = searchWordLen - j - 1;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 }
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
42 return skipTable;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 //Boyer Moore法による文字列検索アルゴリズム
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
46 static void* BMmethod(BMDataPtr bmdata,ResultPtr result)
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 {
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
48 int i = bmdata->searchWordLen - 1;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 int matchCounter = 0;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
51 while ( i < bmdata->readTextLen){
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
52 int j = bmdata->searchWordLen - 1;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
53 while (bmdata->readText[i] == bmdata->searchWord[j]){
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 if (j == 0){
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 matchCounter++;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 --i;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 --j;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 }
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
60 i = i + fmax(bmdata->skipTable[(u_char)bmdata->readText[i]],bmdata->searchWordLen - j);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 }
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
62
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
63 result->matchNum = matchCounter;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
64 return result;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 int main(int argc, char* argv[]) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 char *filename = 0;
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
70 char *searchWord = 0;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 // check argument
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 for (int i = 0; argv[i]; i++) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 if (strcmp(argv[i], "-file") == 0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 filename = (char*)argv[i+1]; i++;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 }else if (strcmp(argv[i], "-sw") == 0) {
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
77 searchWord = (char*)argv[i+1]; i++;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
80
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 // prepare file read
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 if (filename == 0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 puts(usr_help_str);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 exit(1);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
86
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 struct stat sb;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 long fd = 0;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 char *textfile;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
90
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 if ((fd=open(filename,O_RDONLY,0666))==0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 fprintf(stderr,"can't open %s\n",filename);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
94
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 if (fstat(fd,&sb)) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 fprintf(stderr,"can't fstat %s\n",filename);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
98
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
99 BMDataPtr bmdata = (BMDataPtr)malloc(sizeof(BMData));
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
100
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 textfile = (char*)malloc(sb.st_size);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 read(fd,textfile,sb.st_size);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
103
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
104 bmdata->readText = textfile;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
105 bmdata->readTextLen = sb.st_size;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
106 bmdata->skipTable = (int*)malloc(sizeof(int)*256);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
107
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 if (textfile == (char*)-1) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 fprintf(stderr,"Can't mmap file\n");
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 perror(NULL);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
111 exit(0);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
113
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 // prepare Boyer Moore Search
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
115 bmdata->searchWord = searchWord;
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
116 bmdata->searchWordLen = strlen((const char*)bmdata->searchWord);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
117 bmdata->skipTable = createBMskiptable(bmdata);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
118
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
119 ResultPtr result = (ResultPtr)malloc(sizeof(Result));
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
120
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
121 BMmethod(bmdata,result);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
122
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
123 printf("sword: %s len: %d\n",bmdata->searchWord,bmdata->searchWordLen);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
124 printf("Match : %d\n",result->matchNum);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
125
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
126 free(result);
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
127 free(bmdata);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
128 return 0;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
129 }