comparison regex/main.cc @ 39:120c8116e831

refactoring
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Mon, 02 Mar 2015 22:15:55 +0900
parents d15b9d342421
children c25b75f764a7
comparison
equal deleted inserted replaced
38:d15b9d342421 39:120c8116e831
8 #include <sys/types.h> 8 #include <sys/types.h>
9 #include <fcntl.h> 9 #include <fcntl.h>
10 10
11 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n"; 11 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n";
12 12
13 typedef struct result {
14 int matchNum;
15 int matchLineNum;
16 char* matchLine;
17 } Result, *ResultPtr;
18
19 typedef struct bmData {
20 int* skipTable;
21 char* readText;
22 int readTextLen;
23 char* searchWord;
24 int searchWordLen;
25 } BMData, *BMDataPtr;
26
13 //Boyer Moore法に使用するテーブルを作成 27 //Boyer Moore法に使用するテーブルを作成
14 int* 28 int *
15 createBMskiptable(u_char *search_word,int search_word_len,int *skip_table) 29 createBMskiptable(BMDataPtr bmdata)
16 { 30 {
31 int searchWordLen = bmdata->searchWordLen;
32 char *searchWord = bmdata->searchWord;
33 int* skipTable = (int*)malloc(sizeof(int)*256);
34
17 for(int i = 0; i < 256; ++i){ 35 for(int i = 0; i < 256; ++i){
18 skip_table[i] = search_word_len; 36 bmdata->skipTable[i] = searchWordLen;
19 } 37 }
20 38
21 for(int j = 0; j < search_word_len - 1; ++j){ 39 for(int j = 0; j < searchWordLen - 1; ++j){
22 skip_table[search_word[j]] = search_word_len - j - 1; 40 bmdata->skipTable[(u_char)searchWord[j]] = searchWordLen - j - 1;
23 } 41 }
24 return skip_table; 42 return skipTable;
25 } 43 }
26 44
27 //Boyer Moore法による文字列検索アルゴリズム 45 //Boyer Moore法による文字列検索アルゴリズム
28 static int BMmethod(u_char *text,int textLen, 46 static void* BMmethod(BMDataPtr bmdata,ResultPtr result)
29 u_char *pattern,int swLen,int *skipTable)
30 { 47 {
31 int i = swLen - 1; 48 int i = bmdata->searchWordLen - 1;
32 int matchCounter = 0; 49 int matchCounter = 0;
33 50
34 while ( i < textLen){ 51 while ( i < bmdata->readTextLen){
35 int j = swLen - 1; 52 int j = bmdata->searchWordLen - 1;
36 while (text[i] == pattern[j]){ 53 while (bmdata->readText[i] == bmdata->searchWord[j]){
37 if (j == 0){ 54 if (j == 0){
38 matchCounter++; 55 matchCounter++;
39 } 56 }
40 --i; 57 --i;
41 --j; 58 --j;
42 } 59 }
43 i = i + fmax(skipTable[text[i]],swLen - j); 60 i = i + fmax(bmdata->skipTable[(u_char)bmdata->readText[i]],bmdata->searchWordLen - j);
44 } 61 }
45 return matchCounter; 62
63 result->matchNum = matchCounter;
64 return result;
46 } 65 }
47 66
48 int main(int argc, char* argv[]) { 67 int main(int argc, char* argv[]) {
49 68
50 char *filename = 0; 69 char *filename = 0;
51 // "u_char" is "unsigned char" in sys/types.h 70 char *searchWord = 0;
52 u_char *searchWord = 0;
53 71
54 // check argument 72 // check argument
55 for (int i = 0; argv[i]; i++) { 73 for (int i = 0; argv[i]; i++) {
56 if (strcmp(argv[i], "-file") == 0) { 74 if (strcmp(argv[i], "-file") == 0) {
57 filename = (char*)argv[i+1]; i++; 75 filename = (char*)argv[i+1]; i++;
58 }else if (strcmp(argv[i], "-sw") == 0) { 76 }else if (strcmp(argv[i], "-sw") == 0) {
59 searchWord = (u_char*)argv[i+1]; i++; 77 searchWord = (char*)argv[i+1]; i++;
60 } 78 }
61 } 79 }
62 80
63 // prepare file read 81 // prepare file read
64 if (filename == 0) { 82 if (filename == 0) {
76 94
77 if (fstat(fd,&sb)) { 95 if (fstat(fd,&sb)) {
78 fprintf(stderr,"can't fstat %s\n",filename); 96 fprintf(stderr,"can't fstat %s\n",filename);
79 } 97 }
80 98
99 BMDataPtr bmdata = (BMDataPtr)malloc(sizeof(BMData));
100
81 textfile = (char*)malloc(sb.st_size); 101 textfile = (char*)malloc(sb.st_size);
82 read(fd,textfile,sb.st_size); 102 read(fd,textfile,sb.st_size);
103
104 bmdata->readText = textfile;
105 bmdata->readTextLen = sb.st_size;
106 bmdata->skipTable = (int*)malloc(sizeof(int)*256);
83 107
84 if (textfile == (char*)-1) { 108 if (textfile == (char*)-1) {
85 fprintf(stderr,"Can't mmap file\n"); 109 fprintf(stderr,"Can't mmap file\n");
86 perror(NULL); 110 perror(NULL);
87 exit(0); 111 exit(0);
88 } 112 }
89 113
90 // prepare Boyer Moore Search 114 // prepare Boyer Moore Search
115 bmdata->searchWord = searchWord;
116 bmdata->searchWordLen = strlen((const char*)bmdata->searchWord);
117 bmdata->skipTable = createBMskiptable(bmdata);
91 118
92 int *skipTable = (int*)malloc(256 * sizeof(int)); // 文字列に対応した table を用意 119 ResultPtr result = (ResultPtr)malloc(sizeof(Result));
93 int searchWordLen = strlen((const char*)searchWord);
94 int *BMskip_table = createBMskiptable(searchWord, searchWordLen, skipTable);
95 120
96 int matchNum = BMmethod((u_char*)textfile,sb.st_size,searchWord,searchWordLen,BMskip_table); 121 BMmethod(bmdata,result);
97 122
98 printf("sword: %s len: %d\n",searchWord,searchWordLen); 123 printf("sword: %s len: %d\n",bmdata->searchWord,bmdata->searchWordLen);
99 printf("Match : %d\n",matchNum); 124 printf("Match : %d\n",result->matchNum);
100 125
101 free(textfile); 126 free(result);
102 free(skipTable); 127 free(bmdata);
103 return 0; 128 return 0;
104 } 129 }