comparison regex/main.cc @ 38:d15b9d342421

add regex
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Sun, 01 Mar 2015 14:10:25 +0900
parents
children 120c8116e831
comparison
equal deleted inserted replaced
37:0433a15ea8d2 38:d15b9d342421
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <unistd.h>
4 #include <math.h>
5 #include <string.h>
6 #include <sys/mman.h>
7 #include <sys/stat.h>
8 #include <sys/types.h>
9 #include <fcntl.h>
10
11 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n";
12
13 //Boyer Moore法に使用するテーブルを作成
14 int*
15 createBMskiptable(u_char *search_word,int search_word_len,int *skip_table)
16 {
17 for(int i = 0; i < 256; ++i){
18 skip_table[i] = search_word_len;
19 }
20
21 for(int j = 0; j < search_word_len - 1; ++j){
22 skip_table[search_word[j]] = search_word_len - j - 1;
23 }
24 return skip_table;
25 }
26
27 //Boyer Moore法による文字列検索アルゴリズム
28 static int BMmethod(u_char *text,int textLen,
29 u_char *pattern,int swLen,int *skipTable)
30 {
31 int i = swLen - 1;
32 int matchCounter = 0;
33
34 while ( i < textLen){
35 int j = swLen - 1;
36 while (text[i] == pattern[j]){
37 if (j == 0){
38 matchCounter++;
39 }
40 --i;
41 --j;
42 }
43 i = i + fmax(skipTable[text[i]],swLen - j);
44 }
45 return matchCounter;
46 }
47
48 int main(int argc, char* argv[]) {
49
50 char *filename = 0;
51 // "u_char" is "unsigned char" in sys/types.h
52 u_char *searchWord = 0;
53
54 // check argument
55 for (int i = 0; argv[i]; i++) {
56 if (strcmp(argv[i], "-file") == 0) {
57 filename = (char*)argv[i+1]; i++;
58 }else if (strcmp(argv[i], "-sw") == 0) {
59 searchWord = (u_char*)argv[i+1]; i++;
60 }
61 }
62
63 // prepare file read
64 if (filename == 0) {
65 puts(usr_help_str);
66 exit(1);
67 }
68
69 struct stat sb;
70 long fd = 0;
71 char *textfile;
72
73 if ((fd=open(filename,O_RDONLY,0666))==0) {
74 fprintf(stderr,"can't open %s\n",filename);
75 }
76
77 if (fstat(fd,&sb)) {
78 fprintf(stderr,"can't fstat %s\n",filename);
79 }
80
81 textfile = (char*)malloc(sb.st_size);
82 read(fd,textfile,sb.st_size);
83
84 if (textfile == (char*)-1) {
85 fprintf(stderr,"Can't mmap file\n");
86 perror(NULL);
87 exit(0);
88 }
89
90 // prepare Boyer Moore Search
91
92 int *skipTable = (int*)malloc(256 * sizeof(int)); // 文字列に対応した table を用意
93 int searchWordLen = strlen((const char*)searchWord);
94 int *BMskip_table = createBMskiptable(searchWord, searchWordLen, skipTable);
95
96 int matchNum = BMmethod((u_char*)textfile,sb.st_size,searchWord,searchWordLen,BMskip_table);
97
98 printf("sword: %s len: %d\n",searchWord,searchWordLen);
99 printf("Match : %d\n",matchNum);
100
101 free(textfile);
102 free(skipTable);
103 return 0;
104 }