annotate regex/main.cc @ 117:166136236891 pairPro

add header files
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Wed, 25 Nov 2015 14:58:03 +0900
parents 62739627f8ec
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include <stdlib.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include <unistd.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 #include <math.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include <string.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 #include <sys/mman.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 #include <sys/stat.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 #include <sys/types.h>
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 #include <fcntl.h>
41
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 40
diff changeset
10 #include "regex.h"
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n";
42
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
13 const char *usr_help_mode = "Please add -bm or -regex";
41
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 40
diff changeset
14 extern int *createBMskiptable(BMDataPtr);
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 40
diff changeset
15 extern void *BMmethod(BMDataPtr,ResultPtr);
43
ead0a307449e add regex.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 42
diff changeset
16 extern void *regex(RegexDataPtr);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 int main(int argc, char* argv[]) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
19
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 char *filename = 0;
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
21 char *searchWord = 0;
42
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
22 bool bmSearchFlag = false;
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
23 bool regexFlag = false;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 // check argument
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 for (int i = 0; argv[i]; i++) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 if (strcmp(argv[i], "-file") == 0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 filename = (char*)argv[i+1]; i++;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 }else if (strcmp(argv[i], "-sw") == 0) {
39
120c8116e831 refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 38
diff changeset
30 searchWord = (char*)argv[i+1]; i++;
42
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
31 }else if (strcmp(argv[i], "-bm") == 0) {
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
32 bmSearchFlag = true;
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
33 }else if (strcmp(argv[i], "-regex") == 0) {
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
34 regexFlag = true;
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 // prepare file read
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 if (filename == 0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 puts(usr_help_str);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 exit(1);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 struct stat sb;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 long fd = 0;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 char *textfile;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 if ((fd=open(filename,O_RDONLY,0666))==0) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 fprintf(stderr,"can't open %s\n",filename);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 if (fstat(fd,&sb)) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 fprintf(stderr,"can't fstat %s\n",filename);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 textfile = (char*)malloc(sb.st_size);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 read(fd,textfile,sb.st_size);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 if (textfile == (char*)-1) {
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 fprintf(stderr,"Can't mmap file\n");
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 perror(NULL);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 exit(0);
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 }
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
64
42
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
65 if (bmSearchFlag == true) {
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
66 BMDataPtr bmdata = (BMDataPtr)malloc(sizeof(BMData));
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
67 ResultPtr result = (ResultPtr)malloc(sizeof(Result));
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68
42
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
69 bmdata->readText = textfile;
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
70 bmdata->readTextLen = sb.st_size;
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
71 bmdata->skipTable = (int*)malloc(sizeof(int)*256);
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
72
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
73 // prepare Boyer Moore Search
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
74 bmdata->searchWord = searchWord;
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
75 bmdata->searchWordLen = strlen((const char*)bmdata->searchWord);
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
76 bmdata->skipTable = createBMskiptable(bmdata);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
77
42
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
78 BMmethod(bmdata,result);
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
79
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
80 printf("sword: %s len: %d\n",bmdata->searchWord,bmdata->searchWordLen);
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
81 printf("Match : %d\n",result->matchNum);
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82
42
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
83 free(result);
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
84 free(bmdata);
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
85 }else if (regexFlag == true) {
43
ead0a307449e add regex.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 42
diff changeset
86 RegexDataPtr regexD = (RegexDataPtr)malloc(sizeof(RegexData));
ead0a307449e add regex.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 42
diff changeset
87 regexD->readText = textfile;
ead0a307449e add regex.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 42
diff changeset
88 regexD->readTextLen = sb.st_size;
ead0a307449e add regex.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 42
diff changeset
89 regexD->searchWord = searchWord;
ead0a307449e add regex.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 42
diff changeset
90 regexD->searchWordLen = strlen((const char*)regexD->searchWord);
ead0a307449e add regex.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 42
diff changeset
91 regex(regexD);
44
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 43
diff changeset
92 free(regexD);
42
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
93 }else{
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
94 puts(usr_help_mode);
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
95 exit(1);
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
96 }
cdb4fd81c31f add regex mode
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 41
diff changeset
97
38
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 return 0;
d15b9d342421 add regex
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 }