changeset 38:d15b9d342421

add regex
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Sun, 01 Mar 2015 14:10:25 +0900
parents 0433a15ea8d2
children 120c8116e831
files regex/Makefile regex/main.cc
diffstat 2 files changed, 114 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex/Makefile	Sun Mar 01 14:10:25 2015 +0900
@@ -0,0 +1,10 @@
+TARGET=regex
+OPTION= -Wall -O0 -g
+
+$(TARGET):main.cc
+	clang $(OPTION) -o $(TARGET) main.cc
+
+clean:
+	rm -f $(TARGET)
+	rm -r $(TARGET).dSYM
+	rm -f *~ \#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex/main.cc	Sun Mar 01 14:10:25 2015 +0900
@@ -0,0 +1,104 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n";
+
+//Boyer Moore法に使用するテーブルを作成
+int*
+createBMskiptable(u_char *search_word,int search_word_len,int *skip_table)
+{
+    for(int i = 0; i < 256; ++i){
+        skip_table[i] = search_word_len;
+    }
+
+    for(int j = 0; j < search_word_len - 1; ++j){
+        skip_table[search_word[j]] = search_word_len - j - 1;
+    }
+    return skip_table;
+}
+
+//Boyer Moore法による文字列検索アルゴリズム
+static int BMmethod(u_char *text,int textLen,
+              u_char *pattern,int swLen,int *skipTable)
+{
+    int i = swLen - 1;
+    int matchCounter = 0;
+
+    while ( i < textLen){
+        int j = swLen - 1;
+        while (text[i] == pattern[j]){
+            if (j == 0){
+                matchCounter++;
+            }
+            --i;
+            --j;
+        }
+        i = i + fmax(skipTable[text[i]],swLen - j);
+    }
+    return matchCounter;
+}
+
+int main(int argc, char* argv[]) {
+
+    char *filename = 0;
+    // "u_char" is "unsigned char" in sys/types.h
+    u_char *searchWord = 0;
+
+    // check argument
+    for (int i = 0; argv[i]; i++) {
+        if (strcmp(argv[i], "-file") == 0) {
+            filename = (char*)argv[i+1]; i++;
+        }else if (strcmp(argv[i], "-sw") == 0) {
+            searchWord = (u_char*)argv[i+1]; i++;
+        }
+    }
+
+    // prepare file read
+    if (filename == 0) {
+        puts(usr_help_str);
+        exit(1);
+    }
+
+    struct stat sb;
+    long fd = 0;
+    char *textfile;
+
+    if ((fd=open(filename,O_RDONLY,0666))==0) {
+        fprintf(stderr,"can't open %s\n",filename);
+    }
+
+    if (fstat(fd,&sb)) {
+        fprintf(stderr,"can't fstat %s\n",filename);
+    }
+
+    textfile = (char*)malloc(sb.st_size);
+    read(fd,textfile,sb.st_size);
+
+    if (textfile == (char*)-1) {
+        fprintf(stderr,"Can't mmap file\n");
+        perror(NULL);
+        exit(0);
+    }
+
+    // prepare Boyer Moore Search
+
+    int *skipTable = (int*)malloc(256 * sizeof(int));  // 文字列に対応した table を用意
+    int searchWordLen = strlen((const char*)searchWord);
+    int *BMskip_table = createBMskiptable(searchWord, searchWordLen, skipTable);
+
+    int matchNum = BMmethod((u_char*)textfile,sb.st_size,searchWord,searchWordLen,BMskip_table);
+
+    printf("sword: %s len: %d\n",searchWord,searchWordLen);
+    printf("Match : %d\n",matchNum);
+
+    free(textfile);
+    free(skipTable);
+    return 0;
+}