view regex/main.cc @ 38:d15b9d342421

add regex
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Sun, 01 Mar 2015 14:10:25 +0900
parents
children 120c8116e831
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>

const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n";

//Boyer Moore法に使用するテーブルを作成
int*
createBMskiptable(u_char *search_word,int search_word_len,int *skip_table)
{
    for(int i = 0; i < 256; ++i){
        skip_table[i] = search_word_len;
    }

    for(int j = 0; j < search_word_len - 1; ++j){
        skip_table[search_word[j]] = search_word_len - j - 1;
    }
    return skip_table;
}

//Boyer Moore法による文字列検索アルゴリズム
static int BMmethod(u_char *text,int textLen,
              u_char *pattern,int swLen,int *skipTable)
{
    int i = swLen - 1;
    int matchCounter = 0;

    while ( i < textLen){
        int j = swLen - 1;
        while (text[i] == pattern[j]){
            if (j == 0){
                matchCounter++;
            }
            --i;
            --j;
        }
        i = i + fmax(skipTable[text[i]],swLen - j);
    }
    return matchCounter;
}

int main(int argc, char* argv[]) {

    char *filename = 0;
    // "u_char" is "unsigned char" in sys/types.h
    u_char *searchWord = 0;

    // check argument
    for (int i = 0; argv[i]; i++) {
        if (strcmp(argv[i], "-file") == 0) {
            filename = (char*)argv[i+1]; i++;
        }else if (strcmp(argv[i], "-sw") == 0) {
            searchWord = (u_char*)argv[i+1]; i++;
        }
    }

    // prepare file read
    if (filename == 0) {
        puts(usr_help_str);
        exit(1);
    }

    struct stat sb;
    long fd = 0;
    char *textfile;

    if ((fd=open(filename,O_RDONLY,0666))==0) {
        fprintf(stderr,"can't open %s\n",filename);
    }

    if (fstat(fd,&sb)) {
        fprintf(stderr,"can't fstat %s\n",filename);
    }

    textfile = (char*)malloc(sb.st_size);
    read(fd,textfile,sb.st_size);

    if (textfile == (char*)-1) {
        fprintf(stderr,"Can't mmap file\n");
        perror(NULL);
        exit(0);
    }

    // prepare Boyer Moore Search

    int *skipTable = (int*)malloc(256 * sizeof(int));  // 文字列に対応した table を用意
    int searchWordLen = strlen((const char*)searchWord);
    int *BMskip_table = createBMskiptable(searchWord, searchWordLen, skipTable);

    int matchNum = BMmethod((u_char*)textfile,sb.st_size,searchWord,searchWordLen,BMskip_table);

    printf("sword: %s len: %d\n",searchWord,searchWordLen);
    printf("Match : %d\n",matchNum);

    free(textfile);
    free(skipTable);
    return 0;
}