changeset 56:8901bc071d33

implement string() and literal()
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 11 Jun 2015 16:24:40 +0900
parents 883e3473a9f5
children 71b497d25273
files c/regexParser/main.cc
diffstat 1 files changed, 82 insertions(+), 157 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/main.cc	Mon Jun 08 23:40:09 2015 +0900
+++ b/c/regexParser/main.cc	Thu Jun 11 16:24:40 2015 +0900
@@ -1,186 +1,111 @@
 /*
-    Very Simple Calculator
-    $Id$
+ * <literal> ::= [a-zA-Z0-9]
+ * <charClass> ::= '['<literal>'-'<literal>']'
+ * <string> ::= <literal><literal>*
+ * <or> ::= '('<regex>'|'<regex>')'
+ * <*> ::= <regex>'*'
+ * <regex> ::= <literal>|<string>|<or>|<charClass>
  */
 
 #include <stdio.h>
-
-static char *ptr,*last_ptr;
-static int  value,lvalue;
-static int  last_token;
-static int  variable[48];
+#include <stdlib.h>
+#include <string.h>
+char *ptr;
 
-static int  expr();
-static int  aexpr();
-static int  mexpr();
-static int  term();
-static int  token();
-static void  error(char *);
+typedef struct node {
+    int character;
+    struct node *left;
+    struct node *right;
+} Node, *NodePtr;
 
-
-static int
-token()
-{
-    int c,d;
+NodePtr charClass();
+NodePtr string();
+NodePtr _or();
+NodePtr asterisk();
+NodePtr regex();
+NodePtr createNode(int,NodePtr,NodePtr);
 
-    last_ptr = ptr;  /* for error position */
-    c= *ptr;
-    if(!c) {
-        last_token = EOF;
-        return last_token;
-    }
-    ptr++;
-    if (c<=' ') {       /* comment */
-        while(*ptr++);
-            ptr--;
-            last_token = EOF;
-            last_ptr = ptr;
-            return last_token;
-    }
+NodePtr createNode(int character, NodePtr left, NodePtr right) {
+    NodePtr n;
+    n = (NodePtr)malloc(sizeof(Node));
+    n->character = character;
+    n->left = left;
+    n->right = right;
+    return n;
+}
+
+// <charClass> ::= '['<literal>'-'<literal>']'
+NodePtr charClass() {
+    NodePtr n = createNode(0,0,0);
+    return n;
+}
 
-    if('0'<=c && c<='9') {     /* Decimal */
-        d = c-'0';
-        while((c= *ptr++)) {
-            if('0'<=c && c<='9') {
-                d = d*10 + (c - '0');
-            } else {
-                break;
-            }
-        }
-        c && ptr--;
-        value = d;
-        last_token = '0';
-        return last_token;
+// <literal> ::= [a-zA-Z0-9]
+NodePtr literal() {
+    char c = *ptr;
+    createNode(c,0,0);
+}
 
-    } else if ('a'<=c && c<='z') {    /* variable */
-        value = c-'a';                /* return variable reference */
-        last_token = 'v';
-        return last_token;
+// <string> ::= <literal><literal>*
+NodePtr string() {
+    char c = *ptr;
+    NodePtr n = (NodePtr)malloc(sizeof(Node));
+
+    if (('a'<=c && c<='z')||('A'<=c && c<='Z')||('0'<=c && c<='9')) {
+        n = createNode(0,literal(),string());
+        return n;
     } else {
-        last_token = c;
-        return last_token;
-        return c;
+        n = createNode(0,0,0);
     }
 }
 
-static int
-expr()
-{
-    int d,assign;
-
-    d = aexpr();
-    assign = lvalue;
-    switch(last_token) {
-    case '>':
-        d = (d > aexpr());
-        return d;
-    case '=':
-        if(assign>=0) {
-            d = expr(); 
-            variable[assign] = d;
-            return d;
-        } else {
-            error("Bad assignment");
-            return 0;
+// <or> ::= '('<regex>'|'<regex>')'
+NodePtr _or() {
+    regex();
+    while(*ptr++ == ')') {
+        if (*ptr == '|') {
+            ptr++;
+            regex();
         }
-    case ')':
-        return d;
-    case EOF:
-        return d;
-    default:
-        error("Bad expression");
-        return d;
-    }
-}
-
-static int
-aexpr()
-{
-    int d;
-
-    d = mexpr();
-    switch(last_token) {
-    case '-':
-        d -= aexpr();
-        return d;
-    case '+':
-        d += aexpr();
-        return d;
-    default:
-        return d;
     }
 }
 
-static int
-mexpr()
-{
-    int d;
-    d = term();
-    switch(last_token) {
-    case '*':
-        d *= mexpr();
-        return d;
-    case '/':
-        d /= mexpr();
-        return d;
-    default:
-        return d;
-    }
+// <*> ::= <regex>'*'
+NodePtr asterisk() {
+
 }
 
-static int
-term()
-{
-    int d;
+// <regex> ::= <literal>|<string>|<or>|<charClass>
+// Since <literal> is already covered by <string>, is <regex> ::= <string>|<or>|<charClass> the correct rule??
+NodePtr regex() {
+
+    NodePtr n;
 
-    lvalue= -1;
-    token();
-    if(last_token==EOF) {
-        error("Term expected");
+    while (int c = *ptr++) {
+        if (c == '(') {
+            ptr++;
+            _or();
+        } else if (c == '[') {
+            charClass();
+        } else {
+            n = createNode(0,string(),regex());
+        }
     }
-    switch(last_token) {
-    case '0':
-        d = value;
-        token();
-        return d;
-    case 'v':
-        d = lvalue = value;
-        token();
-        return variable[d];
-    case '(':
-        d = expr();
-        if(last_token != ')') {
-            error("Unbalanced parenthsis");
-        }
-        token();
-        return d;
-    default:
-        token();
-        error("Unknown term");
-        return 0;
-    }
-}
 
-static int lineno = 0;
-
-void
-error(char *msg)
-{
-    fprintf(stderr,"%s on line %d\n",msg, lineno);
+    return n;
 }
 
 int
-main()
+main(int argc, char **argv)
 {
-    int d;
-    char buf[BUFSIZ];
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i],"-regex") == 0) {
+            ptr = argv[i+1]; i++;
+        }
+    }
 
-    while (fgets(buf,BUFSIZ,stdin)) {
-        ptr = buf;
-        d = expr();
-        printf("%s = 0x%08x = %d\n",buf,d,d);
-        fflush(stdout);
-        lineno++;
-    }
+    printf("regex : %s\n",ptr);
+    NodePtr n = regex();
+
     return 0;
 }