comparison c/regexParser/main.cc @ 58:4053c3e0fa7f

implement group()
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Fri, 12 Jun 2015 19:02:00 +0900
parents 71b497d25273
children af189c727733
comparison
equal deleted inserted replaced
57:71b497d25273 58:4053c3e0fa7f
1 /* 1 /*
2 * <literal> ::= [a-z][A-Z][0-9] 2 * <literal> ::= [a-z][A-Z][0-9]
3 * <charClass> ::= '['<literal>'-'<literal>']' 3 * <charClass> ::= '['<literal>'-'<literal>']'
4 * <string> ::= <literal><literal>* 4 * <string> ::= <literal><literal>*
5 * <or> ::= '('<regex>'|'<regex>')' 5 * <group> ::= '('<regex>')'
6 * <or> ::= <regex>'|'<regex>
6 * <*> ::= <regex>'*' 7 * <*> ::= <regex>'*'
7 * <regex> ::= <literal>|<conc>|<or>|<charClass> 8 * <regex> ::= <string>|<or>|<charClass>|<group>|<*>
8 */ 9 */
9 10
10 #include <stdio.h> 11 #include <stdio.h>
11 #include <stdlib.h> 12 #include <stdlib.h>
12 #include <string.h> 13 #include <string.h>
13 14
14 char *ptr;
15 typedef struct node { 15 typedef struct node {
16 struct node *self;
16 char character; 17 char character;
17 struct node *left; 18 struct node *left;
18 struct node *right; 19 struct node *right;
19 } Node, *NodePtr; 20 } Node, *NodePtr;
20 21
22 char *ptr;
23 NodePtr regexHeadNode;
24
21 NodePtr charClass(); 25 NodePtr charClass();
22 NodePtr string(); 26 NodePtr string();
27 NodePtr group();
23 NodePtr _or(); 28 NodePtr _or();
24 NodePtr asterisk(); 29 NodePtr asterisk();
25 NodePtr regex(); 30 NodePtr regex();
26 NodePtr createNode(char,NodePtr,NodePtr); 31 NodePtr createNode(char,NodePtr,NodePtr);
27 32
28 NodePtr createNode(char character, NodePtr left, NodePtr right) { 33 NodePtr createNode(char character, NodePtr left, NodePtr right) {
29 NodePtr n = (NodePtr)malloc(sizeof(Node)); 34 NodePtr n = (NodePtr)malloc(sizeof(Node));
35 n->self = n;
30 n->character = character; 36 n->character = character;
31 n->left = left; 37 n->left = left;
32 n->right = right; 38 n->right = right;
39
33 return n; 40 return n;
34 } 41 }
35 42
36 // <charClass> ::= '['<literal>'-'<literal>']' 43 // <charClass> ::= '['<literal>'-'<literal>']'
37 NodePtr charClass() { 44 NodePtr charClass() {
45 ptr++;
38 NodePtr n = createNode(0,0,0); 46 NodePtr n = createNode(0,0,0);
39 return n; 47 return n;
40 } 48 }
41 49
42 // <literal> ::= [a-z][A-Z][0-9] 50 // <literal> ::= [a-z][A-Z][0-9]
48 } 56 }
49 57
50 // <string> ::= <literal><literal>* 58 // <string> ::= <literal><literal>*
51 NodePtr string() { 59 NodePtr string() {
52 char c = *ptr; 60 char c = *ptr;
53 NodePtr n; 61 NodePtr n = NULL;
54 62 printf("%c\n",c);
55 if (('a'<=c && c<='z')||('A'<=c && c<='Z')||('0'<=c && c<='9')) { 63 if (('a'<=c && c<='z')||('A'<=c && c<='Z')||('0'<=c && c<='9')) {
56 n = createNode(0,literal(),string()); 64 n = createNode(0,literal(),string());
57 return n;
58 } else { 65 } else {
59 n = createNode(0,0,0); 66 n = createNode(0,0,0);
60 } 67 }
68 return n;
61 } 69 }
62 70
63 // <or> ::= '('<regex>'|'<regex>')' 71 // <group> ::= '('<regex>')' | '('<regex>'|'<regex>')'
72 NodePtr group() {
73 NodePtr n;
74 if (*ptr == ')') {
75 n = createNode(0,0,0);
76 ptr++;
77 } else {
78 ptr++;
79 n = regex();
80 }
81
82 return n;
83 }
84
85
86 // <or> ::= <regex>'|'<regex>
64 NodePtr _or() { 87 NodePtr _or() {
65 regex(); 88 NodePtr n = createNode('|',regexHeadNode,regex());
66 while(*ptr++ == ')') { 89 return n;
67 if (*ptr == '|') {
68 ptr++;
69 regex();
70 }
71 }
72 } 90 }
73 91
74 // <*> ::= <regex>'*' 92 // <*> ::= <regex>'*'
75 NodePtr asterisk() { 93 NodePtr asterisk() {
76 94
77 } 95 }
78 96
79 // <regex> ::= <literal>|<string>|<or>|<charClass> 97 // <regex> ::= <string>|<or>|<charClass>|<group>|<*>
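80 // <literal> は <string> に内包されるから、<regex> ::= <string>|<or>|<charClass>が正しい?? (English: <literal> is already subsumed by <string>, so is <regex> ::= <string>|<or>|<charClass> the correct rule??)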
81 NodePtr regex() { 98 NodePtr regex() {
82 99
83 NodePtr n; 100 NodePtr n;
84 101
85 while (char c = *ptr) { 102 while (*ptr != '\0') {
86 if (c == '(') { 103 if ((*ptr == '(') || (*ptr == ')')) {
87 ptr++; 104 n = group();
105 } else if (*ptr == '[') {
106 n = charClass();
107 } else if (*ptr == '|'){
88 n = _or(); 108 n = _or();
89 } else if (c == '[') {
90 n = charClass();
91 } else { 109 } else {
92 n = string(); 110 n = string();
111 regexHeadNode = n;
93 } 112 }
94 ptr++;
95 } 113 }
96 114
97 return n; 115 return n;
98 } 116 }
99 117