annotate src/plparser/PropertyListStreamScanner.java @ 9:29e309b2f624

Try several Tokenizer
author one
date Thu, 02 Sep 2010 10:00:23 +0900
parents
children 79d492bce828
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
1 package plparser;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
2
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
3 import java.io.FileNotFoundException;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
4 import java.io.FileReader;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
5 import java.io.InputStream;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
6 import java.io.InputStreamReader;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
7 import java.io.Reader;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
8 import java.io.StringReader;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
9 import java.util.Scanner;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
10 import java.util.regex.Pattern;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
11
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
12 /**
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
13 * delimiter に何を設定しても動いてくれない。
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
14 * */
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
15
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
16 public class PropertyListStreamScanner<T> extends PLScannerImpl<T> implements
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
17 PLScanner<T> {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
18
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
19 private Scanner scan;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
20
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
21 public PropertyListStreamScanner(
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
22 PLScanner<T> s,
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
23 Dictionary<T> dict, Token<T> nullToken) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
24 this.dict = dict;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
25 this.nullToken = nullToken;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
26 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
27
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
28
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
29 public PropertyListStreamScanner(Dictionary<T> dict) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
30 this.dict = dict;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
31 nullToken = new Token<T>("",TokenID.NULL);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
32 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
33
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
34
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
35 public void init() {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
36 String pattern = ".";
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
37 scan.useDelimiter(pattern);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
38 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
39
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
40 // Pattern must contain exact 1 group
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
41 private static Pattern tokenPat = Pattern.compile(
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
42 "([={}(),;])"
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
43 );
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
44 private static Pattern namePat = Pattern.compile("([_a-zA-Z][\\@\\w]*)");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
45 private static final Pattern numPat = Pattern.compile("([0-9]+)");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
46 private static final Pattern stringPat1 = Pattern.compile("\\\"([^\"]*)\\\"");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
47 private static final Pattern stringPat = Pattern.compile("\\'([^\\']*)\\'");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
48 private static final Pattern stringPat1cont = Pattern.compile("\\\"([^\"]*)$");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
49 private static final Pattern stringPatCont = Pattern.compile("\\'([^\\']*)$");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
50 private static final Pattern stringPat1End = Pattern.compile("([^\"]*)\\\"");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
51 private static final Pattern stringPatEnd = Pattern.compile("([^\\']*)\'");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
52 private static final Pattern commentPat = Pattern.compile("(//.*)");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
53 private static final Pattern commentPat1 = Pattern.compile("(/\\*)");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
54 private static final Pattern commentPat1End = Pattern.compile("(.*\\*/)");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
55 private static final Pattern errorPat = Pattern.compile("([^\\s])");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
56 private static final Pattern anyPat = Pattern.compile("(.)");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
57
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
58 @Override
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
59 public Token<T> nextToken() {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
60 String s;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
61 nextToken = nullToken;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
62 while(hasRemaining()) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
63 if ((s=scan.next(tokenPat))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
64 Token<T> t;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
65 if ((t = dict.get(s))==null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
66 dict.put(s, t = new Token<T>(s,TokenID.Any));
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
67 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
68 return nextToken = t;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
69 } else if ((s=scan.next(stringPatCont))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
70 // non terminated string
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
71 String s1;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
72 while((s1=scan.next(stringPatEnd))==null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
73 s += scan.nextLine();
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
74 lineno++;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
75 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
76 s += s1;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
77 Token<T> t;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
78 if ((t = dict.get(s))==null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
79 dict.put(s, t = new Token<T>(s,TokenID.VARIABLE));
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
80 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
81 return nextToken = t;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
82 } else if ((s=scan.next(stringPat1cont))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
83 // non terminated string
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
84 String s1;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
85 while((s1=scan.next(stringPat1End))==null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
86 s += scan.nextLine();
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
87 lineno++;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
88 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
89 s += s1;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
90 Token<T> t;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
91 if ((t = dict.get(s))==null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
92 dict.put(s, t = new Token<T>(s,TokenID.VARIABLE));
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
93 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
94 return nextToken = t;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
95 } else if ((s=scan.next(stringPat))!=null||(s=scan.next(stringPat1))!=null||(s=scan.next(namePat))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
96 Token<T> t;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
97 if ((t = dict.get(s))==null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
98 dict.put(s, t = new Token<T>(s,TokenID.VARIABLE));
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
99 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
100 if (t.type!=TokenID.VARIABLE) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
101 t = new Token<T>(s,TokenID.VARIABLE);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
102 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
103 return nextToken = t;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
104 } else if ((s=scan.next(numPat))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
105 return nextToken = new Token<T>(s,TokenID.NUMBER);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
106 } else if ((s=scan.next(commentPat))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
107 scan.nextLine();
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
108 continue;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
109 } else if ((s=scan.next(commentPat1))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
110 while(scan.next(commentPat1End)==null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
111 scan.nextLine();
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
112 lineno++;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
113 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
114 continue;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
115 } else if ((s=scan.next(errorPat))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
116 error("Don't understand '"+s+"'");
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
117 continue;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
118 } else if ((s=scan.next(anyPat))!=null) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
119 // skip space
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
120 continue;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
121 } else {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
122 lineno++;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
123 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
124 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
125 return nextToken;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
126 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
127
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
128
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
129 @Override
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
130 public boolean hasRemaining() {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
131 return scan.hasNext(anyPat);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
132 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
133
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
134 @Override
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
135 public PLScanner<T> pushScannerFile(InputStream newfile,
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
136 String prompt) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
137 return new PropertyListStreamScanner<T>(this,dict,nullToken).setFile(newfile,prompt);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
138 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
139
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
140 @Override
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
141 public PLScanner<T> pushScanner(String exp) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
142 return new PropertyListStreamScanner<T>(this,dict,nullToken).set(exp);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
143 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
144
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
145 @Override
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
146 public PLScanner<T> pushScannerFile(String file)
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
147 throws FileNotFoundException {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
148 return new PropertyListStreamScanner<T>(this,dict,nullToken).setFile(file);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
149 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
150
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
151 @Override
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
152 public PLScanner<T> set(String exp) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
153 Reader reader = new StringReader(exp);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
154 scan = new Scanner(reader);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
155 return this;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
156 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
157
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
158 @Override
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
159 public PLScanner<T> setFile(String file)
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
160 throws FileNotFoundException {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
161 Reader reader = new FileReader(file);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
162 scan = new Scanner(reader);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
163 return this;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
164 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
165
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
166 @Override
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
167 public PLScanner<T> set(InputStreamReader reader) {
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
168 scan = new Scanner(reader);
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
169 return this;
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
170 }
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
171
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
172
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
173
29e309b2f624 Try several Tokenizer
one
parents:
diff changeset
174 }