changeset 9:29e309b2f624

Try several Tokenizer
author one
date Thu, 02 Sep 2010 10:00:23 +0900
parents 8d0f9c1816f5
children 0d74081c1309
files src/plparser/PropertyListCharTokenizer.java src/plparser/PropertyListScanner.java src/plparser/PropertyListStreamScanner.java src/plparser/PropertyListStreamTokenizer.java src/plparser/TestScanner.java
diffstat 5 files changed, 442 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plparser/PropertyListCharTokenizer.java	Thu Sep 02 10:00:23 2010 +0900
@@ -0,0 +1,113 @@
+package plparser;
+
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.nio.CharBuffer;
+
+public class PropertyListCharTokenizer<T> extends PropertyListScanner<T>
+		implements PLScanner<T> {
+
+	public PropertyListCharTokenizer(Dictionary<T> dict) {
+		super(dict);
+	}
+
+	public PropertyListCharTokenizer(
+			PLScanner<T>s,
+			Dictionary<T> dict, Token<T> nullToken) {
+		super(dict);
+		this.nullToken = nullToken;
+		this.prev = null;
+	}
+
+	public char ch;
+	
+	@Override
+	public Token<T> nextToken() {
+		nextToken = nullToken;
+		if (cb==null) return nextToken;
+		if (!hasRemaining()) return nextToken;
+		while(Character.isSpaceChar(ch)) {
+			if (!hasRemaining()) return nextToken;
+			ch = nextChar(); 
+		}
+		CharBuffer w = CharBuffer.allocate(BufferSize);
+		if (Character.isJavaIdentifierStart(ch)) {
+			w.put(ch);
+			while(hasRemaining()&&Character.isJavaIdentifierPart((ch=nextChar()))) {
+				w.put(ch);
+			}
+			return lookupDict(w);
+		} else if (Character.isDigit(ch)||ch=='-'||ch=='+') {
+			w.put(ch);
+			while(hasRemaining()&&Character.isDigit((ch=nextChar()))) {
+				w.put(ch);
+			}
+			return nextToken = new Token<T>(w.toString(),TokenID.NUMBER);
+		} else if (ch=='/') {
+			w.put(ch); 	
+			if (!hasRemaining()) return new Token<T>(w.toString(),TokenID.Any);
+			ch = nextChar();
+			if (ch=='/') {
+				while(hasRemaining() && (ch=nextChar())!='\n');
+				if (!hasRemaining())return nullToken;
+				ch = nextChar();
+				return nextToken();
+			}
+			if (ch=='*') {
+				while(hasRemaining() && !((ch=nextChar())=='*'&&(ch=nextChar())=='/'));
+				if (!hasRemaining())return nullToken;
+				ch = nextChar();
+				return nextToken();
+			}
+			return new Token<T>(w.toString(),TokenID.Any);
+		} else if (ch=='\'') {
+			while(hasRemaining() && (ch=nextChar())!='\'') w.put(ch);
+			if (!hasRemaining())return nullToken; // non terminate string
+			ch = nextChar();
+			return lookupDict(w);
+		} else if (ch=='"') {
+			while(hasRemaining() && (ch=nextChar())!='"') w.put(ch);
+			if (!hasRemaining())return nullToken; // non terminate string
+			ch = nextChar();
+			return lookupDict(w);
+		} else {
+			nextToken = lookupDict(w);
+			if (!hasRemaining())return nextToken; 
+			ch = nextChar();
+			return nextToken;
+		}
+	}
+
+	private Token<T> lookupDict(CharBuffer w) {
+		Token<T> t;
+		String s = w.toString();
+		if ((t = dict.get(s))==null) {
+			dict.put(s, t = new Token<T>(s,TokenID.Any));
+		}
+		return nextToken = t;
+	}
+
+	private char nextChar() {
+		if (!cb.hasRemaining()) extendInput();
+		char ch = cb.get();
+		return ch;
+	}
+
+
+	@Override
+	public PLScanner<T> pushScannerFile(InputStream newfile, String prompt) {
+		return new PropertyListCharTokenizer<T>(this,dict,nullToken).setFile(newfile,prompt);
+	}
+
+	@Override
+	public PLScanner<T> pushScanner(String exp) {
+		return new PropertyListCharTokenizer<T>(this,dict,nullToken).set(exp);
+	}
+
+	@Override
+	public PLScanner<T> pushScannerFile(String newfile)
+			throws FileNotFoundException {
+		return new PropertyListCharTokenizer<T>(this,dict,nullToken).setFile(newfile);
+	}
+}
+
--- a/src/plparser/PropertyListScanner.java	Wed Sep 01 18:43:06 2010 +0900
+++ b/src/plparser/PropertyListScanner.java	Thu Sep 02 10:00:23 2010 +0900
@@ -31,7 +31,7 @@
 	 */
 
 	public Matcher scan;
-	private CharBuffer cb;
+	public CharBuffer cb;
 	public PropertyListScanner(Dictionary<Node> dict) {
 		this.dict = dict;
 		nullToken = new Token<Node>("",TokenID.NULL);
@@ -118,7 +118,8 @@
 			} else if ((s=next(numPat))!=null) {
 				return nextToken = new Token<Node>(s,TokenID.NUMBER);
 			} else if ((s=next(commentPat))!=null) {
-				while(cb.hasRemaining()&&next(anyPat)!=null); // skip until eol (in case of buffer full)
+				cb.get(); scan.reset(); lineno++;
+				// while(cb.hasRemaining()&&next(anyPat)!=null); // skip until eol (in case of buffer full)
 				continue;
 			} else if ((s=next(commentPat1))!=null) {
 				while(next(commentPat1End)==null) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plparser/PropertyListStreamScanner.java	Thu Sep 02 10:00:23 2010 +0900
@@ -0,0 +1,174 @@
+package plparser;
+
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Scanner;
+import java.util.regex.Pattern;
+
+/**
+ * Experimental scanner built on {@link java.util.Scanner}.
+ *
+ * No matter what is set as the delimiter, this does not work.
+ *
+ * NOTE(review): according to the java.util.Scanner documentation,
+ * {@code next(Pattern)} throws an exception when the next token does not
+ * match instead of returning {@code null}, so the {@code != null} tests in
+ * {@link #nextToken()} cannot select between the alternatives as written.
+ * */
+
+public class PropertyListStreamScanner<T> extends PLScannerImpl<T> implements
+		PLScanner<T> {
+
+	// Underlying input; assigned by set(...) / setFile(...), null before that.
+	private Scanner scan;
+	
+	/**
+	 * Creates a scanner that shares the dictionary and null token of an
+	 * existing one.
+	 * NOTE(review): the parameter s is not used - confirm whether it should
+	 * be remembered as the previous scanner.
+	 */
+	public PropertyListStreamScanner(
+			PLScanner<T> s,
+			Dictionary<T> dict, Token<T> nullToken) {
+		this.dict = dict;
+		this.nullToken = nullToken;
+	}
+
+
+	/** Creates a scanner with its own empty-text NULL token. */
+	public PropertyListStreamScanner(Dictionary<T> dict) {
+		this.dict = dict;
+		nullToken = new Token<T>("",TokenID.NULL);
+	}
+
+
+	/**
+	 * Sets the delimiter pattern of the underlying Scanner to ".".
+	 * NOTE(review): nothing in this class calls init(), and scan is null
+	 * until set(...) or setFile(...) has been called.
+	 */
+	public void init() {
+		String pattern = ".";
+		scan.useDelimiter(pattern);
+	}
+
+	// Each pattern must contain exactly one group
+	private static Pattern tokenPat = Pattern.compile(
+			"([={}(),;])"
+	);
+	private static Pattern namePat  = Pattern.compile("([_a-zA-Z][\\@\\w]*)");
+	private static final Pattern numPat  = Pattern.compile("([0-9]+)");
+	private static final Pattern stringPat1  = Pattern.compile("\\\"([^\"]*)\\\"");
+	private static final Pattern stringPat  = Pattern.compile("\\'([^\\']*)\\'");
+	private static final Pattern stringPat1cont  = Pattern.compile("\\\"([^\"]*)$");
+	private static final Pattern stringPatCont  = Pattern.compile("\\'([^\\']*)$");
+	private static final Pattern stringPat1End  = Pattern.compile("([^\"]*)\\\"");
+	private static final Pattern stringPatEnd  = Pattern.compile("([^\\']*)\'");
+	private static final Pattern commentPat  = Pattern.compile("(//.*)");
+	private static final Pattern commentPat1  = Pattern.compile("(/\\*)");
+	private static final Pattern commentPat1End  = Pattern.compile("(.*\\*/)");
+	private static final Pattern errorPat = Pattern.compile("([^\\s])");
+	private static final Pattern anyPat = Pattern.compile("(.)");
+	
+	/**
+	 * Tries the token patterns in a fixed order and returns the first token
+	 * found, or the null token once hasRemaining() reports no more input.
+	 * Punctuation is looked up in the dictionary as TokenID.Any, strings and
+	 * names as TokenID.VARIABLE, digit runs become NUMBER tokens, and
+	 * comments are skipped.
+	 */
+	@Override
+	public Token<T> nextToken() {
+		String s;
+		nextToken = nullToken;
+		while(hasRemaining()) {
+			if ((s=scan.next(tokenPat))!=null) {
+				Token<T> t;
+				if ((t = dict.get(s))==null) {
+					dict.put(s, t = new Token<T>(s,TokenID.Any));
+				}
+				return nextToken = t;
+			} else if ((s=scan.next(stringPatCont))!=null) {
+				// single-quoted string that is not terminated yet:
+				// append whole lines until the closing quote is matched
+				String s1;
+				while((s1=scan.next(stringPatEnd))==null) {
+					s += scan.nextLine();
+					lineno++;
+				}
+				s += s1;
+				Token<T> t;
+				if ((t = dict.get(s))==null) {
+					dict.put(s, t = new Token<T>(s,TokenID.VARIABLE));
+				}
+				return nextToken = t;
+			} else if ((s=scan.next(stringPat1cont))!=null) {
+				// double-quoted string that is not terminated yet:
+				// append whole lines until the closing quote is matched
+				String s1;
+				while((s1=scan.next(stringPat1End))==null) {
+					s += scan.nextLine();
+					lineno++;
+				}
+				s += s1;
+				Token<T> t;
+				if ((t = dict.get(s))==null) {
+					dict.put(s, t = new Token<T>(s,TokenID.VARIABLE));
+				}
+				return nextToken = t;
+			} else if ((s=scan.next(stringPat))!=null||(s=scan.next(stringPat1))!=null||(s=scan.next(namePat))!=null) {
+				Token<T> t;
+				if ((t = dict.get(s))==null) {
+					dict.put(s, t = new Token<T>(s,TokenID.VARIABLE));
+				}
+				// a dictionary entry of another type is not reused; a fresh,
+				// unregistered VARIABLE token is returned instead
+				if (t.type!=TokenID.VARIABLE) {
+					t = new Token<T>(s,TokenID.VARIABLE);
+				}
+				return nextToken = t;
+			} else if ((s=scan.next(numPat))!=null) {
+				return nextToken = new Token<T>(s,TokenID.NUMBER);
+			} else if ((s=scan.next(commentPat))!=null) {
+				// line comment: discard the rest of the line
+				scan.nextLine();
+				continue;
+			} else if ((s=scan.next(commentPat1))!=null) {
+				// block comment: discard lines until the terminator is matched
+				while(scan.next(commentPat1End)==null) {
+					scan.nextLine();
+					lineno++;
+				}
+				continue;
+			} else if ((s=scan.next(errorPat))!=null) {
+				error("Don't understand '"+s+"'");
+				continue;
+			} else if ((s=scan.next(anyPat))!=null) {
+				// skip space
+				continue;
+			} else {
+				lineno++;
+			}
+		}
+		return nextToken;
+	}
+
+
+	/** Reports whether the underlying Scanner has a next token matching anyPat. */
+	@Override
+	public boolean hasRemaining() {
+		return scan.hasNext(anyPat);
+	}
+
+	/** Creates a new scanner of this kind reading from newfile. */
+	@Override
+	public PLScanner<T> pushScannerFile(InputStream newfile,
+			String prompt) {
+		return new PropertyListStreamScanner<T>(this,dict,nullToken).setFile(newfile,prompt);
+	}
+
+	/** Creates a new scanner of this kind reading from the string exp. */
+	@Override
+	public PLScanner<T> pushScanner(String exp) {
+		return new PropertyListStreamScanner<T>(this,dict,nullToken).set(exp);
+	}
+
+	/** Creates a new scanner of this kind reading from the named file. */
+	@Override
+	public PLScanner<T> pushScannerFile(String file)
+			throws FileNotFoundException {
+		return new PropertyListStreamScanner<T>(this,dict,nullToken).setFile(file);
+	}
+
+	/** Makes this scanner read from the string exp. */
+	@Override
+	public PLScanner<T> set(String exp) {
+		Reader reader = new StringReader(exp);
+		scan = new Scanner(reader);
+		return this;
+	}
+
+	/** Makes this scanner read from the named file. */
+	@Override
+	public PLScanner<T> setFile(String file)
+			throws FileNotFoundException {
+		Reader reader = new FileReader(file);
+		scan = new Scanner(reader);
+		return this;
+	}
+
+	/** Makes this scanner read from the given reader. */
+	@Override
+	public PLScanner<T> set(InputStreamReader reader) {
+		scan = new Scanner(reader);
+		return this;
+	}
+
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plparser/PropertyListStreamTokenizer.java	Thu Sep 02 10:00:23 2010 +0900
@@ -0,0 +1,150 @@
+package plparser;
+
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StreamTokenizer;
+import java.io.StringReader;
+
+/**
+ * Scanner built on {@link StreamTokenizer}.
+ *
+ * <p>This tokenizer is simple to implement, but apparently it cannot handle
+ * "" or '' strings that span multiple lines.
+ *
+ * @author kono
+ *
+ * @param <T> type parameter passed through to {@code Token} and {@code Dictionary}
+ */
+public class PropertyListStreamTokenizer<T> extends PLScannerImpl<T>
+		implements PLScanner<T> {
+
+	// Underlying tokenizer; assigned by set(...) / setFile(...), null before that.
+	private StreamTokenizer tokenizer;
+
+	public final static char QUOTE = '\'';
+	public final static char DOUBLE_QUOTE = '"';
+
+	/**
+	 * Creates a tokenizer that shares the dictionary and null token of an
+	 * existing one.
+	 *
+	 * @param propertyListStreamTokenizer tokenizer this one is created from.
+	 *        NOTE(review): currently unused - confirm whether it should be
+	 *        remembered as the previous scanner
+	 * @param dict shared token dictionary
+	 * @param nullToken token returned at the end of input
+	 */
+	public PropertyListStreamTokenizer(
+			PropertyListStreamTokenizer<T> propertyListStreamTokenizer,
+			Dictionary<T> dict, Token<T> nullToken) {
+		this.dict = dict;
+		this.nullToken = nullToken;
+	}
+
+
+	/**
+	 * Creates a tokenizer with its own empty-text NULL token.
+	 *
+	 * @param dict dictionary used to look up and register word tokens
+	 */
+	public PropertyListStreamTokenizer(Dictionary<T> dict) {
+		this.dict = dict;
+		nullToken = new Token<T>("",TokenID.NULL);
+	}
+
+
+	/**
+	 * Replaces the default syntax table of the underlying tokenizer with the
+	 * character classes used for property lists and enables both comment
+	 * styles.
+	 *
+	 * <p>NOTE(review): nothing in this class calls {@code init()}, and
+	 * {@code tokenizer} is null until {@code set(...)} or
+	 * {@code setFile(...)} has been called.
+	 */
+	public void init() {
+		tokenizer.resetSyntax();
+		tokenizer.wordChars('0', '9');
+		tokenizer.wordChars('a', 'z');
+		tokenizer.wordChars('A', 'Z');
+		tokenizer.wordChars('_', '_');
+		tokenizer.ordinaryChar('=');
+		tokenizer.ordinaryChar('{');
+		tokenizer.ordinaryChar('}');
+		tokenizer.ordinaryChar('(');
+		tokenizer.ordinaryChar(')');
+		tokenizer.ordinaryChar(';');
+		tokenizer.ordinaryChar(',');
+		tokenizer.whitespaceChars(' ', ' ');
+		tokenizer.whitespaceChars('\t', '\t');
+		tokenizer.whitespaceChars('\n', '\n');
+		tokenizer.whitespaceChars('\r', '\r');
+		tokenizer.quoteChar(QUOTE);
+		tokenizer.quoteChar(DOUBLE_QUOTE);
+		// NOTE(review): parseNumbers() marks the digits as numeric again
+		// after the wordChars('0','9') call above - confirm this is intended.
+		tokenizer.parseNumbers();
+		tokenizer.eolIsSignificant(false);
+		tokenizer.slashStarComments(true);
+		tokenizer.slashSlashComments(true);
+	}
+
+
+	/**
+	 * Reads the next token from the underlying tokenizer.
+	 *
+	 * @return a NUMBER token for numbers, the dictionary token for words, a
+	 *         VARIABLE token for quoted strings, an Any token holding the
+	 *         character for any other character, or the null token at the
+	 *         end of input and when reading fails
+	 */
+	@Override
+	public Token<T> nextToken() {
+		int token;
+		nextToken = nullToken;
+		lineno = tokenizer.lineno();
+		try {
+			token = tokenizer.nextToken();
+			switch (token) {
+			case StreamTokenizer.TT_EOF:
+				return nextToken;
+			case StreamTokenizer.TT_NUMBER:
+				// numbers are delivered in nval; sval is null for them
+				return nextToken = new Token<T>(numberText(tokenizer.nval),TokenID.NUMBER);
+			case StreamTokenizer.TT_WORD:
+				String s = tokenizer.sval;
+				Token<T> t;
+				if ((t = dict.get(s))==null) {
+					dict.put(s, t = new Token<T>(s,TokenID.Any));
+				}
+				return nextToken = t;
+			case QUOTE:
+			case DOUBLE_QUOTE:
+				// for quoted strings sval holds the body of the string
+				return nextToken = new Token<T>(tokenizer.sval,TokenID.VARIABLE);
+			case StreamTokenizer.TT_EOL:
+				if (prompt!=null) System.out.print(prompt);
+				return nextToken();
+			default:
+				// ordinary character: the token type is the character itself
+				// and sval is null
+				return nextToken = new Token<T>(String.valueOf((char)token),TokenID.Any);
+			}
+		} catch (IOException e) {
+			// best effort: a read failure is reported like the end of input
+			return nullToken;
+		}
+	}
+
+	/**
+	 * Formats a number read by the tokenizer; whole values are written
+	 * without a fractional part.
+	 */
+	private static String numberText(double value) {
+		long whole = (long)value;
+		return whole==value ? Long.toString(whole) : Double.toString(value);
+	}
+
+	/**
+	 * Reports whether another token is available. The token that is read to
+	 * find out is pushed back so that the next call of {@link #nextToken()}
+	 * still returns it.
+	 *
+	 * @return false at the end of input or when reading fails
+	 */
+	@Override
+	public boolean hasRemaining() {
+		try {
+			int type = tokenizer.nextToken();
+			tokenizer.pushBack();
+			return type!=StreamTokenizer.TT_EOF;
+		} catch (IOException e) {
+			return false;
+		}
+	}
+
+	/** Creates a new tokenizer of this kind reading from {@code newfile}. */
+	@Override
+	public PLScanner<T> pushScannerFile(InputStream newfile,
+			String prompt) {
+		return new PropertyListStreamTokenizer<T>(this,dict,nullToken).setFile(newfile,prompt);
+	}
+
+	/** Creates a new tokenizer of this kind reading from the string {@code exp}. */
+	@Override
+	public PLScanner<T> pushScanner(String exp) {
+		return new PropertyListStreamTokenizer<T>(this,dict,nullToken).set(exp);
+	}
+
+	/** Creates a new tokenizer of this kind reading from the named file. */
+	@Override
+	public PLScanner<T> pushScannerFile(String file)
+			throws FileNotFoundException {
+		return new PropertyListStreamTokenizer<T>(this,dict,nullToken).setFile(file);
+	}
+
+	/** Makes this tokenizer read from the string {@code exp}. */
+	@Override
+	public PLScanner<T> set(String exp) {
+		Reader reader = new StringReader(exp);
+		tokenizer = new StreamTokenizer(reader);
+		return this;
+	}
+
+	/** Makes this tokenizer read from the named file. */
+	@Override
+	public PLScanner<T> setFile(String file)
+			throws FileNotFoundException {
+		Reader reader = new FileReader(file);
+		tokenizer = new StreamTokenizer(reader);
+		return this;
+	}
+
+	/** Makes this tokenizer read from the given reader. */
+	@Override
+	public PLScanner<T> set(InputStreamReader reader) {
+		tokenizer = new StreamTokenizer(reader);
+		return this;
+	}
+
+
+}
--- a/src/plparser/TestScanner.java	Wed Sep 01 18:43:06 2010 +0900
+++ b/src/plparser/TestScanner.java	Thu Sep 02 10:00:23 2010 +0900
@@ -27,7 +27,8 @@
 		Dictionary<Property> dict = new Dictionary<Property>();
 		 // scan = new PropertyListScanner<Property>(dict);
 		// scan = new PropertyListStreamTokenizer<Property>(dict);
-		scan = new PropertyListStreamScanner<Property>(dict);
+		// scan = new PropertyListStreamScanner<Property>(dict);
+		scan = new PropertyListCharTokenizer<Property>(dict);
 	}