# HG changeset patch # User one # Date 1283389223 -32400 # Node ID 29e309b2f624c2de4929f39ff35afb6df0a7b684 # Parent 8d0f9c1816f505e0e5bf123947e72624194ed22d Try several Tokenizer diff -r 8d0f9c1816f5 -r 29e309b2f624 src/plparser/PropertyListCharTokenizer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plparser/PropertyListCharTokenizer.java Thu Sep 02 10:00:23 2010 +0900 @@ -0,0 +1,113 @@ +package plparser; + +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.nio.CharBuffer; + +public class PropertyListCharTokenizer extends PropertyListScanner + implements PLScanner { + + public PropertyListCharTokenizer(Dictionary dict) { + super(dict); + } + + public PropertyListCharTokenizer( + PLScanners, + Dictionary dict, Token nullToken) { + super(dict); + this.nullToken = nullToken; + this.prev = null; + } + + public char ch; + + @Override + public Token nextToken() { + nextToken = nullToken; + if (cb==null) return nextToken; + if (!hasRemaining()) return nextToken; + while(Character.isSpaceChar(ch)) { + if (!hasRemaining()) return nextToken; + ch = nextChar(); + } + CharBuffer w = CharBuffer.allocate(BufferSize); + if (Character.isJavaIdentifierStart(ch)) { + w.put(ch); + while(hasRemaining()&&Character.isJavaIdentifierPart((ch=nextChar()))) { + w.put(ch); + } + return lookupDict(w); + } else if (Character.isDigit(ch)||ch=='-'||ch=='+') { + w.put(ch); + while(hasRemaining()&&Character.isDigit((ch=nextChar()))) { + w.put(ch); + } + return nextToken = new Token(w.toString(),TokenID.NUMBER); + } else if (ch=='/') { + w.put(ch); + if (!hasRemaining()) return new Token(w.toString(),TokenID.Any); + ch = nextChar(); + if (ch=='/') { + while(hasRemaining() && (ch=nextChar())!='\n'); + if (!hasRemaining())return nullToken; + ch = nextChar(); + return nextToken(); + } + if (ch=='*') { + while(hasRemaining() && !((ch=nextChar())=='*'&&(ch=nextChar())=='/')); + if (!hasRemaining())return nullToken; + ch = nextChar(); + return nextToken(); + } + return new Token(w.toString(),TokenID.Any); + } else if (ch=='\'') { + while(hasRemaining() && (ch=nextChar())!='\'') w.put(ch); + if (!hasRemaining())return nullToken; // non terminate string + ch = nextChar(); + return lookupDict(w); + } else if (ch=='"') { + while(hasRemaining() && (ch=nextChar())!='"') w.put(ch); + if (!hasRemaining())return nullToken; // non terminate string + ch = nextChar(); + return lookupDict(w); + } else { + nextToken = lookupDict(w); + if (!hasRemaining())return nextToken; + ch = nextChar(); + return nextToken; + } + } + + private Token lookupDict(CharBuffer w) { + Token t; + String s = w.toString(); + if ((t = dict.get(s))==null) { + dict.put(s, t = new Token(s,TokenID.Any)); + } + return nextToken = t; + } + + private char nextChar() { + if (!cb.hasRemaining()) extendInput(); + char ch = cb.get(); + return ch; + } + + + @Override + public PLScanner pushScannerFile(InputStream newfile, String prompt) { + return new PropertyListCharTokenizer(this,dict,nullToken).setFile(newfile,prompt); + } + + @Override + public PLScanner pushScanner(String exp) { + return new PropertyListCharTokenizer(this,dict,nullToken).set(exp); + } + + @Override + public PLScanner pushScannerFile(String newfile) + throws FileNotFoundException { + return new PropertyListCharTokenizer(this,dict,nullToken).setFile(newfile); + } +} + diff -r 8d0f9c1816f5 -r 29e309b2f624 src/plparser/PropertyListScanner.java --- a/src/plparser/PropertyListScanner.java Wed Sep 01 18:43:06 2010 +0900 +++ b/src/plparser/PropertyListScanner.java Thu Sep 02 10:00:23 2010 +0900 @@ -31,7 +31,7 @@ */ public Matcher scan; - private CharBuffer cb; + public CharBuffer cb; public PropertyListScanner(Dictionary dict) { this.dict = dict; nullToken = new Token("",TokenID.NULL); @@ -118,7 +118,8 @@ } else if ((s=next(numPat))!=null) { return nextToken = new Token(s,TokenID.NUMBER); } else if ((s=next(commentPat))!=null) { - while(cb.hasRemaining()&&next(anyPat)!=null); // skip until eol (in case of buffer full) + cb.get(); scan.reset(); lineno++; + // while(cb.hasRemaining()&&next(anyPat)!=null); // skip until eol (in case of buffer full) continue; } else if ((s=next(commentPat1))!=null) { while(next(commentPat1End)==null) { diff -r 8d0f9c1816f5 -r 29e309b2f624 src/plparser/PropertyListStreamScanner.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plparser/PropertyListStreamScanner.java Thu Sep 02 10:00:23 2010 +0900 @@ -0,0 +1,174 @@ +package plparser; + +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.util.Scanner; +import java.util.regex.Pattern; + +/** + * delimiter に何を設定しても動いてくれない。 + * */ + +public class PropertyListStreamScanner extends PLScannerImpl implements + PLScanner { + + private Scanner scan; + + public PropertyListStreamScanner( + PLScanner s, + Dictionary dict, Token nullToken) { + this.dict = dict; + this.nullToken = nullToken; + } + + + public PropertyListStreamScanner(Dictionary dict) { + this.dict = dict; + nullToken = new Token("",TokenID.NULL); + } + + + public void init() { + String pattern = "."; + scan.useDelimiter(pattern); + } + + // Pattern must contain exact 1 group + private static Pattern tokenPat = Pattern.compile( + "([={}(),;])" + ); + private static Pattern namePat = Pattern.compile("([_a-zA-Z][\\@\\w]*)"); + private static final Pattern numPat = Pattern.compile("([0-9]+)"); + private static final Pattern stringPat1 = Pattern.compile("\\\"([^\"]*)\\\""); + private static final Pattern stringPat = Pattern.compile("\\'([^\\']*)\\'"); + private static final Pattern stringPat1cont = Pattern.compile("\\\"([^\"]*)$"); + private static final Pattern stringPatCont = Pattern.compile("\\'([^\\']*)$"); + private static final Pattern stringPat1End = Pattern.compile("([^\"]*)\\\""); + private static final Pattern stringPatEnd = Pattern.compile("([^\\']*)\'"); + private static final Pattern commentPat = Pattern.compile("(//.*)"); + private static final Pattern commentPat1 = Pattern.compile("(/\\*)"); + private static final Pattern commentPat1End = Pattern.compile("(.*\\*/)"); + private static final Pattern errorPat = Pattern.compile("([^\\s])"); + private static final Pattern anyPat = Pattern.compile("(.)"); + + @Override + public Token nextToken() { + String s; + nextToken = nullToken; + while(hasRemaining()) { + if ((s=scan.next(tokenPat))!=null) { + Token t; + if ((t = dict.get(s))==null) { + dict.put(s, t = new Token(s,TokenID.Any)); + } + return nextToken = t; + } else if ((s=scan.next(stringPatCont))!=null) { + // non terminated string + String s1; + while((s1=scan.next(stringPatEnd))==null) { + s += scan.nextLine(); + lineno++; + } + s += s1; + Token t; + if ((t = dict.get(s))==null) { + dict.put(s, t = new Token(s,TokenID.VARIABLE)); + } + return nextToken = t; + } else if ((s=scan.next(stringPat1cont))!=null) { + // non terminated string + String s1; + while((s1=scan.next(stringPat1End))==null) { + s += scan.nextLine(); + lineno++; + } + s += s1; + Token t; + if ((t = dict.get(s))==null) { + dict.put(s, t = new Token(s,TokenID.VARIABLE)); + } + return nextToken = t; + } else if ((s=scan.next(stringPat))!=null||(s=scan.next(stringPat1))!=null||(s=scan.next(namePat))!=null) { + Token t; + if ((t = dict.get(s))==null) { + dict.put(s, t = new Token(s,TokenID.VARIABLE)); + } + if (t.type!=TokenID.VARIABLE) { + t = new Token(s,TokenID.VARIABLE); + } + return nextToken = t; + } else if ((s=scan.next(numPat))!=null) { + return nextToken = new Token(s,TokenID.NUMBER); + } else if ((s=scan.next(commentPat))!=null) { + scan.nextLine(); + continue; + } else if ((s=scan.next(commentPat1))!=null) { + while(scan.next(commentPat1End)==null) { + scan.nextLine(); + lineno++; + } + continue; + } else if ((s=scan.next(errorPat))!=null) { + error("Don't understand '"+s+"'"); + continue; + } else if ((s=scan.next(anyPat))!=null) { + // skip space + continue; + } else { + lineno++; + } + } + return nextToken; + } + + + @Override + public boolean hasRemaining() { + return scan.hasNext(anyPat); + } + + @Override + public PLScanner pushScannerFile(InputStream newfile, + String prompt) { + return new PropertyListStreamScanner(this,dict,nullToken).setFile(newfile,prompt); + } + + @Override + public PLScanner pushScanner(String exp) { + return new PropertyListStreamScanner(this,dict,nullToken).set(exp); + } + + @Override + public PLScanner pushScannerFile(String file) + throws FileNotFoundException { + return new PropertyListStreamScanner(this,dict,nullToken).setFile(file); + } + + @Override + public PLScanner set(String exp) { + Reader reader = new StringReader(exp); + scan = new Scanner(reader); + return this; + } + + @Override + public PLScanner setFile(String file) + throws FileNotFoundException { + Reader reader = new FileReader(file); + scan = new Scanner(reader); + return this; + } + + @Override + public PLScanner set(InputStreamReader reader) { + scan = new Scanner(reader); + return this; + } + + + +} diff -r 8d0f9c1816f5 -r 29e309b2f624 src/plparser/PropertyListStreamTokenizer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plparser/PropertyListStreamTokenizer.java Thu Sep 02 10:00:23 2010 +0900 @@ -0,0 +1,150 @@ +package plparser; + +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StreamTokenizer; +import java.io.StringReader; + +/** + * このTokenizer は実装は簡単だが、複数行を持つ"",''を取り扱えないらしい。 + * + * @author kono + * + * @param + */ +public class PropertyListStreamTokenizer extends PLScannerImpl + implements PLScanner { + + private StreamTokenizer tokenizer; + + public final static char QUOTE = '\''; + public final static char DOUBLE_QUOTE = '"'; + + public PropertyListStreamTokenizer( + PropertyListStreamTokenizer propertyListStreamTokenizer, + Dictionary dict, Token nullToken) { + this.dict = dict; + this.nullToken = nullToken; + } + + + public PropertyListStreamTokenizer(Dictionary dict) { + this.dict = dict; + nullToken = new Token("",TokenID.NULL); + } + + + public void init() { + tokenizer.resetSyntax(); + tokenizer.wordChars('0', '9'); + tokenizer.wordChars('a', 'z'); + tokenizer.wordChars('A', 'Z'); + tokenizer.wordChars('_', '_'); + tokenizer.ordinaryChar('='); + tokenizer.ordinaryChar('{'); + tokenizer.ordinaryChar('}'); + tokenizer.ordinaryChar('('); + tokenizer.ordinaryChar(')'); + tokenizer.ordinaryChar(';'); + tokenizer.ordinaryChar(','); + tokenizer.whitespaceChars(' ', ' '); + tokenizer.whitespaceChars('\t', '\t'); + tokenizer.whitespaceChars('\n', '\n'); + tokenizer.whitespaceChars('\r', '\r'); + tokenizer.quoteChar(QUOTE); + tokenizer.quoteChar(DOUBLE_QUOTE); + tokenizer.parseNumbers(); + tokenizer.eolIsSignificant(false); + tokenizer.slashStarComments(true); + tokenizer.slashSlashComments(true); + } + + + @Override + public Token nextToken() { + int token; + nextToken = nullToken; + lineno = tokenizer.lineno(); + try { + token = tokenizer.nextToken(); + switch (token) { + case StreamTokenizer.TT_EOF: + return nextToken; + case StreamTokenizer.TT_NUMBER: + return nextToken = new Token(tokenizer.sval,TokenID.NUMBER); + case StreamTokenizer.TT_WORD: + String s = tokenizer.sval; + Token t; + if ((t = dict.get(s))==null) { + dict.put(s, t = new Token(s,TokenID.Any)); + } + return nextToken = t; + case QUOTE: + case DOUBLE_QUOTE: + return nextToken = new Token(tokenizer.sval,TokenID.VARIABLE); + case StreamTokenizer.TT_EOL: + if (prompt!=null) System.out.print(prompt); + return nextToken(); + default: + return nextToken = new Token(tokenizer.sval,TokenID.Any); + } + } catch (IOException e) { + return nullToken; + } + } + + @Override + public boolean hasRemaining() { + int nextToken = StreamTokenizer.TT_EOF; + try { + nextToken = tokenizer.nextToken(); + } catch (IOException e) { + return false; + }; + return nextToken!=StreamTokenizer.TT_EOF; + } + + @Override + public PLScanner pushScannerFile(InputStream newfile, + String prompt) { + return new PropertyListStreamTokenizer(this,dict,nullToken).setFile(newfile,prompt); + } + + @Override + public PLScanner pushScanner(String exp) { + return new PropertyListStreamTokenizer(this,dict,nullToken).set(exp); + } + + @Override + public PLScanner pushScannerFile(String file) + throws FileNotFoundException { + return new PropertyListStreamTokenizer(this,dict,nullToken).setFile(file); + } + + @Override + public PLScanner set(String exp) { + Reader reader = new StringReader(exp); + tokenizer = new StreamTokenizer(reader); + return this; + } + + @Override + public PLScanner setFile(String file) + throws FileNotFoundException { + Reader reader = new FileReader(file); + tokenizer = new StreamTokenizer(reader); + return this; + } + + @Override + public PLScanner set(InputStreamReader reader) { + tokenizer = new StreamTokenizer(reader); + return this; + } + + +} diff -r 8d0f9c1816f5 -r 29e309b2f624 src/plparser/TestScanner.java --- a/src/plparser/TestScanner.java Wed Sep 01 18:43:06 2010 +0900 +++ b/src/plparser/TestScanner.java Thu Sep 02 10:00:23 2010 +0900 @@ -27,7 +27,8 @@ Dictionary dict = new Dictionary(); // scan = new PropertyListScanner(dict); // scan = new PropertyListStreamTokenizer(dict); - scan = new PropertyListStreamScanner(dict); + // scan = new PropertyListStreamScanner(dict); + scan = new PropertyListCharTokenizer(dict); }