Mercurial > hg > Members > kono > PLparser
view src/plparser/PropertyListScanner.java @ 0:b0dee5b76b12
Scanner worked.
author | kono@ie.u-ryukyu.ac.jp |
---|---|
date | Sat, 28 Aug 2010 16:07:00 +0900 |
parents | |
children | 29c0866e3a84 |
line wrap: on
line source
package plparser;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.CharBuffer;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Tokenizer for property lists, implemented with {@link Pattern}/{@link Matcher}
 * running directly over a {@link CharBuffer}.
 *
 * <p>Scanners form a stack through {@link #prev} so that nested input can be
 * parsed and the outer input resumed afterwards:
 * <pre>
 *   scanner = scanner.pushScanner(exp);
 *   parser.parse(...);
 *   scanner = scanner.popScanner();
 * </pre>
 *
 * <p>Input comes either from a String (the buffer wraps the text) or from a
 * reader (the buffer holds at most {@code BufferSize} characters and is
 * refilled by {@link #extendInput()}).
 *
 * <p>One of the {@code set}/{@code setFile} methods must be called before
 * {@link #nextToken()}; until then {@link #cb} and {@link #scan} are null.
 *
 * 2010/8 Shinji Kono
 */
public class PropertyListScanner<Node> {

    // A generic class cannot offer a single static instance typed by Node,
    // so there is deliberately no singleton accessor.

    /** Number of line terminators skipped so far; reset to 0 when a reader is installed. */
    public int lineno;
    /** Matcher bound to {@link #cb}; must be reset whenever the buffer position changes. */
    public Matcher scan;
    /** Result of the most recent {@link #nextToken()} call. */
    public Token<Node> nextToken;
    /** Symbol table used to share one Token per distinct spelling. */
    public Dictionary<Node> dict;
    /** Not used inside this class; kept for callers that may link scanners. */
    public PropertyListScanner<Node> next;
    /** Current input window; "remaining" characters are the unread input. */
    protected CharBuffer cb;
    /** Reader the buffer is refilled from, or null for String input / after end of input. */
    private InputStreamReader file;
    /** True only when {@link #file} was opened by this class and must be closed by it. */
    private boolean ownsReader;
    /** Name used as prefix in error messages, or null. */
    private String filename;
    /** Enclosing scanner on the scanner stack, or null. */
    public PropertyListScanner<Node> prev;
    /** Token returned at end of input. */
    public Token<Node> nullToken;
    /** If non-null, printed to System.out before every read from the reader. */
    public String prompt;

    /**
     * Creates a root scanner with a fresh null token.
     *
     * @param dict symbol table used to intern tokens
     */
    public PropertyListScanner(Dictionary<Node> dict) {
        this.dict = dict;
        nullToken = new Token<Node>("",TokenID.NULL);
    }

    /**
     * Creates a scanner stacked on top of {@code prev} (scanner container for the stack).
     *
     * @param prev      scanner to return to on {@link #popScanner()}
     * @param dict      shared symbol table
     * @param nullToken shared end-of-input token
     */
    public PropertyListScanner(PropertyListScanner<Node> prev, Dictionary<Node> dict, Token<Node> nullToken) {
        this.prev = prev;
        this.dict = dict;
        this.nullToken = nullToken;
    }

    // Every pattern must contain exactly one capturing group: next() returns group 1.
    public static Pattern tokenPat = Pattern.compile(
            "([={}(),;])"
            );
    public static Pattern namePat = Pattern.compile("([a-zA-Z][\\@\\w]*)");
    public static final Pattern numPat = Pattern.compile("([0-9]+)");
    public static final Pattern stringPat1 = Pattern.compile("\\\"([^\"]*)\\\"");
    public static final Pattern stringPat = Pattern.compile("\\'([^\\']*)\\'");
    // Opening quote with no closing quote before the end of the buffered input.
    public static final Pattern stringPat1cont = Pattern.compile("\\\"([^\"]*)$");
    public static final Pattern stringPatCont = Pattern.compile("\\'([^\\']*)$");
    // Rest of a string up to and including its closing quote.
    public static final Pattern stringPat1End = Pattern.compile("([^\"]*)\\\"");
    public static final Pattern stringPatEnd = Pattern.compile("([^\\']*)\'");
    public static final Pattern commentPat = Pattern.compile("(//.*)");
    public static final Pattern errorPat = Pattern.compile("([^\\s])");
    public static final Pattern anyPat = Pattern.compile("(.)");

    private static final int BufferSize = 4096;

    /**
     * Returns the next token of the input.
     *
     * <p>There is no look-ahead and no put-back. The method never returns
     * null: at end of input it returns {@link #nullToken}. The result is also
     * stored in {@link #nextToken}.
     *
     * <p>Punctuation, quoted strings and names are interned through
     * {@link #dict}; numbers always produce a new Token. Comments starting
     * with {@code //} and whitespace are skipped; any other character is
     * reported through {@link #error(String)} and skipped.
     *
     * <p>NOTE(review): the original author's notes say a Token may carry a
     * macro binding (predicate, infix or prefix operator) resolved elsewhere;
     * that is not visible in this class.
     *
     * @return the next token, or {@code nullToken} at end of input
     */
    public Token<Node> nextToken() {
        String s;
        nextToken = nullToken;
        while (hasRemaining()) {
            scan.reset(); // tell the matcher that the CharBuffer has changed
            if ((s = next(tokenPat)) != null) {
                Token<Node> t = dict.get(s);
                if (t == null) {
                    t = new Token<Node>(s,TokenID.Any);
                    dict.put(s, t);
                }
                return nextToken = t;
            } else if ((s = next(stringPatCont)) != null) {
                // string not terminated inside the buffered input
                return nextToken = internVariable(readStringRemainder(s, stringPatEnd));
            } else if ((s = next(stringPat1cont)) != null) {
                // string not terminated inside the buffered input
                return nextToken = internVariable(readStringRemainder(s, stringPat1End));
            } else if ((s = next(stringPat)) != null
                    || (s = next(stringPat1)) != null
                    || (s = next(namePat)) != null) {
                return nextToken = internVariable(s);
            } else if ((s = next(numPat)) != null) {
                return nextToken = new Token<Node>(s,TokenID.NUMBER);
            } else if ((s = next(commentPat)) != null) {
                // skip until end of line (needed when the comment filled the buffer)
                while (cb.hasRemaining() && next(anyPat) != null) {
                    scan.reset();
                }
                continue;
            } else if ((s = next(errorPat)) != null) {
                error("Don't understand '"+s+"'");
                continue;
            } else if ((s = next(anyPat)) != null) {
                // skip whitespace other than line terminators
                continue;
            } else {
                // Only a line terminator gets here. Count CR LF as one line:
                // the CR is discarded silently and the following LF is counted.
                char eol = cb.get();
                boolean crBeforeLf = eol == '\r'
                        && hasRemaining()
                        && cb.get(cb.position()) == '\n';
                if (!crBeforeLf) {
                    lineno++;
                }
            }
        }
        return nextToken;
    }

    /** Returns the interned VARIABLE token for {@code s}, creating it on first use. */
    private Token<Node> internVariable(String s) {
        Token<Node> t = dict.get(s);
        if (t == null) {
            t = new Token<Node>(s,TokenID.VARIABLE);
            dict.put(s, t);
        }
        return t;
    }

    /**
     * Collects the rest of a quoted string whose opening part ran to the end
     * of the buffered input.
     *
     * @param head   text already consumed after the opening quote
     * @param endPat pattern matching the remaining text up to the closing quote
     * @return the complete string contents without quotes; if the input ends
     *         first, an error is reported and the text read so far is returned
     */
    private String readStringRemainder(String head, Pattern endPat) {
        StringBuilder text = new StringBuilder(head);
        while (hasRemaining()) {
            scan.reset();
            String tail = next(endPat);
            if (tail != null) {
                return text.append(tail).toString();
            }
            // No closing quote in the buffer: keep everything buffered and
            // mark it consumed so that hasRemaining() can refill the buffer.
            text.append(cb.toString());
            cb.position(cb.limit());
        }
        error("Unterminated string");
        return text.toString();
    }

    /**
     * Tries {@code pattern} at the current buffer position.
     *
     * <p>On a match the buffer position is moved past the whole match and the
     * text of group 1 is returned. When the matcher reached the end of the
     * buffered input, the input is extended first and the match retried, so
     * that tokens are not cut at a buffer boundary.
     *
     * @param pattern pattern with exactly one capturing group
     * @return text of group 1, or null if the pattern does not match here
     */
    protected String next(Pattern pattern) {
        String s = null;
        while (true) {
            boolean match = scan.usePattern(pattern).lookingAt();
            if (scan.hitEnd() && extendInput()) {
                // input was extended: try again on the larger input
                scan.reset();
                continue;
            }
            if (match) {
                // The original author notes that scan.group(1) did not work
                // on Java 6, hence the explicit substring.
                s = cb.toString().substring(scan.start(1),scan.end(1));
                // scan.end() is relative to the current buffer position
                cb.position(cb.position()+scan.end());
                // scan.reset() is done at the top of nextToken()
            }
            if (scan.hitEnd()) {
                // The earlier extendInput() may have failed because nothing
                // had been consumed yet. Now there may be room: try again.
                extendInput();
            }
            return s;
        }
    }

    /**
     * @return true if unread characters are buffered or more could be read
     */
    public boolean hasRemaining() {
        return cb.hasRemaining()||extendInput();
    }

    /**
     * Reads more characters from the reader into the buffer.
     *
     * <p>Nothing is done when there is no reader or when nothing has been
     * consumed from the buffer yet (position 0). Otherwise the unread
     * characters are moved to the front and the freed space is filled. At end
     * of input or on a read error the reader is dropped (and closed if this
     * scanner opened it).
     *
     * @return true if at least one character was added
     */
    protected boolean extendInput() {
        if (file == null || cb.position() == 0) {
            return false;
        }
        // move remaining data to the top, set position for the next read
        cb.compact();
        boolean extended = false;
        try {
            if (prompt != null) {
                System.out.print(prompt);
            }
            extended = file.read(cb) > 0;
        } catch (IOException e) {
            error("read failed: " + e.getMessage());
        }
        if (!extended) {
            releaseReader();
        }
        cb.flip(); // back to reading mode
        return extended;
    }

    /** Forgets the current reader, closing it when it was opened by this scanner. */
    private void releaseReader() {
        InputStreamReader old = file;
        file = null;
        if (ownsReader && old != null) {
            closeQuietly(old);
        }
        ownsReader = false;
    }

    /** Closes {@code reader}; a failure to close leaves nothing to recover. */
    private static void closeQuietly(InputStreamReader reader) {
        try {
            reader.close();
        } catch (IOException ignored) {
            // nothing useful can be done when close fails
        }
    }

    /**
     * @param exp text to scan
     * @return a new scanner for {@code exp} stacked on this one
     */
    protected PropertyListScanner<Node> pushScanner(String exp) {
        // the current scanner is kept as prev for nested parsing
        return new PropertyListScanner<Node>(this,dict,nullToken).set(exp);
    }

    /**
     * @param newfile name of the file to scan
     * @return a new scanner for the file stacked on this one
     * @throws FileNotFoundException if the file cannot be opened
     */
    protected PropertyListScanner<Node> pushScannerFile(String newfile) throws FileNotFoundException {
        // the current scanner is kept as prev for the nested file
        return new PropertyListScanner<Node>(this,dict,nullToken).setFile(newfile);
    }

    /**
     * @param newfile stream to scan
     * @param prompt  text printed before each read, or null
     * @return a new scanner for the stream stacked on this one
     */
    public PropertyListScanner<Node> pushScannerFile(InputStream newfile,String prompt) {
        return new PropertyListScanner<Node>(this,dict,nullToken).setFile(newfile,prompt);
    }

    /**
     * @return the scanner this one was stacked on, or null
     */
    protected PropertyListScanner<Node> popScanner() {
        return prev;
    }

    /** Returns the nearest scanner on the stack that has a file name, or null. */
    private PropertyListScanner<Node> findFileName() {
        for (PropertyListScanner<Node> s = this; s != null; s = s.prev) {
            if (s.filename != null) {
                return s;
            }
        }
        return null;
    }

    /**
     * Reads from a String. Any reader in use is dropped (and closed if this
     * scanner opened it) and the file name is cleared.
     *
     * @param exp text to scan
     * @return this scanner
     */
    public PropertyListScanner<Node> set(String exp) {
        releaseReader();
        cb = CharBuffer.wrap(exp);
        scan = tokenPat.matcher(cb);
        filename = null;
        nextToken = nullToken;
        return this;
    }

    /**
     * Reads from a file. A single symbol cannot be bigger than the buffer
     * size. The reader opened here is closed by this scanner when the end of
     * the file is reached or the input is replaced.
     *
     * @param file name of the file to scan
     * @return this scanner
     * @throws FileNotFoundException if the file cannot be opened
     */
    public PropertyListScanner<Node> setFile(String file) throws FileNotFoundException {
        this.filename = file;
        nextToken = nullToken;
        FileReader reader = new FileReader(file);
        set(reader);
        if (this.file == reader) {
            ownsReader = true;
        } else {
            // set() fell back to empty input (empty file or read error)
            closeQuietly(reader);
        }
        return this;
    }

    /**
     * Reads from a reader supplied by the caller; the caller stays
     * responsible for closing it. If nothing can be read, the scanner falls
     * back to empty String input, exactly as {@code set("")}.
     *
     * @param file reader to scan
     * @return this scanner
     */
    public PropertyListScanner<Node> set(InputStreamReader file) {
        if (this.file != file) {
            releaseReader();
        }
        this.file = file;
        CharBuffer buffer = CharBuffer.allocate(BufferSize);
        boolean filled = false;
        try {
            if (prompt != null) {
                System.out.print(prompt);
            }
            filled = file.read(buffer) > 0;
        } catch (IOException e) {
            error("read failed: " + e.getMessage());
        }
        if (!filled) {
            return set("");
        }
        buffer.flip();
        cb = buffer;
        scan = tokenPat.matcher(cb);
        lineno = 0;
        return this;
    }

    /**
     * Prints an error message to System.err, prefixed with file name and line
     * number of the nearest scanner on the stack that has a file name.
     *
     * @param err message text
     */
    public void error(String err) {
        PropertyListScanner<Node> s = findFileName();
        if (s != null) {
            System.err.print(s.filename+":"+s.lineno+": ");
        }
        System.err.println("error: "+err);
    }

    /**
     * Iterator for test routines:
     * <pre>
     *   for (Token&lt;Node&gt; t : scanner.scanToken(exp)) { ... }
     * </pre>
     *
     * @param exp text to scan
     * @return the tokens of {@code exp}
     */
    public Iterable<Token<Node>> scanToken(String exp) {
        set(exp);
        return iterator();
    }

    /**
     * @param file reader to scan; not closed by this scanner
     * @return the tokens read from {@code file}
     */
    public Iterable<Token<Node>> scanToken(FileReader file) {
        set(file);
        return iterator();
    }

    /** Wraps hasRemaining()/nextToken() as an Iterable; removal is not supported. */
    private Iterable<Token<Node>> iterator() {
        return new Iterable<Token<Node>>() {
            public Iterator<Token<Node>> iterator() {
                return new Iterator<Token<Node>>() {
                    public boolean hasNext() {
                        return hasRemaining();
                    }
                    public Token<Node> next() {
                        return nextToken();
                    }
                    public void remove() {
                        throw new UnsupportedOperationException("remove");
                    }
                };
            }
        };
    }

    /** Reads from a caller-supplied stream, using its toString() as the file name. */
    private PropertyListScanner<Node> setFile(InputStream newfile,String prompt) {
        this.filename = newfile.toString();
        nextToken = nullToken;
        this.prompt = prompt;
        set(new InputStreamReader(newfile));
        return this;
    }
}