view src/plparser/PropertyListScanner.java @ 6:563bcb96e4fa

pretty printer
author one
date Mon, 30 Aug 2010 12:35:23 +0900
parents 29b5497fc942
children 619472ca4742
line wrap: on
line source

package plparser;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.CharBuffer;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Tokenizer for property lists, implemented with {@link Pattern} /
 * {@link Matcher} running over a {@link CharBuffer}.
 *
 * <p>Input comes either from a String ({@link #set(String)}) or from a
 * reader ({@link #set(InputStreamReader)}, {@link #setFile(String)}).
 * Reader input is pulled in chunks of {@code BufferSize} characters; when
 * the matcher runs into the end of the buffered data,
 * {@link #extendInput()} compacts the buffer and reads more.
 *
 * <p>Scanners form a stack through {@link #prev} for nested parsing:
 * <pre>
 *     scanner = scanner.pushScanner(exp);   // or pushScannerFile(...)
 *     ... parse with the nested scanner ...
 *     scanner = scanner.popScanner();
 * </pre>
 *
 * <p>A reader that this class opened itself (via {@link #setFile(String)})
 * is closed when its input is exhausted or when the scanner is given a new
 * input. Readers and streams supplied by the caller are never closed here.
 *
 * <p>2010/8 Shinji Kono
 *
 * @param <Node> node type carried by {@code Token} and {@code Dictionary}
 */
public class PropertyListScanner<Node> {

	//  A generic class cannot expose a shared singleton instance, so there
	//  is deliberately no static getScanner(); create one scanner per
	//  dictionary instead.

	/** Line counter used by {@link #error(String)}; set to 0 when a reader is attached successfully. */
	public int lineno;
	/** Matcher bound to {@link #cb}; re-synchronised with the buffer inside {@link #next(Pattern)}. */
	public Matcher scan;
	/** Token returned by the most recent {@link #nextToken()} call. */
	public Token<Node> nextToken;
	/** Dictionary consulted with {@code get} and filled with {@code put} for punctuation, strings and names. */
	public Dictionary<Node> dict;
	/** Link field; not read or written by this class. */
	public PropertyListScanner<Node> next;
	/** Current input window. All matcher offsets are relative to its position. */
	protected CharBuffer cb;
	/** Reader supplying more input, or null for String input / exhausted input. */
	private InputStreamReader file;
	/** Name shown in error messages, or null when this scanner has no name of its own. */
	private String filename;
	/** True when {@link #file} was opened by this class and must be closed by it. */
	private boolean ownsReader;
	/** Enclosing scanner on the scanner stack (null at the bottom). */
	public PropertyListScanner<Node> prev;
	/** Token returned by {@link #nextToken()} at the end of input. */
	public Token<Node> nullToken ;
	/** If non-null, printed to System.out before every read from the reader. */
	public String prompt;

	/**
	 * Creates a top-level scanner with a fresh {@code TokenID.NULL} end token.
	 *
	 * @param dict dictionary used to look up and register tokens
	 */
	public PropertyListScanner(Dictionary<Node> dict) {
		this.dict = dict;
		nullToken = new Token<Node>("",TokenID.NULL);
	}

	/**
	 * Creates a nested scanner that shares the dictionary and end token of
	 * its parent (scanner container for the stack).
	 *
	 * @param prev      scanner to return to on {@link #popScanner()}
	 * @param dict      shared dictionary
	 * @param nullToken shared end-of-input token
	 */
	public PropertyListScanner(PropertyListScanner<Node> prev, Dictionary<Node> dict, Token<Node> nullToken) {
		this.prev = prev;
		this.dict = dict;
		this.nullToken = nullToken;
	}


	// Every pattern must contain exactly one group: next() returns group(1).
	public static Pattern tokenPat = Pattern.compile(
			"([={}(),;])"
	);
	public static Pattern namePat  = Pattern.compile("([_a-zA-Z][\\@\\w]*)");
	public static final Pattern numPat  = Pattern.compile("([0-9]+)");
	// Complete quoted strings ("..." and '...'); they may contain newlines.
	public static final Pattern stringPat1  = Pattern.compile("\\\"([^\"]*)\\\"");
	public static final Pattern stringPat  = Pattern.compile("\\'([^\\']*)\\'");
	// Opening quote with no closing quote in the buffered input.
	public static final Pattern stringPat1cont  = Pattern.compile("\\\"([^\"]*)$");
	public static final Pattern stringPatCont  = Pattern.compile("\\'([^\\']*)$");
	// Remainder of a string up to its closing quote.
	public static final Pattern stringPat1End  = Pattern.compile("([^\"]*)\\\"");
	public static final Pattern stringPatEnd  = Pattern.compile("([^\\']*)\'");
	public static final Pattern commentPat  = Pattern.compile("(//.*)");
	public static final Pattern commentPat1  = Pattern.compile("(/\\*)");
	// Reluctant quantifier: a block comment ends at the FIRST "*/" on the
	// line, so code between two comments on one line is not swallowed.
	public static final Pattern commentPat1End  = Pattern.compile("(.*?\\*/)");
	public static final Pattern errorPat = Pattern.compile("([^\\s])");
	public static final Pattern anyPat = Pattern.compile("(.)");
	private static final int BufferSize = 4096;

	/**
	 * Returns the next token.
	 *
	 * <p>There is no look-ahead and no put-back. The result is never null:
	 * at the end of the input {@link #nullToken} is returned, so
	 * {@code nextToken.type} is always valid. The result is also stored in
	 * the {@link #nextToken} field.
	 *
	 * <p>Punctuation is looked up in the dictionary and registered as
	 * {@code TokenID.Any} when absent. Quoted strings and names always yield
	 * a {@code TokenID.VARIABLE} token. Numbers yield a fresh
	 * {@code TokenID.NUMBER} token. Comments and white space are skipped;
	 * any other character is reported through {@link #error(String)} and
	 * skipped.
	 *
	 * <p>When the matcher hits the end of the buffered input,
	 * {@link #next(Pattern)} and {@link #hasRemaining()} try to pull in more
	 * data with {@link #extendInput()}.
	 *
	 * @return the next token, or {@link #nullToken} at end of input
	 */
	public Token<Node> nextToken() {
		String s;
		nextToken = nullToken;
		while (hasRemaining()) {
			if ((s = next(tokenPat)) != null) {
				Token<Node> t = dict.get(s);
				if (t == null) {
					dict.put(s, t = new Token<Node>(s,TokenID.Any));
				}
				return nextToken = t;
			} else if ((s = next(stringPatCont)) != null) {
				// '...' whose closing quote is not in the buffer yet
				return nextToken = variableToken(finishString(s, stringPatEnd));
			} else if ((s = next(stringPat1cont)) != null) {
				// "..." whose closing quote is not in the buffer yet
				return nextToken = variableToken(finishString(s, stringPat1End));
			} else if ((s = next(stringPat)) != null
					|| (s = next(stringPat1)) != null
					|| (s = next(namePat)) != null) {
				// quoted strings may span lines; names never contain '\n'
				lineno += countNewlines(s);
				return nextToken = variableToken(s);
			} else if ((s = next(numPat)) != null) {
				return nextToken = new Token<Node>(s,TokenID.NUMBER);
			} else if ((s = next(commentPat)) != null) {
				// The comment may have been cut at a full buffer: keep
				// skipping up to (not including) the line terminator.
				while (cb.hasRemaining() && next(anyPat) != null) {
					// skip
				}
				continue;
			} else if ((s = next(commentPat1)) != null) {
				skipBlockComment();
				continue;
			} else if ((s = next(errorPat)) != null) {
				error("Don't understand '"+s+"'");
				continue;
			} else if ((s = next(anyPat)) != null) {
				// skip space
				continue;
			} else {
				// Only a line terminator is left unmatched. Count '\n'
				// only, so that "\r\n" advances the line number once.
				if (cb.get() == '\n') {
					lineno++;
				}
			}
		}
		return nextToken;
	}

	/**
	 * Returns the VARIABLE token for a name or string. The dictionary entry
	 * is reused (and created when absent); if the entry exists but is not a
	 * VARIABLE, a fresh unregistered VARIABLE token is returned instead.
	 */
	private Token<Node> variableToken(String text) {
		Token<Node> t = dict.get(text);
		if (t == null) {
			dict.put(text, t = new Token<Node>(text,TokenID.VARIABLE));
		}
		if (t.type != TokenID.VARIABLE) {
			t = new Token<Node>(text,TokenID.VARIABLE);
		}
		return t;
	}

	/**
	 * Completes a quoted string whose opening part ran to the end of the
	 * buffered input. Whole buffers are appended until {@code endPat} finds
	 * the closing quote. An unterminated string is reported through
	 * {@link #error(String)} and the text collected so far is returned.
	 *
	 * @param head   text already consumed after the opening quote
	 * @param endPat pattern matching the rest of the string and its closing quote
	 * @return the string contents without quotes
	 */
	private String finishString(String head, Pattern endPat) {
		StringBuilder text = new StringBuilder(head);
		boolean closed = false;
		while (!closed && hasRemaining()) {
			String tail = next(endPat);
			if (tail != null) {
				text.append(tail);
				closed = true;
			} else {
				// No closing quote in the buffered data: take all of it
				// and let hasRemaining() fetch the next chunk.
				text.append(cb.toString());
				cb.position(cb.limit());
			}
		}
		if (!closed) {
			error("unterminated string");
		}
		String result = text.toString();
		lineno += countNewlines(result);
		return result;
	}

	/**
	 * Skips the body of a block comment whose opening has already been
	 * consumed. Characters are dropped one at a time until the terminator
	 * is found on the current line; an unterminated comment is reported
	 * through {@link #error(String)}.
	 */
	private void skipBlockComment() {
		while (hasRemaining()) {
			if (next(commentPat1End) != null) {
				return;
			}
			// hasRemaining() was true and next() consumed nothing,
			// so there is at least one character to drop.
			if (cb.get() == '\n') {
				lineno++;
			}
		}
		error("unterminated comment");
	}

	/** Counts the '\n' characters in {@code text}. */
	private static int countNewlines(String text) {
		int count = 0;
		for (int i = text.indexOf('\n'); i >= 0; i = text.indexOf('\n', i + 1)) {
			count++;
		}
		return count;
	}

	/**
	 * Tries to match {@code pattern} at the current buffer position.
	 *
	 * <p>On success the matched text is consumed (the buffer position is
	 * advanced past the whole match) and group 1 is returned. If the
	 * matcher ran into the end of the buffered data, more input is
	 * requested and the match is retried.
	 *
	 * @param pattern pattern containing exactly one group
	 * @return group 1 of the match, or null when the pattern does not match here
	 */
	protected String next(Pattern pattern) {
		String s = null;
		while (true) {
			// The buffer position may have moved since the last match.
			// reset() makes the matcher region follow the buffer again;
			// without it lookingAt() can index past the remaining chars.
			scan.reset();
			boolean match = scan.usePattern(pattern).lookingAt();
			if (scan.hitEnd() && extendInput()) {
				// input was extended: try again on the larger window
				continue;
			}
			if (match) {
				s = scan.group(1);
				// scan.end() is relative to the buffer position
				cb.position(cb.position()+scan.end());
			}
			if (scan.hitEnd()) {
				// The earlier extendInput() may have been refused because
				// the position was 0. After consuming there may be room.
				extendInput();
			}
			return s;
		}
	}

	/**
	 * Tells whether any input is left, pulling more from the reader when the
	 * buffer is empty. Returns false when no input has been set yet.
	 */
	public boolean hasRemaining() {
		return cb != null && (cb.hasRemaining() || extendInput());
	}

	/**
	 * Extends the buffered input from the reader.
	 *
	 * <p>Nothing happens when there is no reader or when the buffer position
	 * is 0 (nothing consumed yet). Otherwise the unread characters are moved
	 * to the front of the buffer and one read is attempted. End of input or
	 * an I/O failure detaches the reader, closing it if this scanner opened
	 * it; an I/O failure is also reported through {@link #error(String)}.
	 *
	 * @return true when at least one character was added
	 */
	protected boolean extendInput() {
		if (file == null || cb.position() == 0) {
			return false;
		}
		// move remaining data to the top, set position for the next read
		cb.compact();
		boolean extended = false;
		try {
			if (prompt != null) System.out.print(prompt);
			extended = file.read(cb) > 0;
		} catch (IOException e) {
			error("read failed: " + e.getMessage());
		}
		cb.flip();    // back to reading mode, position 0
		if (!extended) {
			releaseReader();
		}
		return extended;
	}

	/** Detaches the current reader and closes it if this scanner opened it. */
	private void releaseReader() {
		InputStreamReader old = file;
		boolean owned = ownsReader;
		file = null;
		ownsReader = false;
		if (old != null && owned) {
			closeQuietly(old);
		}
	}

	/** Closes a reader, ignoring close failures (nothing useful can be done with them here). */
	private static void closeQuietly(InputStreamReader reader) {
		try {
			reader.close();
		} catch (IOException ignored) {
			// best effort
		}
	}

	/** Returns a new scanner reading {@code exp}, stacked on top of this one. */
	protected PropertyListScanner<Node> pushScanner(String exp) {
		// The current scanner keeps its own matcher for the nested parse
		return new PropertyListScanner<Node>(this,dict,nullToken).set(exp);
	}

	/**
	 * Returns a new scanner reading the named file, stacked on top of this one.
	 *
	 * @throws FileNotFoundException if the file cannot be opened
	 */
	protected PropertyListScanner<Node> pushScannerFile(String newfile) throws FileNotFoundException {
		return new PropertyListScanner<Node>(this,dict,nullToken).setFile(newfile);
	}

	/** Returns a new scanner reading {@code newfile} with the given prompt, stacked on top of this one. */
	public PropertyListScanner<Node> pushScannerFile(InputStream newfile,String prompt) {
		return new PropertyListScanner<Node>(this,dict,nullToken).setFile(newfile,prompt);
	}

	/** Returns the scanner this one was pushed on (null at the bottom of the stack). */
	protected PropertyListScanner<Node> popScanner() {
		return prev;
	}

	/** Finds the nearest scanner on the stack, starting with this one, that has a file name. */
	private PropertyListScanner<Node> findFileName() {
		for(PropertyListScanner<Node> s = this;s!=null ; s = s.prev) {
			if (s.filename!=null) return s;
		}
		return null;
	}

	/**
	 * Reads from a String. Any reader previously attached is detached
	 * (and closed if this scanner opened it); the file name is cleared.
	 *
	 * @param exp text to tokenize
	 * @return this scanner
	 */
	public PropertyListScanner<Node> set(String exp) {
		releaseReader();
		cb = CharBuffer.wrap(exp);
		scan = tokenPat.matcher(cb);
		filename = null;
		nextToken = nullToken;
		return this;
	}

	/**
	 * Reads from a file. A symbol longer than the buffer size cannot be read.
	 * The reader opened here is closed by this scanner once its input is
	 * exhausted or the scanner is given another input.
	 *
	 * @param file path of the file to read
	 * @return this scanner
	 * @throws FileNotFoundException if the file cannot be opened
	 */
	public PropertyListScanner<Node> setFile(String file) throws FileNotFoundException {
		this.filename = file;
		nextToken = nullToken;
		FileReader reader = new FileReader(file);
		set(reader);
		if (this.file == reader) {
			ownsReader = true;
		} else {
			// set() found nothing to read and dropped the reader
			closeQuietly(reader);
		}
		return this;
	}

	/**
	 * Reads from a caller-supplied reader; the reader is never closed by
	 * this scanner. If the first read yields nothing (empty input or I/O
	 * failure) the scanner falls back to an empty String input, which also
	 * clears the file name; in that case {@link #lineno} is left unchanged.
	 *
	 * @param file reader to pull input from
	 * @return this scanner
	 */
	public PropertyListScanner<Node> set(InputStreamReader file) {
		if (this.file != file) {
			releaseReader();
		}
		this.file = file;
		cb = CharBuffer.allocate(BufferSize);
		boolean loaded = false;
		try {
			if (prompt != null) System.out.print(prompt);
			loaded = file.read(cb) > 0;
		} catch (IOException e) {
			error("read failed: " + e.getMessage());
		}
		if (!loaded) {
			this.file = null;   // detach without closing: the caller owns it
			set("");
			return this;
		}
		cb.flip();
		scan = tokenPat.matcher(cb);
		lineno = 0;
		return this;
	}

	/**
	 * Prints an error message to System.err, prefixed with
	 * {@code file:line: } of the nearest scanner on the stack that has a
	 * file name, if there is one.
	 */
	public void error(String err) {
		PropertyListScanner<Node> s = findFileName();
		if (s!=null) {
			System.err.print(s.filename+":"+s.lineno+": ");
		}
		System.err.println("error: "+err);
	}

	/*
	 * Iterator for test routines
	 *    for(Token<Node> t: scanner.scanToken(new FileReader(file))) { ... }
	 */

	/** Sets {@code exp} as input and returns an iterable over its tokens. */
	public Iterable<Token<Node>> scanToken(String exp) {
		set(exp);
		return iterator();
	}

	/** Sets {@code file} as input (the caller keeps ownership) and returns an iterable over its tokens. */
	public Iterable<Token<Node>> scanToken(FileReader file) {
		set(file);
		return iterator();
	}

	/**
	 * Wraps this scanner as an Iterable. Every iterator shares the scanner
	 * state. {@code hasNext()} only reports that characters are left, so the
	 * last {@code next()} may return {@link #nullToken} when only white
	 * space or comments remained.
	 */
	private Iterable<Token<Node>> iterator() {
		return new Iterable<Token<Node>>() {
			public Iterator<Token<Node>> iterator() {
				return new Iterator<Token<Node>>() {
					public boolean hasNext() {
						return hasRemaining();
					}
					public Token<Node> next() {
						return nextToken();
					}
					public void remove() {
						// tokens come from a stream; there is nothing to remove
						throw new UnsupportedOperationException("remove");
					}
				};
			}
		};
	}

	/**
	 * Reads from a caller-supplied stream, using {@code newfile.toString()}
	 * as the name in error messages and {@code prompt} before each read.
	 */
	private PropertyListScanner<Node> setFile(InputStream newfile,String prompt) {
		this.filename = newfile.toString();
		nextToken = nullToken;
		this.prompt = prompt;
		set(new InputStreamReader(newfile));
		return this;
	}

}