view src/plparser/PropertyListScanner.java @ 9:29e309b2f624

Try several Tokenizer
author one
date Thu, 02 Sep 2010 10:00:23 +0900
parents 619472ca4742
children 0d74081c1309
line wrap: on
line source

package plparser;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.CharBuffer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regular-expression based tokenizer for property lists.
 *
 * <p>Input is held in a {@link CharBuffer}; a single {@link Matcher} is
 * re-targeted at one token pattern after another with
 * {@link Matcher#usePattern(Pattern)} and matched with
 * {@link Matcher#lookingAt()} at the buffer's current position.
 * Consuming a token simply advances the buffer position.
 *
 * <p>When reading from a stream, the buffer is refilled on demand by
 * {@link #extendInput()}.
 *
 * <p>The fields {@code dict}, {@code prev}, {@code nullToken},
 * {@code nextToken}, {@code file}, {@code filename}, {@code prompt},
 * {@code lineno} and {@code BufferSize}, and the method {@code error},
 * are not declared in this class; presumably they are inherited from
 * {@code PLScannerImpl} (confirm there).
 *
 * @param <Node> node type carried by the tokens
 */
public class PropertyListScanner<Node> extends PLScannerImpl<Node> implements PLScanner<Node> {

	//  A generic singleton is not possible here, so the class offers no
	//  shared static instance such as
	//	static PropertyListScanner scanner = new PropertyListScanner();

	/*
	 * Tokenizer for Property List
	 *     Pattern/Matcher implementation
	 *
	 *     The scanner stack is used for nested parsing.
	 *         scanner.push();
	 *         parser.parse(exp);
	 *         nextToken = scanner.pop();
	 *
	 *     2010/8 Shinji Kono
	 */

	/** Matcher bound to {@link #cb}; its pattern is swapped for every attempt. */
	public Matcher scan;
	/** Input buffer; the characters from its position to its limit are unread. */
	public CharBuffer cb;

	/**
	 * Creates a scanner using the given dictionary and a fresh null token.
	 *
	 * @param dict token dictionary consulted and updated while scanning
	 */
	public PropertyListScanner(Dictionary<Node> dict) {
		this.dict = dict;
		nullToken = new Token<Node>("",TokenID.NULL);
	}

	/**
	 * Creates a scanner that is stacked on a previous one (used by the
	 * {@code pushScanner...} methods for nested parsing).
	 *
	 * @param prev      scanner below this one on the stack
	 * @param dict      token dictionary shared with {@code prev}
	 * @param nullToken null token shared with {@code prev}
	 */
	public PropertyListScanner(PropertyListScanner<Node> prev, Dictionary<Node> dict, Token<Node> nullToken) {
		this.prev = prev;
		this.dict = dict;
		this.nullToken = nullToken;
	}


	// Every pattern must contain exactly one group: next() returns group(1).
	private static final Pattern tokenPat = Pattern.compile(
			"([={}(),;])"
	);
	private static final Pattern namePat  = Pattern.compile("([_a-zA-Z][\\@\\w]*)");
	private static final Pattern numPat  = Pattern.compile("([0-9]+)");
	// complete strings: "..." and '...'
	private static final Pattern stringPat1  = Pattern.compile("\\\"([^\"]*)\\\"");
	private static final Pattern stringPat  = Pattern.compile("\\'([^\\']*)\\'");
	// strings whose closing quote is not in the buffered input
	private static final Pattern stringPat1cont  = Pattern.compile("\\\"([^\"]*)$");
	private static final Pattern stringPatCont  = Pattern.compile("\\'([^\\']*)$");
	// remainder of such a string, up to the closing quote
	private static final Pattern stringPat1End  = Pattern.compile("([^\"]*)\\\"");
	private static final Pattern stringPatEnd  = Pattern.compile("([^\\']*)\'");
	private static final Pattern commentPat  = Pattern.compile("(//.*)");
	private static final Pattern commentPat1  = Pattern.compile("(/\\*)");
	// Reluctant on purpose: stop at the FIRST "*/" on the line so that code
	// between two block comments on one line is not swallowed.
	private static final Pattern commentPat1End  = Pattern.compile("(.*?\\*/)");
	private static final Pattern errorPat = Pattern.compile("([^\\s])");
	private static final Pattern anyPat = Pattern.compile("(.)");

	/**
	 * Scans and returns the next token.
	 *
	 * <p>Comments and white space are skipped. A character that no pattern
	 * accepts is reported through {@code error} and skipped. Unterminated
	 * strings and block comments at the end of the input are reported
	 * through {@code error} as well.
	 *
	 * @return the next token, or the null token when no input is set or the
	 *         input is exhausted; the same value is stored in {@code nextToken}
	 */
	@Override
	public Token<Node> nextToken() {
		String s;
		nextToken = nullToken;
		if (cb==null) return nextToken;
		while(hasRemaining()) {
			if ((s=next(tokenPat))!=null) {
				// punctuation: shared through the dictionary
				Token<Node> t;
				if ((t = dict.get(s))==null) {
					dict.put(s, t = new Token<Node>(s,TokenID.Any));
				}
				return nextToken = t;
			} else if ((s=next(stringPatCont))!=null) {
				// '...' string running past the buffered input
				return nextToken = continuedString(s, stringPatEnd);
			} else if ((s=next(stringPat1cont))!=null) {
				// "..." string running past the buffered input
				return nextToken = continuedString(s, stringPat1End);
			} else if ((s=next(stringPat))!=null||(s=next(stringPat1))!=null||(s=next(namePat))!=null) {
				// a quoted string may span lines (names never do)
				lineno += countLineTerminators(s);
				return nextToken = variableToken(s);
			} else if ((s=next(numPat))!=null) {
				return nextToken = new Token<Node>(s,TokenID.NUMBER);
			} else if ((s=next(commentPat))!=null) {
				skipRestOfLine();
				continue;
			} else if ((s=next(commentPat1))!=null) {
				skipBlockComment();
				continue;
			} else if ((s=next(errorPat))!=null) {
				error("Don't understand '"+s+"'");
				continue;
			} else if ((s=next(anyPat))!=null) {
				// skip space
				continue;
			} else {
				// Only a line terminator is rejected by anyPat: discard it.
				lineno++;
				cb.get();
			}
		}
		return nextToken;
	}

	/**
	 * Returns the VARIABLE token for the given text. The dictionary entry is
	 * reused (and created when missing) unless it has another token type, in
	 * which case a fresh, unregistered VARIABLE token is returned.
	 */
	private Token<Node> variableToken(String text) {
		Token<Node> t = dict.get(text);
		if (t==null) {
			dict.put(text, t = new Token<Node>(text,TokenID.VARIABLE));
		}
		if (t.type!=TokenID.VARIABLE) {
			t = new Token<Node>(text,TokenID.VARIABLE);
		}
		return t;
	}

	/**
	 * Completes a string whose closing quote was not in the buffered input.
	 * Whole buffers are appended until {@code endPat} finds the closing
	 * quote; running out of input is reported as an error and the text
	 * collected so far is used.
	 *
	 * @param head   string contents already consumed (without the quote)
	 * @param endPat pattern matching the rest of the string and its quote
	 * @return the VARIABLE token for the assembled string
	 */
	private Token<Node> continuedString(String head, Pattern endPat) {
		StringBuilder text = new StringBuilder(head);
		while (true) {
			if (!hasRemaining()) {
				error("Unterminated string");
				break;
			}
			String tail = next(endPat);
			if (tail!=null) {
				text.append(tail);
				break;
			}
			// No closing quote in the buffered data: take all of it. With the
			// position advanced, the next hasRemaining() can refill the buffer.
			text.append(cb.toString());
			cb.position(cb.limit());
		}
		String s = text.toString();
		lineno += countLineTerminators(s);
		return variableToken(s);
	}

	/**
	 * Skips what is left of a line comment, including its line terminator.
	 * Normally only the terminator is left; more is left when the comment
	 * was cut by the end of the buffer and the buffer has been refilled.
	 */
	private void skipRestOfLine() {
		while (hasRemaining()) {
			if (isLineTerminator(cb.get())) {
				lineno++;
				return;
			}
		}
	}

	/**
	 * Skips the body of a block comment up to and including the closing
	 * marker. The end pattern only looks at the current line, so when it
	 * fails one character is discarded and the match is retried.
	 */
	private void skipBlockComment() {
		while (next(commentPat1End)==null) {
			if (!hasRemaining()) {
				error("Unterminated comment");
				return;
			}
			if (isLineTerminator(cb.get())) lineno++;
		}
	}

	/** Tells whether {@code c} is a character that "." does not match. */
	private static boolean isLineTerminator(char c) {
		return c=='\n' || c=='\r' || c=='\u0085' || c=='\u2028' || c=='\u2029';
	}

	/** Counts the line terminator characters in {@code text}. */
	private static int countLineTerminators(CharSequence text) {
		int count = 0;
		for (int i = 0; i < text.length(); i++) {
			if (isLineTerminator(text.charAt(i))) count++;
		}
		return count;
	}

	/**
	 * Tries to match {@code pattern} at the current buffer position and, on
	 * success, consumes the matched text.
	 *
	 * <p>If the match attempt ran into the end of the buffered input, the
	 * input is extended and the attempt repeated, so that a token is not cut
	 * at a buffer boundary.
	 *
	 * @param pattern pattern with exactly one capturing group
	 * @return the text of group 1, or {@code null} if the pattern does not
	 *         match here (the buffer position is then unchanged)
	 */
	protected String next(Pattern pattern) {
		String s = null;
		while(true) {
			// The matcher caches the input length, and the buffer position may
			// have moved since the last attempt: always start from a reset.
			scan.reset();
			boolean match = scan.usePattern(pattern).lookingAt();
			if (scan.hitEnd()) {
				if (extendInput()) {
					// input is extended try again
					continue;
				}
				// no extension.
			}
			if (match) {
				s = scan.group(1);
				// scan.end() is relative to the current buffer position
				cb.position(cb.position()+scan.end());
			}
			if (scan.hitEnd()) {
				// The extension above may have failed only because the buffer
				// was full; consuming the match has made room, so try again.
				extendInput();
			}
			return s;
		}
	}

	/**
	 * Tells whether unread input is available, reading more if necessary.
	 *
	 * @return {@code false} if no input is set or the input is exhausted
	 */
	@Override
	public boolean hasRemaining() {
		return cb!=null && (cb.hasRemaining()||extendInput());
	}

	/**
	 * Extends the buffered input from {@code file}.
	 *
	 * <p>Unread characters are moved to the start of the buffer and the
	 * freed space is filled from the reader. Nothing is read when there is
	 * no reader or when the buffer position is 0 (no consumed characters to
	 * reclaim). On end of stream or an I/O error the reader is dropped
	 * ({@code file} becomes {@code null}); it is not closed here.
	 *
	 * @return {@code true} if at least one character was added
	 */
	protected boolean extendInput() {
		if (file==null || cb.position()==0) return false;
		// move remaining data to the top, set position for next read
		cb.compact();
		int count;
		try {
			if (prompt!=null) System.out.print(prompt);
			count = file.read(cb);
		} catch (IOException e) {
			count = -1;   // handled like end of stream
		}
		if (count<=0) file = null;
		cb.flip();        // back to reading mode in either case
		return count>0;
	}

	/**
	 * Makes the scanner read from the given string.
	 *
	 * @param exp text to tokenize
	 * @return this scanner
	 */
	@Override
	public PLScanner<Node> set(String exp) {
		cb = CharBuffer.wrap(exp);
		scan = tokenPat.matcher(cb);
		filename = null; file = null;
		nextToken = nullToken;
		return this;
	}

	/**
	 * Makes the scanner read from the named file.
	 * A symbol bigger than the buffer size cannot be read.
	 *
	 * @param file name of the file to read
	 * @return this scanner
	 * @throws FileNotFoundException if the file cannot be opened; the
	 *         scanner is left unchanged in that case
	 */
	@Override
	public PLScanner<Node> setFile(String file) throws FileNotFoundException {
		// open first, so that a failure does not leave a stale filename
		// NOTE(review): the reader is opened with the platform charset and is
		// never closed by this class.
		FileReader reader = new FileReader(file);
		this.filename = file;
		nextToken = nullToken;
		set(reader);
		return this;
	}

	/**
	 * Makes the scanner read from the given reader, filling the buffer with
	 * a first chunk. If nothing can be read (empty stream or I/O error) the
	 * scanner is set to the empty string instead.
	 *
	 * @param file reader to take the input from
	 * @return this scanner
	 */
	@Override
	public PLScanner<Node> set(InputStreamReader file) {
		CharBuffer buffer = CharBuffer.allocate(BufferSize);
		int count;
		try {
			if (prompt!=null) System.out.print(prompt);
			count = file.read(buffer);
		} catch (IOException e) {
			count = -1;   // handled like an empty stream
		}
		if (count<=0) {
			return set("");
		}
		buffer.flip();
		this.file = file;
		cb = buffer;
		scan = tokenPat.matcher(cb);
		lineno = 0;
		return this;
	}


	/**
	 * Returns a new scanner stacked on this one that reads from a stream.
	 *
	 * @param newfile stream to read
	 * @param prompt  prompt passed on to {@code setFile}
	 * @return the new scanner
	 */
	@Override
	public PLScanner<Node> pushScannerFile(InputStream newfile, String prompt) {
		return new PropertyListScanner<Node>(this,dict,nullToken).setFile(newfile,prompt);
	}

	/**
	 * Returns a new scanner stacked on this one that reads from a string.
	 *
	 * @param exp text to tokenize
	 * @return the new scanner
	 */
	@Override
	public PLScanner<Node> pushScanner(String exp) {
		return new PropertyListScanner<Node>(this,dict,nullToken).set(exp);
	}

	/**
	 * Returns a new scanner stacked on this one that reads from a file.
	 *
	 * @param newfile name of the file to read
	 * @return the new scanner
	 * @throws FileNotFoundException if the file cannot be opened
	 */
	@Override
	public PLScanner<Node> pushScannerFile(String newfile)
			throws FileNotFoundException {
		return new PropertyListScanner<Node>(this,dict,nullToken).setFile(newfile);
	}
}