view src/howtouse/TestSaxParser.java @ 13:84dd614f1414 draft

add LinkConvertGraph.java
author one
date Thu, 23 Aug 2012 07:22:37 +0900
parents bf7863a55cd6
children fa38ded4c054
line wrap: on
line source

package howtouse;

import java.io.FileInputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Stack;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that builds a map from the text of each {@code <title>} element
 * to the set of strings that {@code WikiLinkParser.parse} returns for the
 * {@code <text>} element that follows it.
 *
 * <p>Character data is buffered per element and processed in
 * {@link #endElement}, because a SAX parser is allowed to deliver the content
 * of a single element through several {@link #characters} calls.
 *
 * <p>Instances hold mutable parsing state; use a fresh instance per document.
 */
public class TestSaxParser extends DefaultHandler {

	/**
	 * Parses a fixed sample file and prints every collected title followed by
	 * its tab-indented entries.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try {
			// Relative path, resolved against the current working directory.
			final String filename = "./resource/article3.xml";
			TestSaxParser sample = new TestSaxParser();

			SAXParserFactory factory = SAXParserFactory.newInstance();
			SAXParser parser = factory.newSAXParser();

			// Open the stream last and always close it, even if parsing fails.
			FileInputStream fis = new FileInputStream(filename);
			try {
				parser.parse(fis, sample);
			} finally {
				fis.close();
			}

			for (Map.Entry<String,HashSet<String>> entry : sample.getHash().entrySet()) {
				System.out.println("title: " + entry.getKey());
				for (String link : entry.getValue()) {
					System.out.println("\t"+link);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Objects for the tracked elements that are currently open. */
	protected Stack<MyObject> stack;
	/** Top of {@link #stack}, or {@code null} while outside any tracked element. */
	private MyObject currentObj;
	/** Never assigned in this class, so {@code null} is what reaches {@code setValue}. */
	private Attributes currentAttr;
	/** Qualified name of the most recently started element. */
	private String currentTag;
	/** Text of the most recently completed {@code <title>} element. */
	private String currentTitleName;
	final static String TAGNAME_TITLE = "title";
	final static String TAGNAME_TEXT = "text";
	WikiLinkParser linkParser = new WikiLinkParser();

	/** Result map: title text to the entries collected from its {@code <text>}. */
	HashMap<String,HashSet<String>> hash = new HashMap<String,HashSet<String>>();
	/** Entries collected for the {@code <text>} element being processed. */
	HashSet<String> currentLinkHash = new HashSet<String>();

	/** Accumulates the character chunks of the tracked element being read. */
	private final StringBuilder charBuffer = new StringBuilder();

	public TestSaxParser() {
		stack = new Stack<MyObject>();
	}

	/**
	 * Returns the live result map (not a copy).
	 *
	 * @return map from title text to the set of entries collected for it
	 */
	public HashMap<String,HashSet<String>> getHash() {
		return hash;
	}

	@Override
	public void startDocument() {
		// Nothing to prepare.
	}

	/**
	 * Records the element name and, for {@code <title>} and {@code <text>},
	 * pushes a new {@code MyObject} and starts a fresh character buffer.
	 */
	@Override
	public void startElement(String uri, String localName, String qName,
			Attributes attributes) throws SAXException {
		currentTag = qName;
		if (qName.equals(TAGNAME_TITLE) || qName.equals(TAGNAME_TEXT)) {
			currentObj = new MyObject(attributes);
			stack.push(currentObj);
			charBuffer.setLength(0);
		}
	}

	/**
	 * Forwards each chunk to the current object and buffers it when the
	 * current tag is {@code <title>} or {@code <text>}. The buffered text is
	 * interpreted only in {@link #endElement}, once it is complete.
	 */
	@Override
	public void characters(char[] ch, int offset, int length) {
		if (currentObj == null) {
			return;
		}
		String value = new String(ch, offset, length);
		currentObj.setValue(currentTag, currentAttr, value);

		if (currentTag.equals(TAGNAME_TITLE) || currentTag.equals(TAGNAME_TEXT)) {
			charBuffer.append(value);
		}
	}

	/**
	 * Finalises a tracked element: a {@code <title>} sets the current title,
	 * a {@code <text>} is parsed as a whole and its entries are stored under
	 * the current title. Other elements leave the collected data untouched.
	 */
	@Override
	public void endElement(String uri, String localName, String qName) {
		if (currentObj == null)
			return;
		if (qName.equals(TAGNAME_TITLE)) {
			// Use the complete title, not just the last chunk delivered.
			currentTitleName = charBuffer.toString();
			charBuffer.setLength(0);
			stack.pop();
		} else if (qName.equals(TAGNAME_TEXT)) {
			// Parse the whole text at once so markup split across chunks is still seen.
			if (charBuffer.length() > 0) {
				currentLinkHash.addAll(linkParser.parse(charBuffer.toString()));
				charBuffer.setLength(0);
			}
			hash.put(currentTitleName, currentLinkHash);
			currentLinkHash = new HashSet<String>();
			stack.pop();
		}
		if (stack.empty())
			currentObj = null;
		else
			currentObj = stack.peek();
	}

	@Override
	public void endDocument() {
		// Nothing to finalise.
	}

}