view src/wikigraph/LinkConvertGraph.java @ 17:e30f2714686b draft

fix CharReader.java
author one
date Thu, 23 Aug 2012 17:36:46 +0900
parents 5c8c554ee3ac
children 7e4aa7037c7c
line wrap: on
line source

package wikigraph;

import howtouse.TextTagParser;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.SAXException;

/**
 * Runs a SAX parse of an XML file through a {@link TextTagParser} handler and
 * keeps the table the handler produces.
 *
 * <p>The table maps a key (printed as "title") to a set of strings (printed as
 * its links). What exactly the handler puts in it is decided by
 * {@code TextTagParser}, which is not defined in this file.
 *
 * <p>The input stream is owned by this object: it is opened by the
 * constructor or {@link #setFilename(String)} and released by
 * {@link #parseXml()}.
 */
public class LinkConvertGraph {

	/** Charset used when writing the table to a byte stream. */
	private static final String OUTPUT_ENCODING = "UTF-8";

	private String filename;
	private FileInputStream fis;
	private SAXParserFactory factory;
	private SAXParser parser;
	private TextTagParser xmlParser;

	/** Result of the last successful {@link #parseXml()}; {@code null} before that. */
	private HashMap<String,HashSet<String>> hash;


	/**
	 * Creates the SAX parser and its handler without selecting an input file.
	 *
	 * @throws ParserConfigurationException if a parser cannot be created
	 * @throws SAXException for SAX errors while creating the parser
	 */
	LinkConvertGraph() throws ParserConfigurationException, SAXException {
		xmlParser = new TextTagParser();
		factory = SAXParserFactory.newInstance();
		parser = factory.newSAXParser();
	}

	/**
	 * Creates the SAX parser and opens {@code filename} for a later
	 * {@link #parseXml()}.
	 *
	 * @param filename path of the XML file to read
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 * @throws ParserConfigurationException if a parser cannot be created
	 * @throws SAXException for SAX errors while creating the parser
	 */
	LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
		// Build the parser first so that a parser failure cannot leave an
		// already opened file handle behind.
		this();
		setFilename(filename);
	}

	/**
	 * Selects a new input file, releasing any stream that is still open.
	 * If the new file cannot be opened the current state is left untouched.
	 *
	 * @param filename path of the XML file to read
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 */
	private void setFilename(final String filename) throws FileNotFoundException {
		final FileInputStream opened = new FileInputStream(filename);
		closeQuietly(fis);
		this.filename = filename;
		fis = opened;
	}

	/**
	 * Parses the currently selected file and stores the handler's table.
	 * The input stream is always closed afterwards, so a new file has to be
	 * selected before parsing again.
	 *
	 * @throws IllegalStateException if no input file is currently open
	 * @throws SAXException if the parser reports an error
	 * @throws IOException if reading the file fails
	 */
	private void parseXml() throws SAXException, IOException {
		if (fis == null) {
			throw new IllegalStateException("no input file is open; call setFilename first");
		}
		final FileInputStream in = fis;
		fis = null; // the stream is single-use; never hand a closed stream to the parser
		try {
			parser.parse(in, this.xmlParser);
			hash = xmlParser.getHash();
		} finally {
			closeQuietly(in);
		}
	}

	/**
	 * Returns the table stored by the last {@link #parseXml()}.
	 *
	 * @return the table, or {@code null} if nothing has been parsed yet
	 */
	private HashMap<String,HashSet<String>> getHash() {
		return hash;
	}

	/**
	 * Prints every entry to standard output: a "title: ..." line, one
	 * tab-indented line per link, then an empty line. Prints nothing when no
	 * table has been stored yet.
	 */
	private void printHash() {
		if (hash == null) {
			return;
		}
		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
			System.out.println("title: " + entry.getKey());
			for (String link : entry.getValue()) {
				System.out.println("\t"+link);
			}
			System.out.println();
		}
	}

	/**
	 * Writes every entry to {@code os} in the same layout as
	 * {@link #printHash()}, using "\n" line ends and UTF-8 encoding. Writes
	 * nothing but still flushes when no table has been stored yet. The stream
	 * is not closed.
	 *
	 * @param os destination stream
	 * @throws IOException if writing fails
	 */
	private void printHash(FileOutputStream os) throws IOException {
		if (hash != null) {
			for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
				// Assemble one entry in memory so the unbuffered stream gets a
				// single write per entry instead of one per line.
				final StringBuilder block = new StringBuilder();
				block.append("title: ").append(entry.getKey()).append('\n');
				for (String link : entry.getValue()) {
					block.append('\t').append(link).append('\n');
				}
				block.append('\n');
				// Explicit charset: the platform default would make the output
				// machine dependent and can mangle non-ASCII text.
				os.write(block.toString().getBytes(OUTPUT_ENCODING));
			}
		}
		os.flush();
	}

	/**
	 * Closes an input stream, ignoring a {@code null} argument and any close
	 * failure.
	 *
	 * @param in stream to close, may be {@code null}
	 */
	private static void closeQuietly(final FileInputStream in) {
		if (in == null) {
			return;
		}
		try {
			in.close();
		} catch (IOException ignored) {
			// Nothing is lost when closing a read-only stream fails, and
			// reporting it here could hide the error that matters.
		}
	}

	/**
	 * Parses the sample file and prints the resulting table to standard
	 * output. Failures are reported on standard error.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String filename = "./resource/article3.xml";
//		final String filename = "~/testProgram/wiki/ja-pages_current.xml";

		try {
			LinkConvertGraph lcg = new LinkConvertGraph(filename);

			lcg.parseXml();

/*
			// Alternative: dump the table to a log file instead of stdout.
			final String logFile = "./resource/log2.txt";
			FileOutputStream os = new FileOutputStream(logFile);

			lcg.printHash(os);
			os.close();
*/
			lcg.printHash();

		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (SAXException e) {
			e.printStackTrace();
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
		} catch (IOException e) {
			System.err.println("Failed to parse xml");
			e.printStackTrace();
		}

	}
}