view src/wikigraph/LinkConvertGraph.java @ 18:7e4aa7037c7c draft

modify LinkToNode.java
author one
date Thu, 23 Aug 2012 18:49:47 +0900
parents e30f2714686b
children e01c8a8190cc
line wrap: on
line source

package wikigraph;

import howtouse.TextTagParser;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Transaction;
import org.neo4j.kernel.EmbeddedGraphDatabase;
import org.xml.sax.SAXException;

/**
 * Parses an XML file with a SAX parser driven by {@link TextTagParser},
 * keeps the resulting map (printed by this class as page title -> links),
 * and registers every page title in an embedded Neo4j database through
 * {@link LinkToNode}.
 *
 * <p>An instance owns the input stream it opens; the stream is closed as soon
 * as {@link #parseXml()} returns, whether parsing succeeded or not.
 */
public class LinkConvertGraph {

	/** Charset used when the map is dumped to a byte stream. */
	private static final String LOG_CHARSET = "UTF-8";

	private String filename;
	private FileInputStream fis;
	private final SAXParserFactory factory;
	private final SAXParser parser;
	private final TextTagParser xmlParser;

	private HashMap<String,HashSet<String>> hash;


	/**
	 * Creates a converter with no input file attached.
	 * {@link #setFilename(String)} must be called before {@link #parseXml()}.
	 *
	 * @throws ParserConfigurationException if no SAX parser can be configured
	 * @throws SAXException if the SAX parser cannot be created
	 */
	LinkConvertGraph() throws ParserConfigurationException, SAXException {
		xmlParser = new TextTagParser();
		factory = SAXParserFactory.newInstance();
		parser = factory.newSAXParser();
	}

	/**
	 * Creates a converter that reads from the given file.
	 *
	 * @param filename path of the XML file to parse
	 * @throws FileNotFoundException if the file cannot be opened
	 * @throws ParserConfigurationException if no SAX parser can be configured
	 * @throws SAXException if the SAX parser cannot be created
	 */
	LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
		// Build the parser first: if that fails, no file handle has been opened yet.
		this();
		setFilename(filename);
	}

	/**
	 * Switches the input to another file, releasing any stream that was
	 * still open.
	 *
	 * @param filename path of the XML file to parse
	 * @throws FileNotFoundException if the file cannot be opened; the
	 *         previous input, if any, is left untouched in that case
	 */
	private void setFilename(final String filename) throws FileNotFoundException {
		// Open the new file before dropping the old one so a failure keeps the current state.
		final FileInputStream opened = new FileInputStream(filename);
		closeInput();
		this.filename = filename;
		this.fis = opened;
	}

	/**
	 * Closes the current input stream, if any. A failure to close is reported
	 * on stderr instead of being thrown so that it cannot mask an exception
	 * raised while parsing.
	 */
	private void closeInput() {
		if (fis == null) {
			return;
		}
		try {
			fis.close();
		} catch (IOException e) {
			System.err.println("Failed to close " + filename);
			e.printStackTrace();
		} finally {
			fis = null;
		}
	}

	/**
	 * Parses the current input file and stores the map produced by the
	 * {@link TextTagParser}. The input stream is consumed and closed by this
	 * call; use {@link #setFilename(String)} before parsing again.
	 *
	 * @throws IllegalStateException if no input file is open
	 * @throws SAXException if the XML cannot be parsed
	 * @throws IOException if reading the file fails
	 */
	private void parseXml() throws SAXException, IOException {
		if (fis == null) {
			throw new IllegalStateException("No input file is open; call setFilename() first");
		}
		try {
			parser.parse(fis, xmlParser);
			hash = xmlParser.getHash();
		} finally {
			closeInput();
		}
	}

	/**
	 * Returns the map obtained by the last successful {@link #parseXml()}.
	 *
	 * @return the parsed map, or {@code null} if nothing has been parsed yet
	 */
	private HashMap<String,HashSet<String>> getHash() {
		return hash;
	}

	/**
	 * Prints every entry of the parsed map to standard output: the key on a
	 * "title: " line, each value on its own tab-indented line, then a blank
	 * line. Must be called after {@link #parseXml()}.
	 */
	private void printHash() {
		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
			System.out.println("title: " + entry.getKey());
			for (String link : entry.getValue()) {
				System.out.println("\t" + link);
			}
			System.out.println();
		}
	}

	/**
	 * Writes the parsed map to the given stream in the same layout as
	 * {@link #printHash()}, encoded as UTF-8 so that non-ASCII titles do not
	 * depend on the platform default charset. The stream is flushed but not
	 * closed.
	 *
	 * @param os destination stream; remains owned by the caller
	 * @throws IOException if writing fails
	 */
	private void printHash(FileOutputStream os) throws IOException {
		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
			// Assemble one entry in memory so it goes out in a single write.
			StringBuilder block = new StringBuilder();
			block.append("title: ").append(entry.getKey()).append('\n');
			for (String link : entry.getValue()) {
				block.append('\t').append(link).append('\n');
			}
			block.append('\n');
			os.write(block.toString().getBytes(LOG_CHARSET));
		}
		os.flush();
	}

	/**
	 * Parses the bundled sample article, prints the parsed map to standard
	 * output and registers every page title in the embedded Neo4j database
	 * stored in the "db" directory.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		final String filename = "./resource/article3.xml";

		try {
			LinkConvertGraph lcg = new LinkConvertGraph(filename);
			lcg.parseXml();

			// To dump into a log file instead, pass a FileOutputStream to printHash(FileOutputStream).
			lcg.printHash();

			HashMap<String,HashSet<String>> hash = lcg.getHash();

			GraphDatabaseService graphDb = new EmbeddedGraphDatabase("db");
			try {
				LinkToNode ltn = new LinkToNode(graphDb);

				Transaction tx = graphDb.beginTx();
				try {
					for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
						String pageTitle = entry.getKey();

						// NOTE(review): presumably addName() returns false when the title is
						// not registered yet - confirm against LinkToNode.
						if ( ! ltn.addName(pageTitle) ) {
							ltn.createNodeWithPageTitle(pageTitle);
						}
					}
					tx.success();
				} finally {
					// Without a prior success() this rolls the transaction back.
					tx.finish();
				}
			} finally {
				// Always release the store, even when the import failed half-way.
				graphDb.shutdown();
			}

		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (SAXException e) {
			e.printStackTrace();
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
		} catch (IOException e) {
			System.err.println("Failed to parse xml");
			e.printStackTrace();
		}
	}
}