view src/pagerank/LinkConvertGraph.java @ 3:b44abb9aa09f draft

add resources/article.xml
author one
date Wed, 05 Sep 2012 11:59:02 +0900
parents 1744340f8be6
children dcd59917a2dd
line wrap: on
line source

package pagerank;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.SAXException;

import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;

import xmlParser.TextTagParser;

/**
 * Reads an XML file with a SAX parser, collects a map from page title to the
 * set of page titles it links to, and turns that map into a Blueprints graph
 * that is written out as GraphML.
 *
 * <p>The link map itself is produced by {@link TextTagParser}, which is used
 * as the SAX handler; this class only drives the parse and converts the
 * resulting map.
 */
public class LinkConvertGraph {
	
	/** Path of the XML file most recently selected for parsing. */
	private String filename;
	/** Open stream on {@link #filename}; {@code null} when no input is pending. */
	private FileInputStream fis;
	private SAXParserFactory factory;
	private SAXParser parser;
	/** SAX handler that accumulates the title-to-links map during parsing. */
	private TextTagParser xmlParser;

	/** Page title mapped to the titles it links to; {@code null} until {@link #parseXml()} ran. */
	private HashMap<String,HashSet<String>> hash;


	/**
	 * Creates a converter with no input file; call {@link #setFilename(String)}
	 * before parsing.
	 *
	 * @throws ParserConfigurationException if no SAX parser can be configured
	 * @throws SAXException if the SAX parser cannot be created
	 */
	LinkConvertGraph() throws ParserConfigurationException, SAXException {
		xmlParser = new TextTagParser();
		factory = SAXParserFactory.newInstance();
		parser = factory.newSAXParser();
	}

	/**
	 * Creates a converter that will parse the given XML file.
	 *
	 * @param filename path of the XML file to read
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 * @throws ParserConfigurationException if no SAX parser can be configured
	 * @throws SAXException if the SAX parser cannot be created
	 */
	LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
		// Set up the parser first so that a parser failure cannot leak an
		// already opened input stream.
		this();
		this.filename = filename;
		this.fis = new FileInputStream(filename);
	}
	
	/**
	 * Selects a new XML file to parse, releasing any previously opened input.
	 *
	 * @param filename path of the XML file to read
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 */
	public void setFilename(final String filename) throws FileNotFoundException {
		// Open the new stream first: if that fails the current state is kept.
		final FileInputStream replacement = new FileInputStream(filename);
		final FileInputStream previous = this.fis;
		this.filename = filename;
		this.fis = replacement;
		if (previous != null) {
			try {
				previous.close();
			} catch (IOException ignored) {
				// The old stream is being discarded; a failed close of a
				// read-only stream cannot lose data.
			}
		}
	}
	
	/**
	 * Parses the pending input stream and stores the link map collected by
	 * the handler. The input stream is closed afterwards, whether or not the
	 * parse succeeded.
	 *
	 * @throws IllegalStateException if no input file has been set
	 * @throws SAXException if the XML cannot be parsed
	 * @throws IOException if reading or closing the input fails
	 */
	private void parseXml() throws SAXException, IOException {
		if (this.fis == null) {
			throw new IllegalStateException("No input file set; call setFilename first");
		}
		final FileInputStream in = this.fis;
		this.fis = null;
		try {
			parser.parse(in, this.xmlParser);
		} finally {
			in.close();
		}
		hash = xmlParser.getHash();
	}
	
	/**
	 * @return the title-to-links map from the last parse, or {@code null} if
	 *         nothing has been parsed yet
	 */
	private HashMap<String,HashSet<String>> getHash() {
		return hash;
	}

	/**
	 * Prints every title followed by its tab-indented links to standard
	 * output. Requires that the link map has already been populated.
	 */
	public void printHash() {
		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
			String title = entry.getKey();
			System.out.println("title: " + title);
			for (String link : entry.getValue()) {
				System.out.println("\t"+link);
			}
			System.out.println();
		}		
	}
	
	/**
	 * Writes every title followed by its tab-indented links to the given
	 * stream, in the same layout as {@link #printHash()}. The stream is
	 * flushed after each title but not closed.
	 *
	 * @param os destination stream; remains open on return
	 * @throws IOException if writing fails
	 */
	private void printHash(FileOutputStream os) throws IOException {
		// Encode explicitly as UTF-8 so titles are written the same way on
		// every platform instead of depending on the default charset.
		final String encoding = "UTF-8";
		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
			String title = entry.getKey();
			os.write( ("title: " + title + "\n").getBytes(encoding));
			for (String link : entry.getValue()) {
				os.write( ("\t"+link+"\n").getBytes(encoding));
			}
			os.write( ("\n").getBytes(encoding));
			os.flush();
		}		
	}
	
	
	/**
	 * Parses {@code ./resources/article.xml}, dumps the link map to
	 * {@code ./resources/wikiLink.log}, builds a graph with one vertex per
	 * page title and one link per (page, linked page) pair, and writes the
	 * graph as GraphML to {@code ./resources/tinkerpopDB}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String filename = "./resources/article.xml";
		final String logFilename = "./resources/wikiLink.log";
		final String filenameD = "./resources/tinkerpopDB";

		try {
			
			LinkConvertGraph lcg = new LinkConvertGraph(filename);

			lcg.parseXml();

			FileOutputStream fos = new FileOutputStream(logFilename);
			try {
				lcg.printHash(fos);
			} finally {
				fos.close();
			}

			HashMap<String,HashSet<String>> hash = lcg.getHash();

			Graph graph = new TinkerGraph();
			LinkToVertex ltn = new LinkToVertex(graph);
			
			for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
				String pageTitle = map.getKey();
				
				// Reuse the vertex if this title was already seen (for
				// example as the target of an earlier link).
				Vertex v;
				if ( ltn.getId(pageTitle) == null ) {
					v = ltn.createVertexWithPageTitle(pageTitle);
				} else {
					v = ltn.getVertex(pageTitle);
				}

				for (String linkPageTitle : map.getValue()) {
					Vertex linkV;
					if ( ltn.getId(linkPageTitle) == null) {
						linkV = ltn.createVertexWithPageTitle(linkPageTitle);
						// Only vertices first created as link targets get an
						// initial page rank here.
						// NOTE(review): vertices created from map keys are
						// not initialised — confirm this is intended.
						ltn.setPageRank(linkV, (Double)0.0);
					} else {
						linkV = ltn.getVertex(linkPageTitle);
					}
					ltn.setHasLink(v, linkV);
				}
				
			}

			// Write to the dedicated output path. Opening the stream on the
			// input filename would truncate the source XML.
			FileOutputStream out = new FileOutputStream(new File(filenameD));
			try {
				GraphMLWriter.outputGraph(graph, out);
			} finally {
				out.close();
			}

		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (SAXException e) {
			e.printStackTrace();			
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
		} catch (IOException e) {
			System.err.println("Failed to parse xml");
			e.printStackTrace();
		}

	}
}