diff src/pagerank/LinkConvertGraph.java @ 2:1744340f8be6 draft

add some java files
author one
date Wed, 05 Sep 2012 11:56:21 +0900
parents
children b44abb9aa09f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/pagerank/LinkConvertGraph.java	Wed Sep 05 11:56:21 2012 +0900
@@ -0,0 +1,158 @@
+package pagerank;
+
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.SAXException;
+
+import com.tinkerpop.blueprints.Graph;
+import com.tinkerpop.blueprints.Vertex;
+import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
+import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;
+
+import xmlParser.TextTagParser;
+
+public class LinkConvertGraph {
+	
+	private String filename;
+	private FileInputStream fis;
+	private SAXParserFactory factory;
+	private SAXParser parser;
+	private TextTagParser xmlParser;
+
+	private HashMap<String,HashSet<String>> hash;
+
+
+	LinkConvertGraph() throws ParserConfigurationException, SAXException {
+		xmlParser = new TextTagParser();
+		factory = SAXParserFactory.newInstance();
+		parser = factory.newSAXParser();
+	}
+
+	LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
+		this.filename = filename;
+		fis = new FileInputStream(filename);
+		xmlParser = new TextTagParser();
+		factory = SAXParserFactory.newInstance();
+		parser = factory.newSAXParser();
+	}
+	
+	public void setFilename(final String filename) throws FileNotFoundException {
+		this.filename = filename;
+		this.fis = new FileInputStream(filename);
+	}
+	
+	private void parseXml() throws SAXException, IOException {
+		parser.parse(this.fis, this.xmlParser);
+		hash = xmlParser.getHash();
+	}
+	
+	private HashMap<String,HashSet<String>> getHash() {
+		return hash;
+	}
+
+	public void printHash() {
+		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
+			String title = entry.getKey();
+			System.out.println("title: " + title);
+			for (String link : entry.getValue()) {
+				System.out.println("\t"+link);
+			}
+			System.out.println();
+		}		
+	}
+	
+	private void printHash(FileOutputStream os) throws IOException {
+		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
+			String title = entry.getKey();
+			os.write( ("title: " + title + "\n").getBytes());
+			for (String link : entry.getValue()) {
+				os.write( ("\t"+link+"\n").getBytes());
+			}
+			os.write( ("\n").getBytes());
+			os.flush();
+		}		
+	}
+	
+	
+	
+	
+	
+	public static void main(String[] args) {
+//		final String filename = "./resource/article.xml";
+		final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
+		
+		LinkConvertGraph lcg;
+
+		try {
+			
+			lcg = new LinkConvertGraph(filename);
+
+			lcg.parseXml();
+//			lcg.printHash();
+
+			FileOutputStream fos = new FileOutputStream("./resource/wikiLink.log");
+			lcg.printHash(fos);
+
+			HashMap<String,HashSet<String>> hash = lcg.getHash();
+
+			
+			final String filenameD = "./resource/tinkerpopDB";
+			
+			Graph graph = new TinkerGraph();
+			FileOutputStream out = new FileOutputStream(new File(filename));
+			LinkToVertex ltn = new LinkToVertex(graph);
+			
+			for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
+				String pageTitle = map.getKey();
+				
+				Vertex v;// = graph.addVertex(null);
+				
+				if ( ltn.getId(pageTitle) == null ) {
+					v = ltn.createVertexWithPageTitle(pageTitle);
+					
+				} else {
+					v = ltn.getVertex(pageTitle);
+				}
+
+				for (String linkPageTitle : map.getValue()) {
+					Vertex linkV;
+					if ( ltn.getId(linkPageTitle) == null) {
+						linkV = ltn.createVertexWithPageTitle(linkPageTitle);
+						ltn.setPageRank(linkV, (Double)0.0);
+					} else {
+						linkV = ltn.getVertex(linkPageTitle);
+					}
+					ltn.setHasLink(v, linkV);
+				}
+				
+			}
+
+			GraphMLWriter.outputGraph(graph, out);
+
+		} catch (FileNotFoundException e) {
+			e.printStackTrace();
+		} catch (SAXException e) {
+			e.printStackTrace();			
+		} catch (ParserConfigurationException e) {
+			e.printStackTrace();
+		} catch (IOException e) {
+			System.err.println("Failed to parse xml");
+			e.printStackTrace();
+		}
+
+		
+
+	}
+}