Mercurial > hg > Members > nobuyasu > TPPageRank
view src/pagerank/LinkConvertGraph.java @ 5:140272228818 draft
fix LinkToVertex.java
author | one |
---|---|
date | Wed, 05 Sep 2012 15:38:57 +0900 |
parents | dcd59917a2dd |
children | c7b139ff27e2 |
line wrap: on
line source
package pagerank; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXException; import com.tinkerpop.blueprints.Graph; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.blueprints.impls.tg.TinkerGraph; import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter; import xmlParser.TextTagParser; public class LinkConvertGraph { private String filename; private FileInputStream fis; private SAXParserFactory factory; private SAXParser parser; private TextTagParser xmlParser; private HashMap<String,HashSet<String>> hash; LinkConvertGraph() throws ParserConfigurationException, SAXException { xmlParser = new TextTagParser(); factory = SAXParserFactory.newInstance(); parser = factory.newSAXParser(); } LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException { this.filename = filename; fis = new FileInputStream(filename); xmlParser = new TextTagParser(); factory = SAXParserFactory.newInstance(); parser = factory.newSAXParser(); } public void setFilename(final String filename) throws FileNotFoundException { this.filename = filename; this.fis = new FileInputStream(filename); } private void parseXml() throws SAXException, IOException { parser.parse(this.fis, this.xmlParser); hash = xmlParser.getHash(); } private HashMap<String,HashSet<String>> getHash() { return hash; } public void printHash() { for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { String title = entry.getKey(); System.out.println("title: " + title); for (String link : entry.getValue()) { System.out.println("\t"+link); } System.out.println(); } } private void printHash(FileOutputStream os) throws IOException { for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { String title = entry.getKey(); os.write( ("title: " + title + "\n").getBytes()); for (String link : entry.getValue()) { os.write( ("\t"+link+"\n").getBytes()); } os.write( ("\n").getBytes()); os.flush(); } } public static void main(String[] args) { final String filename = "./resources/article.xml"; // final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml"; LinkConvertGraph lcg; try { lcg = new LinkConvertGraph(filename); lcg.parseXml(); // lcg.printHash(); FileOutputStream fos = new FileOutputStream("./resources/wikiLink.log"); lcg.printHash(fos); HashMap<String,HashSet<String>> hash = lcg.getHash(); final String fileDB = "./resources/tinkerpopDB"; Graph graph = new TinkerGraph(); FileOutputStream out = new FileOutputStream(new File(fileDB)); LinkToVertex ltv = new LinkToVertex(graph); long countId = 1; for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) { String pageTitle = map.getKey(); Vertex v;// = graph.addVertex(null); if ( ltv.getId(pageTitle) == null ) { v = ltv.createVertexWithPageTitle(pageTitle,countId); ltv.setPageRank(v, (Double)0.0); countId++; } else { v = ltv.getVertex(pageTitle); } for (String linkPageTitle : map.getValue()) { Vertex linkV; if ( ltv.getId(linkPageTitle) == null) { linkV = ltv.createVertexWithPageTitle(linkPageTitle,countId); countId++; ltv.setPageRank(linkV, (Double)0.0); } else { linkV = ltv.getVertex(linkPageTitle); } ltv.setHasLink(v, linkV); } } GraphMLWriter.outputGraph(graph, out); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (IOException e) { System.err.println("Failed to parse xml"); e.printStackTrace(); } } }