Mercurial > hg > Members > nobuyasu > TPPageRank
view src/pagerank/LinkConvertGraph.java @ 14:86567db31710 draft
fix
author | one |
---|---|
date | Sat, 08 Sep 2012 04:50:50 +0900 |
parents | 0ef7268bbbac |
children | e1d758d08e9c |
line wrap: on
line source
package pagerank; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXException; import com.tinkerpop.blueprints.Graph; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.blueprints.impls.tg.TinkerGraph; import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter; import xmlParser.TextTagParser; public class LinkConvertGraph { public static void main(String[] args) { // final String filename = "./resources/article.xml"; final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml"; // final String fileDB = "./resources/tinkerpopDB"; final long PAGENUM = 11; final String fileDB = "./resources/tinkerGraph"+ Long.toString(PAGENUM); final String logFile = "./resources/wikiLink"+Long.toString(PAGENUM)+".log"; LinkConvertGraph lcg; try { lcg = new LinkConvertGraph(filename); lcg.parseXml(); // lcg.printHash(); HashMap<String,HashSet<String>> hash = lcg.getHash(); Graph graph = new TinkerGraph(); LinkToVertex ltv = new LinkToVertex(graph); FileOutputStream fos = new FileOutputStream(logFile); long countId = 0; for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) { lcg.printLinkLog(map, fos); // if (PAGENUM <= countId) break; String pageTitle = map.getKey(); Vertex v;// = graph.addVertex(null); if ( ltv.getId(pageTitle) == null ) { v = ltv.createVertexWithPageTitle(pageTitle); ltv.setPageRank(v, (Double)0.0); countId++; // if (PAGENUM <= countId) break; } else { v = ltv.getVertex(pageTitle); } for (String linkPageTitle : map.getValue()) { Vertex linkV; if ( ltv.getId(linkPageTitle) == null) { linkV = ltv.createVertexWithPageTitle(linkPageTitle); countId++; ltv.setPageRank(linkV, (Double)0.0); } else { linkV = ltv.getVertex(linkPageTitle); } ltv.setHasLink(v, linkV); // if (PAGENUM <= countId) break; } } System.out.println("countId = "+countId); FileOutputStream out = new FileOutputStream(new File(fileDB)); GraphMLWriter.outputGraph(graph, out); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (IOException e) { System.err.println("Failed to parse xml"); e.printStackTrace(); } } private String filename; private FileInputStream fis; private SAXParserFactory factory; private SAXParser parser; private TextTagParser xmlParser; private HashMap<String,HashSet<String>> hash; LinkConvertGraph() throws ParserConfigurationException, SAXException { xmlParser = new TextTagParser(); factory = SAXParserFactory.newInstance(); parser = factory.newSAXParser(); } LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException { this.filename = filename; fis = new FileInputStream(filename); xmlParser = new TextTagParser(); factory = SAXParserFactory.newInstance(); parser = factory.newSAXParser(); } public void setFilename(final String filename) throws FileNotFoundException { this.filename = filename; this.fis = new FileInputStream(filename); } private void parseXml() throws SAXException, IOException { parser.parse(this.fis, this.xmlParser); hash = xmlParser.getHash(); } private HashMap<String,HashSet<String>> getHash() { return hash; } public void printHash() { for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { String title = entry.getKey(); System.out.println("title: " + title); for (String link : entry.getValue()) { System.out.println("\t"+link); } System.out.println(); } } private void printHash(FileOutputStream os) throws IOException { for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { String title = entry.getKey(); os.write( ("title: " + title + "\n").getBytes()); for (String link : entry.getValue()) { os.write( ("\t"+link+"\n").getBytes()); } os.write( ("\n").getBytes()); os.flush(); } } void printLinkLog(Map.Entry<String, HashSet<String>> map, FileOutputStream os) throws IOException { String title = map.getKey(); os.write( ("title: " + title + "\n").getBytes()); for (String link : map.getValue()) { os.write( ("\t"+link+"\n").getBytes()); } os.write( ("\n").getBytes()); os.flush(); } }