Mercurial > hg > Members > nobuyasu > TPPageRank
diff src/pagerank/LinkConvertGraph.java @ 2:1744340f8be6 draft
add some java files
author | one |
---|---|
date | Wed, 05 Sep 2012 11:56:21 +0900 |
parents | |
children | b44abb9aa09f |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/pagerank/LinkConvertGraph.java Wed Sep 05 11:56:21 2012 +0900 @@ -0,0 +1,158 @@ +package pagerank; + + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import org.xml.sax.SAXException; + +import com.tinkerpop.blueprints.Graph; +import com.tinkerpop.blueprints.Vertex; +import com.tinkerpop.blueprints.impls.tg.TinkerGraph; +import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter; + +import xmlParser.TextTagParser; + +public class LinkConvertGraph { + + private String filename; + private FileInputStream fis; + private SAXParserFactory factory; + private SAXParser parser; + private TextTagParser xmlParser; + + private HashMap<String,HashSet<String>> hash; + + + LinkConvertGraph() throws ParserConfigurationException, SAXException { + xmlParser = new TextTagParser(); + factory = SAXParserFactory.newInstance(); + parser = factory.newSAXParser(); + } + + LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException { + this.filename = filename; + fis = new FileInputStream(filename); + xmlParser = new TextTagParser(); + factory = SAXParserFactory.newInstance(); + parser = factory.newSAXParser(); + } + + public void setFilename(final String filename) throws FileNotFoundException { + this.filename = filename; + this.fis = new FileInputStream(filename); + } + + private void parseXml() throws SAXException, IOException { + parser.parse(this.fis, this.xmlParser); + hash = xmlParser.getHash(); + } + + private HashMap<String,HashSet<String>> getHash() { + return hash; + } + + public void printHash() { + for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { + String title = entry.getKey(); + System.out.println("title: " + title); + for (String link : entry.getValue()) { + System.out.println("\t"+link); + } + System.out.println(); + } + } + + private void printHash(FileOutputStream os) throws IOException { + for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { + String title = entry.getKey(); + os.write( ("title: " + title + "\n").getBytes()); + for (String link : entry.getValue()) { + os.write( ("\t"+link+"\n").getBytes()); + } + os.write( ("\n").getBytes()); + os.flush(); + } + } + + + + + + public static void main(String[] args) { +// final String filename = "./resource/article.xml"; + final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml"; + + LinkConvertGraph lcg; + + try { + + lcg = new LinkConvertGraph(filename); + + lcg.parseXml(); +// lcg.printHash(); + + FileOutputStream fos = new FileOutputStream("./resource/wikiLink.log"); + lcg.printHash(fos); + + HashMap<String,HashSet<String>> hash = lcg.getHash(); + + + final String filenameD = "./resource/tinkerpopDB"; + + Graph graph = new TinkerGraph(); + FileOutputStream out = new FileOutputStream(new File(filename)); + LinkToVertex ltn = new LinkToVertex(graph); + + for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) { + String pageTitle = map.getKey(); + + Vertex v;// = graph.addVertex(null); + + if ( ltn.getId(pageTitle) == null ) { + v = ltn.createVertexWithPageTitle(pageTitle); + + } else { + v = ltn.getVertex(pageTitle); + } + + for (String linkPageTitle : map.getValue()) { + Vertex linkV; + if ( ltn.getId(linkPageTitle) == null) { + linkV = ltn.createVertexWithPageTitle(linkPageTitle); + ltn.setPageRank(linkV, (Double)0.0); + } else { + linkV = ltn.getVertex(linkPageTitle); + } + ltn.setHasLink(v, linkV); + } + + } + + GraphMLWriter.outputGraph(graph, out); + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (SAXException e) { + e.printStackTrace(); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + } catch (IOException e) { + System.err.println("Failed to parse xml"); + e.printStackTrace(); + } + + + + } +}