Mercurial > hg > Members > nobuyasu > TPPageRank
comparison src/pagerank/LinkConvertGraph.java @ 2:1744340f8be6 draft
add some java files
author | one |
---|---|
date | Wed, 05 Sep 2012 11:56:21 +0900 |
parents | |
children | b44abb9aa09f |
comparison
equal
deleted
inserted
replaced
1:08f01b5c4d4a | 2:1744340f8be6 |
---|---|
1 package pagerank; | |
2 | |
3 | |
4 import java.io.File; | |
5 import java.io.FileInputStream; | |
6 import java.io.FileNotFoundException; | |
7 import java.io.FileOutputStream; | |
8 import java.io.IOException; | |
9 import java.util.HashMap; | |
10 import java.util.HashSet; | |
11 import java.util.Map; | |
12 | |
13 import javax.xml.parsers.ParserConfigurationException; | |
14 import javax.xml.parsers.SAXParser; | |
15 import javax.xml.parsers.SAXParserFactory; | |
16 | |
17 import org.xml.sax.SAXException; | |
18 | |
19 import com.tinkerpop.blueprints.Graph; | |
20 import com.tinkerpop.blueprints.Vertex; | |
21 import com.tinkerpop.blueprints.impls.tg.TinkerGraph; | |
22 import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter; | |
23 | |
24 import xmlParser.TextTagParser; | |
25 | |
26 public class LinkConvertGraph { | |
27 | |
28 private String filename; | |
29 private FileInputStream fis; | |
30 private SAXParserFactory factory; | |
31 private SAXParser parser; | |
32 private TextTagParser xmlParser; | |
33 | |
34 private HashMap<String,HashSet<String>> hash; | |
35 | |
36 | |
37 LinkConvertGraph() throws ParserConfigurationException, SAXException { | |
38 xmlParser = new TextTagParser(); | |
39 factory = SAXParserFactory.newInstance(); | |
40 parser = factory.newSAXParser(); | |
41 } | |
42 | |
43 LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException { | |
44 this.filename = filename; | |
45 fis = new FileInputStream(filename); | |
46 xmlParser = new TextTagParser(); | |
47 factory = SAXParserFactory.newInstance(); | |
48 parser = factory.newSAXParser(); | |
49 } | |
50 | |
51 public void setFilename(final String filename) throws FileNotFoundException { | |
52 this.filename = filename; | |
53 this.fis = new FileInputStream(filename); | |
54 } | |
55 | |
56 private void parseXml() throws SAXException, IOException { | |
57 parser.parse(this.fis, this.xmlParser); | |
58 hash = xmlParser.getHash(); | |
59 } | |
60 | |
61 private HashMap<String,HashSet<String>> getHash() { | |
62 return hash; | |
63 } | |
64 | |
65 public void printHash() { | |
66 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { | |
67 String title = entry.getKey(); | |
68 System.out.println("title: " + title); | |
69 for (String link : entry.getValue()) { | |
70 System.out.println("\t"+link); | |
71 } | |
72 System.out.println(); | |
73 } | |
74 } | |
75 | |
76 private void printHash(FileOutputStream os) throws IOException { | |
77 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { | |
78 String title = entry.getKey(); | |
79 os.write( ("title: " + title + "\n").getBytes()); | |
80 for (String link : entry.getValue()) { | |
81 os.write( ("\t"+link+"\n").getBytes()); | |
82 } | |
83 os.write( ("\n").getBytes()); | |
84 os.flush(); | |
85 } | |
86 } | |
87 | |
88 | |
89 | |
90 | |
91 | |
92 public static void main(String[] args) { | |
93 // final String filename = "./resource/article.xml"; | |
94 final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml"; | |
95 | |
96 LinkConvertGraph lcg; | |
97 | |
98 try { | |
99 | |
100 lcg = new LinkConvertGraph(filename); | |
101 | |
102 lcg.parseXml(); | |
103 // lcg.printHash(); | |
104 | |
105 FileOutputStream fos = new FileOutputStream("./resource/wikiLink.log"); | |
106 lcg.printHash(fos); | |
107 | |
108 HashMap<String,HashSet<String>> hash = lcg.getHash(); | |
109 | |
110 | |
111 final String filenameD = "./resource/tinkerpopDB"; | |
112 | |
113 Graph graph = new TinkerGraph(); | |
114 FileOutputStream out = new FileOutputStream(new File(filename)); | |
115 LinkToVertex ltn = new LinkToVertex(graph); | |
116 | |
117 for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) { | |
118 String pageTitle = map.getKey(); | |
119 | |
120 Vertex v;// = graph.addVertex(null); | |
121 | |
122 if ( ltn.getId(pageTitle) == null ) { | |
123 v = ltn.createVertexWithPageTitle(pageTitle); | |
124 | |
125 } else { | |
126 v = ltn.getVertex(pageTitle); | |
127 } | |
128 | |
129 for (String linkPageTitle : map.getValue()) { | |
130 Vertex linkV; | |
131 if ( ltn.getId(linkPageTitle) == null) { | |
132 linkV = ltn.createVertexWithPageTitle(linkPageTitle); | |
133 ltn.setPageRank(linkV, (Double)0.0); | |
134 } else { | |
135 linkV = ltn.getVertex(linkPageTitle); | |
136 } | |
137 ltn.setHasLink(v, linkV); | |
138 } | |
139 | |
140 } | |
141 | |
142 GraphMLWriter.outputGraph(graph, out); | |
143 | |
144 } catch (FileNotFoundException e) { | |
145 e.printStackTrace(); | |
146 } catch (SAXException e) { | |
147 e.printStackTrace(); | |
148 } catch (ParserConfigurationException e) { | |
149 e.printStackTrace(); | |
150 } catch (IOException e) { | |
151 System.err.println("Failed to parse xml"); | |
152 e.printStackTrace(); | |
153 } | |
154 | |
155 | |
156 | |
157 } | |
158 } |