package pagerank;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.SAXException;

import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;

import xmlParser.TextTagParser;

26 public class LinkConvertGraph {
|
14
|
27
|
|
28
|
|
29
|
|
30
|
|
31 public static void main(String[] args) {
|
|
32 // final String filename = "./resources/article.xml";
|
|
33 final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
|
|
34 // final String fileDB = "./resources/tinkerpopDB";
|
|
35
|
|
36
|
15
|
37 final long PAGENUM = 22;
|
14
|
38 final String fileDB = "./resources/tinkerGraph"+ Long.toString(PAGENUM);
|
|
39 final String logFile = "./resources/wikiLink"+Long.toString(PAGENUM)+".log";
|
|
40
|
|
41 LinkConvertGraph lcg;
|
|
42
|
|
43 try {
|
|
44 lcg = new LinkConvertGraph(filename);
|
|
45 lcg.parseXml();
|
|
46 // lcg.printHash();
|
|
47
|
|
48 HashMap<String,HashSet<String>> hash = lcg.getHash();
|
|
49
|
|
50 Graph graph = new TinkerGraph();
|
|
51 LinkToVertex ltv = new LinkToVertex(graph);
|
|
52
|
|
53 FileOutputStream fos = new FileOutputStream(logFile);
|
|
54 long countId = 0;
|
|
55 for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
|
|
56 lcg.printLinkLog(map, fos);
|
|
57 // if (PAGENUM <= countId) break;
|
|
58 String pageTitle = map.getKey();
|
|
59 Vertex v;// = graph.addVertex(null);
|
|
60 if ( ltv.getId(pageTitle) == null ) {
|
15
|
61 v = ltv.createVertexWithPageTitle(pageTitle,countId);
|
14
|
62 ltv.setPageRank(v, (Double)0.0);
|
|
63 countId++;
|
|
64 // if (PAGENUM <= countId) break;
|
|
65 } else {
|
|
66 v = ltv.getVertex(pageTitle);
|
|
67 }
|
|
68
|
|
69 for (String linkPageTitle : map.getValue()) {
|
|
70 Vertex linkV;
|
|
71 if ( ltv.getId(linkPageTitle) == null) {
|
15
|
72 linkV = ltv.createVertexWithPageTitle(linkPageTitle,countId);
|
14
|
73 countId++;
|
|
74 ltv.setPageRank(linkV, (Double)0.0);
|
|
75 } else {
|
|
76 linkV = ltv.getVertex(linkPageTitle);
|
|
77 }
|
|
78 ltv.setHasLink(v, linkV);
|
|
79 // if (PAGENUM <= countId) break;
|
|
80 }
|
|
81
|
|
82 }
|
|
83 System.out.println("countId = "+countId);
|
|
84
|
|
85 FileOutputStream out = new FileOutputStream(new File(fileDB));
|
|
86 GraphMLWriter.outputGraph(graph, out);
|
|
87
|
|
88 } catch (FileNotFoundException e) {
|
|
89 e.printStackTrace();
|
|
90 } catch (SAXException e) {
|
|
91 e.printStackTrace();
|
|
92 } catch (ParserConfigurationException e) {
|
|
93 e.printStackTrace();
|
|
94 } catch (IOException e) {
|
|
95 System.err.println("Failed to parse xml");
|
|
96 e.printStackTrace();
|
|
97 }
|
|
98
|
|
99
|
|
100
|
|
101 }
|
2
|
102
|
|
103 private String filename;
|
|
104 private FileInputStream fis;
|
|
105 private SAXParserFactory factory;
|
|
106 private SAXParser parser;
|
|
107 private TextTagParser xmlParser;
|
|
108
|
|
109 private HashMap<String,HashSet<String>> hash;
|
|
110
|
|
111
|
|
112 LinkConvertGraph() throws ParserConfigurationException, SAXException {
|
|
113 xmlParser = new TextTagParser();
|
|
114 factory = SAXParserFactory.newInstance();
|
|
115 parser = factory.newSAXParser();
|
|
116 }
|
|
117
|
|
118 LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
|
|
119 this.filename = filename;
|
|
120 fis = new FileInputStream(filename);
|
|
121 xmlParser = new TextTagParser();
|
|
122 factory = SAXParserFactory.newInstance();
|
|
123 parser = factory.newSAXParser();
|
|
124 }
|
|
125
|
|
126 public void setFilename(final String filename) throws FileNotFoundException {
|
|
127 this.filename = filename;
|
|
128 this.fis = new FileInputStream(filename);
|
|
129 }
|
|
130
|
|
131 private void parseXml() throws SAXException, IOException {
|
|
132 parser.parse(this.fis, this.xmlParser);
|
|
133 hash = xmlParser.getHash();
|
|
134 }
|
|
135
|
|
136 private HashMap<String,HashSet<String>> getHash() {
|
|
137 return hash;
|
|
138 }
|
|
139
|
|
140 public void printHash() {
|
|
141 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
|
|
142 String title = entry.getKey();
|
|
143 System.out.println("title: " + title);
|
|
144 for (String link : entry.getValue()) {
|
|
145 System.out.println("\t"+link);
|
|
146 }
|
|
147 System.out.println();
|
|
148 }
|
|
149 }
|
|
150
|
|
151 private void printHash(FileOutputStream os) throws IOException {
|
|
152 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
|
|
153 String title = entry.getKey();
|
|
154 os.write( ("title: " + title + "\n").getBytes());
|
|
155 for (String link : entry.getValue()) {
|
|
156 os.write( ("\t"+link+"\n").getBytes());
|
|
157 }
|
|
158 os.write( ("\n").getBytes());
|
|
159 os.flush();
|
|
160 }
|
|
161 }
|
|
162
|
13
|
163 void printLinkLog(Map.Entry<String, HashSet<String>> map, FileOutputStream os) throws IOException {
|
|
164 String title = map.getKey();
|
|
165 os.write( ("title: " + title + "\n").getBytes());
|
|
166 for (String link : map.getValue()) {
|
|
167 os.write( ("\t"+link+"\n").getBytes());
|
|
168 }
|
|
169 os.write( ("\n").getBytes());
|
|
170 os.flush();
|
|
171 }
|
2
|
172
|
|
173
|
|
174 }
|