comparison src/pagerank/LinkConvertGraph.java @ 2:1744340f8be6 draft

add some java files
author one
date Wed, 05 Sep 2012 11:56:21 +0900
parents
children b44abb9aa09f
comparison
equal deleted inserted replaced
1:08f01b5c4d4a 2:1744340f8be6
1 package pagerank;
2
3
4 import java.io.File;
5 import java.io.FileInputStream;
6 import java.io.FileNotFoundException;
7 import java.io.FileOutputStream;
8 import java.io.IOException;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Map;
12
13 import javax.xml.parsers.ParserConfigurationException;
14 import javax.xml.parsers.SAXParser;
15 import javax.xml.parsers.SAXParserFactory;
16
17 import org.xml.sax.SAXException;
18
19 import com.tinkerpop.blueprints.Graph;
20 import com.tinkerpop.blueprints.Vertex;
21 import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
22 import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;
23
24 import xmlParser.TextTagParser;
25
26 public class LinkConvertGraph {
27
28 private String filename;
29 private FileInputStream fis;
30 private SAXParserFactory factory;
31 private SAXParser parser;
32 private TextTagParser xmlParser;
33
34 private HashMap<String,HashSet<String>> hash;
35
36
37 LinkConvertGraph() throws ParserConfigurationException, SAXException {
38 xmlParser = new TextTagParser();
39 factory = SAXParserFactory.newInstance();
40 parser = factory.newSAXParser();
41 }
42
43 LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
44 this.filename = filename;
45 fis = new FileInputStream(filename);
46 xmlParser = new TextTagParser();
47 factory = SAXParserFactory.newInstance();
48 parser = factory.newSAXParser();
49 }
50
51 public void setFilename(final String filename) throws FileNotFoundException {
52 this.filename = filename;
53 this.fis = new FileInputStream(filename);
54 }
55
56 private void parseXml() throws SAXException, IOException {
57 parser.parse(this.fis, this.xmlParser);
58 hash = xmlParser.getHash();
59 }
60
61 private HashMap<String,HashSet<String>> getHash() {
62 return hash;
63 }
64
65 public void printHash() {
66 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
67 String title = entry.getKey();
68 System.out.println("title: " + title);
69 for (String link : entry.getValue()) {
70 System.out.println("\t"+link);
71 }
72 System.out.println();
73 }
74 }
75
76 private void printHash(FileOutputStream os) throws IOException {
77 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
78 String title = entry.getKey();
79 os.write( ("title: " + title + "\n").getBytes());
80 for (String link : entry.getValue()) {
81 os.write( ("\t"+link+"\n").getBytes());
82 }
83 os.write( ("\n").getBytes());
84 os.flush();
85 }
86 }
87
88
89
90
91
92 public static void main(String[] args) {
93 // final String filename = "./resource/article.xml";
94 final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
95
96 LinkConvertGraph lcg;
97
98 try {
99
100 lcg = new LinkConvertGraph(filename);
101
102 lcg.parseXml();
103 // lcg.printHash();
104
105 FileOutputStream fos = new FileOutputStream("./resource/wikiLink.log");
106 lcg.printHash(fos);
107
108 HashMap<String,HashSet<String>> hash = lcg.getHash();
109
110
111 final String filenameD = "./resource/tinkerpopDB";
112
113 Graph graph = new TinkerGraph();
114 FileOutputStream out = new FileOutputStream(new File(filename));
115 LinkToVertex ltn = new LinkToVertex(graph);
116
117 for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
118 String pageTitle = map.getKey();
119
120 Vertex v;// = graph.addVertex(null);
121
122 if ( ltn.getId(pageTitle) == null ) {
123 v = ltn.createVertexWithPageTitle(pageTitle);
124
125 } else {
126 v = ltn.getVertex(pageTitle);
127 }
128
129 for (String linkPageTitle : map.getValue()) {
130 Vertex linkV;
131 if ( ltn.getId(linkPageTitle) == null) {
132 linkV = ltn.createVertexWithPageTitle(linkPageTitle);
133 ltn.setPageRank(linkV, (Double)0.0);
134 } else {
135 linkV = ltn.getVertex(linkPageTitle);
136 }
137 ltn.setHasLink(v, linkV);
138 }
139
140 }
141
142 GraphMLWriter.outputGraph(graph, out);
143
144 } catch (FileNotFoundException e) {
145 e.printStackTrace();
146 } catch (SAXException e) {
147 e.printStackTrace();
148 } catch (ParserConfigurationException e) {
149 e.printStackTrace();
150 } catch (IOException e) {
151 System.err.println("Failed to parse xml");
152 e.printStackTrace();
153 }
154
155
156
157 }
158 }