13
|
1 package wikigraph;
|
|
2
|
16
|
3 import howtouse.TextTagParser;
|
13
|
4
|
|
5 import java.io.FileInputStream;
|
|
6 import java.io.FileNotFoundException;
|
14
|
7 import java.io.FileOutputStream;
|
13
|
8 import java.io.IOException;
|
|
9 import java.util.HashMap;
|
|
10 import java.util.HashSet;
|
|
11 import java.util.Map;
|
|
12
|
|
13 import javax.xml.parsers.ParserConfigurationException;
|
|
14 import javax.xml.parsers.SAXParser;
|
|
15 import javax.xml.parsers.SAXParserFactory;
|
|
16
|
18
|
17 import org.neo4j.graphdb.GraphDatabaseService;
|
19
|
18 import org.neo4j.graphdb.Node;
|
18
|
19 import org.neo4j.graphdb.Transaction;
|
|
20 import org.neo4j.kernel.EmbeddedGraphDatabase;
|
13
|
21 import org.xml.sax.SAXException;
|
|
22
|
|
23 public class LinkConvertGraph {
|
|
24
|
|
25 private String filename;
|
|
26 private FileInputStream fis;
|
|
27 private SAXParserFactory factory;
|
|
28 private SAXParser parser;
|
16
|
29 private TextTagParser xmlParser;
|
13
|
30
|
|
31 private HashMap<String,HashSet<String>> hash;
|
|
32
|
|
33
|
|
34 LinkConvertGraph() throws ParserConfigurationException, SAXException {
|
16
|
35 xmlParser = new TextTagParser();
|
13
|
36 factory = SAXParserFactory.newInstance();
|
|
37 parser = factory.newSAXParser();
|
|
38 }
|
|
39
|
|
40 LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
|
|
41 this.filename = filename;
|
|
42 fis = new FileInputStream(filename);
|
16
|
43 xmlParser = new TextTagParser();
|
13
|
44 factory = SAXParserFactory.newInstance();
|
|
45 parser = factory.newSAXParser();
|
|
46 }
|
|
47
|
|
48 private void setFilename(final String filename) throws FileNotFoundException {
|
|
49 this.filename = filename;
|
20
|
50 this.fis = new FileInputStream(filename);
|
13
|
51 }
|
|
52
|
|
53 private void parseXml() throws SAXException, IOException {
|
|
54 parser.parse(this.fis, this.xmlParser);
|
|
55 hash = xmlParser.getHash();
|
|
56 }
|
|
57
|
|
58 private HashMap<String,HashSet<String>> getHash() {
|
|
59 return hash;
|
|
60 }
|
|
61
|
|
62 private void printHash() {
|
|
63 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
|
|
64 String title = entry.getKey();
|
|
65 System.out.println("title: " + title);
|
|
66 for (String link : entry.getValue()) {
|
|
67 System.out.println("\t"+link);
|
|
68 }
|
14
|
69 System.out.println();
|
|
70 }
|
|
71 }
|
|
72
|
|
73 private void printHash(FileOutputStream os) throws IOException {
|
|
74 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
|
|
75 String title = entry.getKey();
|
|
76 os.write( ("title: " + title + "\n").getBytes());
|
|
77 for (String link : entry.getValue()) {
|
|
78 os.write( ("\t"+link+"\n").getBytes());
|
|
79 }
|
|
80 os.write( ("\n").getBytes());
|
|
81 os.flush();
|
13
|
82 }
|
|
83 }
|
|
84
|
18
|
85
|
|
86
|
|
87
|
|
88
|
13
|
89 public static void main(String[] args) {
|
20
|
90 // final String filename = "./resource/article.xml";
|
|
91 final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
|
14
|
92
|
18
|
93 LinkConvertGraph lcg;
|
19
|
94
|
|
95
|
13
|
96 try {
|
21
|
97
|
18
|
98 lcg = new LinkConvertGraph(filename);
|
13
|
99
|
|
100 lcg.parseXml();
|
20
|
101 // lcg.printHash();
|
13
|
102
|
20
|
103 FileOutputStream fos = new FileOutputStream("./resource/wikilink.log");
|
|
104 lcg.printHash(fos);
|
18
|
105
|
|
106 HashMap<String,HashSet<String>> hash = lcg.getHash();
|
20
|
107
|
21
|
108 GraphDatabaseService graphDb = new EmbeddedGraphDatabase("wikiLinkDB");
|
18
|
109 LinkToNode ltn = new LinkToNode(graphDb);
|
|
110
|
|
111 Transaction tx = graphDb. beginTx();
|
21
|
112
|
|
113 Node delNode = graphDb.getNodeById(0);
|
|
114 delNode.delete();
|
20
|
115
|
18
|
116 for (Map.Entry<String,HashSet<String>> map : hash.entrySet()) {
|
19
|
117 String pageTitle = map.getKey();
|
18
|
118
|
19
|
119 Node pageNode;
|
|
120 if ( ltn.getId(pageTitle) == null) {
|
|
121 pageNode = ltn.createNodeWithPageTitle(pageTitle);
|
|
122 } else {
|
|
123 pageNode = ltn.getNode(pageTitle);
|
18
|
124 }
|
|
125
|
19
|
126 for (String linkPageTitle : map.getValue()) {
|
|
127 Node linkNode;
|
|
128 if ( ltn.getId(linkPageTitle) == null) {
|
|
129 linkNode = ltn.createNodeWithPageTitle(linkPageTitle);
|
21
|
130 ltn.setPageRank(linkNode, 0);
|
19
|
131 } else {
|
|
132 linkNode = ltn.getNode(linkPageTitle);
|
|
133 }
|
|
134 ltn.setHasLink(pageNode, linkNode);
|
|
135
|
|
136 }
|
18
|
137
|
|
138 }
|
20
|
139
|
18
|
140
|
20
|
141 // ltn.printAllNodes();
|
|
142
|
18
|
143 tx.success();
|
|
144 tx.finish();
|
|
145 graphDb.shutdown();
|
20
|
146
|
13
|
147 } catch (FileNotFoundException e) {
|
|
148 e.printStackTrace();
|
|
149 } catch (SAXException e) {
|
|
150 e.printStackTrace();
|
|
151 } catch (ParserConfigurationException e) {
|
|
152 e.printStackTrace();
|
|
153 } catch (IOException e) {
|
|
154 System.err.println("Failed to parse xml");
|
|
155 e.printStackTrace();
|
|
156 }
|
|
157
|
20
|
158
|
|
159
|
13
|
160 }
|
|
161 }
|