annotate src/wikigraph/LinkConvertGraph.java @ 21:f9ef906676eb draft

modify LinkToNode.java
author one
date Tue, 28 Aug 2012 14:04:15 +0900
parents 2c3a10047ec6
children fbf0cf550b06
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
1 package wikigraph;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
2
16
5c8c554ee3ac hg mv TestSaxParser.java TextTagParser.java
one
parents: 14
diff changeset
3 import howtouse.TextTagParser;
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
4
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
5 import java.io.FileInputStream;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
6 import java.io.FileNotFoundException;
14
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
7 import java.io.FileOutputStream;
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
8 import java.io.IOException;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
9 import java.util.HashMap;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
10 import java.util.HashSet;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
11 import java.util.Map;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
12
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
13 import javax.xml.parsers.ParserConfigurationException;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
14 import javax.xml.parsers.SAXParser;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
15 import javax.xml.parsers.SAXParserFactory;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
16
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
17 import org.neo4j.graphdb.GraphDatabaseService;
19
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
18 import org.neo4j.graphdb.Node;
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
19 import org.neo4j.graphdb.Transaction;
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
20 import org.neo4j.kernel.EmbeddedGraphDatabase;
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
21 import org.xml.sax.SAXException;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
22
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
23 public class LinkConvertGraph {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
24
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
25 private String filename;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
26 private FileInputStream fis;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
27 private SAXParserFactory factory;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
28 private SAXParser parser;
16
5c8c554ee3ac hg mv TestSaxParser.java TextTagParser.java
one
parents: 14
diff changeset
29 private TextTagParser xmlParser;
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
30
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
31 private HashMap<String,HashSet<String>> hash;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
32
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
33
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
34 LinkConvertGraph() throws ParserConfigurationException, SAXException {
16
5c8c554ee3ac hg mv TestSaxParser.java TextTagParser.java
one
parents: 14
diff changeset
35 xmlParser = new TextTagParser();
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
36 factory = SAXParserFactory.newInstance();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
37 parser = factory.newSAXParser();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
38 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
39
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
40 LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
41 this.filename = filename;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
42 fis = new FileInputStream(filename);
16
5c8c554ee3ac hg mv TestSaxParser.java TextTagParser.java
one
parents: 14
diff changeset
43 xmlParser = new TextTagParser();
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
44 factory = SAXParserFactory.newInstance();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
45 parser = factory.newSAXParser();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
46 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
47
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
48 private void setFilename(final String filename) throws FileNotFoundException {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
49 this.filename = filename;
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
50 this.fis = new FileInputStream(filename);
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
51 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
52
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
53 private void parseXml() throws SAXException, IOException {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
54 parser.parse(this.fis, this.xmlParser);
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
55 hash = xmlParser.getHash();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
56 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
57
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
58 private HashMap<String,HashSet<String>> getHash() {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
59 return hash;
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
60 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
61
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
62 private void printHash() {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
63 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
64 String title = entry.getKey();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
65 System.out.println("title: " + title);
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
66 for (String link : entry.getValue()) {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
67 System.out.println("\t"+link);
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
68 }
14
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
69 System.out.println();
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
70 }
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
71 }
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
72
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
73 private void printHash(FileOutputStream os) throws IOException {
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
74 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
75 String title = entry.getKey();
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
76 os.write( ("title: " + title + "\n").getBytes());
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
77 for (String link : entry.getValue()) {
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
78 os.write( ("\t"+link+"\n").getBytes());
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
79 }
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
80 os.write( ("\n").getBytes());
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
81 os.flush();
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
82 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
83 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
84
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
85
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
86
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
87
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
88
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
89 public static void main(String[] args) {
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
90 // final String filename = "./resource/article.xml";
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
91 final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
14
1405a0c97c67 fix LinkConvertGraph.java
one
parents: 13
diff changeset
92
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
93 LinkConvertGraph lcg;
19
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
94
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
95
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
96 try {
21
f9ef906676eb modify LinkToNode.java
one
parents: 20
diff changeset
97
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
98 lcg = new LinkConvertGraph(filename);
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
99
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
100 lcg.parseXml();
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
101 // lcg.printHash();
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
102
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
103 FileOutputStream fos = new FileOutputStream("./resource/wikilink.log");
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
104 lcg.printHash(fos);
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
105
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
106 HashMap<String,HashSet<String>> hash = lcg.getHash();
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
107
21
f9ef906676eb modify LinkToNode.java
one
parents: 20
diff changeset
108 GraphDatabaseService graphDb = new EmbeddedGraphDatabase("wikiLinkDB");
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
109 LinkToNode ltn = new LinkToNode(graphDb);
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
110
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
111 Transaction tx = graphDb. beginTx();
21
f9ef906676eb modify LinkToNode.java
one
parents: 20
diff changeset
112
f9ef906676eb modify LinkToNode.java
one
parents: 20
diff changeset
113 Node delNode = graphDb.getNodeById(0);
f9ef906676eb modify LinkToNode.java
one
parents: 20
diff changeset
114 delNode.delete();
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
115
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
116 for (Map.Entry<String,HashSet<String>> map : hash.entrySet()) {
19
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
117 String pageTitle = map.getKey();
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
118
19
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
119 Node pageNode;
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
120 if ( ltn.getId(pageTitle) == null) {
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
121 pageNode = ltn.createNodeWithPageTitle(pageTitle);
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
122 } else {
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
123 pageNode = ltn.getNode(pageTitle);
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
124 }
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
125
19
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
126 for (String linkPageTitle : map.getValue()) {
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
127 Node linkNode;
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
128 if ( ltn.getId(linkPageTitle) == null) {
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
129 linkNode = ltn.createNodeWithPageTitle(linkPageTitle);
21
f9ef906676eb modify LinkToNode.java
one
parents: 20
diff changeset
130 ltn.setPageRank(linkNode, 0);
19
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
131 } else {
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
132 linkNode = ltn.getNode(linkPageTitle);
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
133 }
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
134 ltn.setHasLink(pageNode, linkNode);
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
135
e01c8a8190cc fix LinkToNode.java
one
parents: 18
diff changeset
136 }
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
137
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
138 }
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
139
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
140
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
141 // ltn.printAllNodes();
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
142
18
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
143 tx.success();
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
144 tx.finish();
7e4aa7037c7c modify LinkToNode.java
one
parents: 17
diff changeset
145 graphDb.shutdown();
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
146
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
147 } catch (FileNotFoundException e) {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
148 e.printStackTrace();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
149 } catch (SAXException e) {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
150 e.printStackTrace();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
151 } catch (ParserConfigurationException e) {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
152 e.printStackTrace();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
153 } catch (IOException e) {
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
154 System.err.println("Failed to parse xml");
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
155 e.printStackTrace();
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
156 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
157
20
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
158
2c3a10047ec6 add ReadWikiLink.java and LinkToNode.java
one
parents: 19
diff changeset
159
13
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
160 }
84dd614f1414 add LinkConvertGraph.java
one
parents:
diff changeset
161 }