annotate src/pagerank/TPReadWikiLink.java @ 10:c7a7c53702dd draft

add gremlin.jar
author one
date Wed, 05 Sep 2012 18:52:37 +0900
parents 9787663edb54
children 8e3bb1843beb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
1 package pagerank;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
2
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
3 import java.io.BufferedReader;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
4 import java.io.File;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
5 import java.io.FileInputStream;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
6 import java.io.FileNotFoundException;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
7 import java.io.FileOutputStream;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
8 import java.io.IOException;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
9 import java.io.InputStreamReader;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
10 import java.util.ArrayList;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
11 import java.util.Collections;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
12 import java.util.Comparator;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
13 import java.util.HashMap;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
14 import java.util.LinkedList;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
15
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
16 import com.tinkerpop.blueprints.Graph;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
17 import com.tinkerpop.blueprints.Vertex;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
18 import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
19 import com.tinkerpop.blueprints.util.io.graphml.GraphMLReader;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
20 import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
21
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
22 import pagerank.WikiPage;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
23
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
24 public class TPReadWikiLink {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
25
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
26 public static void main(String[] args) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
27
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
28 final String fileDB = "./resources/tinkerpopDB";
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
29
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
30
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
31 try {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
32 Graph graph = new TinkerGraph();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
33 FileInputStream in = new FileInputStream(new File(fileDB));
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
34 GraphMLReader.inputGraph(graph, in);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
35 in.close();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
36 LinkToVertex ltv = new LinkToVertex(graph);
7
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
37
8
4d1885a2fa36 fix method of calculation for computing PageRank.
one
parents: 7
diff changeset
38 // ltv.initPageRankAllVertex();
6
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
39 final long AllVertexNumber = ltv.searchAllVertices();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
40 HashMap<String, WikiPage> wikiHash = ltv.getWikiPageHash();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
41 System.out.println("AllVertexNumber = "+AllVertexNumber);
7
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
42
9
9787663edb54 delete if expression in computePageRank method.
one
parents: 8
diff changeset
43 /*
9787663edb54 delete if expression in computePageRank method.
one
parents: 8
diff changeset
44 String nodeIds[] = {"80", "290", "21", "164"};
9787663edb54 delete if expression in computePageRank method.
one
parents: 8
diff changeset
45 writeComputeTransition(ltv, nodeIds, 50);
8
4d1885a2fa36 fix method of calculation for computing PageRank.
one
parents: 7
diff changeset
46
9
9787663edb54 delete if expression in computePageRank method.
one
parents: 8
diff changeset
47 */
10
c7a7c53702dd add gremlin.jar
one
parents: 9
diff changeset
48
8
4d1885a2fa36 fix method of calculation for computing PageRank.
one
parents: 7
diff changeset
49 FileOutputStream fos = new FileOutputStream(new File("./resources/wikiPageRank.log"));
4d1885a2fa36 fix method of calculation for computing PageRank.
one
parents: 7
diff changeset
50 descendingOrder(wikiHash, fos);
4d1885a2fa36 fix method of calculation for computing PageRank.
one
parents: 7
diff changeset
51
4d1885a2fa36 fix method of calculation for computing PageRank.
one
parents: 7
diff changeset
52
6
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
53 FileOutputStream out = new FileOutputStream(new File(fileDB));
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
54 GraphMLWriter.outputGraph(graph, out);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
55 out.close();
7
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
56
6
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
57
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
58 // loop(ltv);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
59
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
60 } catch (NumberFormatException e){
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
61 System.out.println("Program exit");
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
62 } catch (Exception e) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
63 e.printStackTrace();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
64 } finally {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
65
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
66 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
67
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
68 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
69
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
70 public static void loop(LinkToVertex ltv) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
71 BufferedReader r = new BufferedReader( new InputStreamReader(System.in), 1);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
72 System.out.print("\nPlease enter Node Id...>");
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
73 System.out.flush();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
74 String s;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
75 while ((s = r.readLine()) != null ) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
76 int nodeId = Integer.parseInt(s);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
77 ltv.printVertexInfo(nodeId);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
78 System.out.print("\nPlease enter Node Id...>");
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
79 System.out.flush();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
80 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
81 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
82
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
83 public static void writeComputeTransition(LinkToVertex ltv,final String nodeIds[], int count) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
84 LinkedList<FileOutputStream> fosList = new LinkedList<FileOutputStream>();
7
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
85 for (String id: nodeIds) {
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
86 String filename = "./resources/NodeId_"+id+".dat";
6
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
87 FileOutputStream fos = null;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
88 fos = new FileOutputStream(filename);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
89
7
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
90 Vertex v = ltv.getVertexById(id);
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
91 fos.write( ("# Node ID "+id+" "+ ltv.getPageTitle(v)+"\n").getBytes());
6
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
92 fosList.add(fos);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
93 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
94
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
95 for (int i=0; i<count; i++) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
96 for (Vertex v : ltv.getAllVertices() ) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
97 ltv.computePageRank(v);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
98 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
99
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
100 for (int index=0; index<nodeIds.length; index++){
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
101 FileOutputStream fos = fosList.get(index);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
102 printPageRankLog(fos, ltv, nodeIds[index], i);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
103 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
104 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
105 for (FileOutputStream fos: fosList) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
106 fos.close();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
107 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
108
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
109 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
110
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
111 // Write PageRank in descending order to fos.
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
112 public static void descendingOrder(HashMap<String, WikiPage> wikiHash , FileOutputStream fos) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
113 ArrayList<WikiPage> list = new ArrayList<WikiPage>();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
114 for (String title : wikiHash.keySet()) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
115 WikiPage w = wikiHash.get(title);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
116 list.add(w);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
117 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
118 Collections.sort(list, new Comparator<WikiPage>(){
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
119 public int compare(WikiPage w1, WikiPage w2) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
120 return (int)(w2.getRank()*Math.pow(10, 10)) - (int)(w1.getRank()*Math.pow(10,10));
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
121 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
122 });
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
123
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
124 for (WikiPage w : list) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
125 w.printInfo(fos);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
126 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
127 fos.close();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
128
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
129 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
130
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
131 public static void printPageRankLog(FileOutputStream fos, int x, double rank) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
132 fos.write( (x+" "+ rank+"\n").getBytes() );
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
133 fos.flush();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
134 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
135
7
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
136 public static void printPageRankLog(FileOutputStream fos, LinkToVertex ltv, String id, int x) throws IOException {
c7b139ff27e2 compute PageRank. initial value is 1/ AllVertexNumber
one
parents: 6
diff changeset
137 double rank = ltv.getPageRank(ltv.getVertexById(id));
6
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
138 fos.write( (x+" "+ rank+"\n").getBytes() );
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
139 fos.flush();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
140 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
141
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
142 }