annotate src/pagerank/TPReadWikiLink.java @ 6:8ea2212eaee0 draft

add TPReadWikiLink.java
author one
date Wed, 05 Sep 2012 15:39:26 +0900
parents
children c7b139ff27e2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
1 package pagerank;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
2
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
3 import java.io.BufferedReader;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
4 import java.io.File;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
5 import java.io.FileInputStream;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
6 import java.io.FileNotFoundException;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
7 import java.io.FileOutputStream;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
8 import java.io.IOException;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
9 import java.io.InputStreamReader;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
10 import java.util.ArrayList;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
11 import java.util.Collections;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
12 import java.util.Comparator;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
13 import java.util.HashMap;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
14 import java.util.LinkedList;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
15
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
16 import com.tinkerpop.blueprints.Graph;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
17 import com.tinkerpop.blueprints.Vertex;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
18 import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
19 import com.tinkerpop.blueprints.util.io.graphml.GraphMLReader;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
20 import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
21
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
22 import pagerank.WikiPage;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
23
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
24 public class TPReadWikiLink {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
25
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
26 public static void main(String[] args) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
27
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
28 final String fileDB = "./resources/tinkerpopDB";
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
29
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
30
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
31 try {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
32 Graph graph = new TinkerGraph();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
33 FileInputStream in = new FileInputStream(new File(fileDB));
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
34 GraphMLReader.inputGraph(graph, in);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
35 in.close();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
36 LinkToVertex ltv = new LinkToVertex(graph);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
37
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
38 final long AllVertexNumber = ltv.searchAllVertices();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
39 HashMap<String, WikiPage> wikiHash = ltv.getWikiPageHash();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
40 System.out.println("AllVertexNumber = "+AllVertexNumber);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
41
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
42
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
43 for (Vertex v : graph.getVertices() ) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
44 String id = (String) v.getId();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
45 System.out.println("id:"+id+" title:"+v.getProperty("pageTitle")+" rank:"+v.getProperty("pageRank"));
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
46 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
47 /*
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
48 String nodeIds[] = {"1574", "2829", "2850", "3618"};
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
49 writeComputeTransition(ltv, nodeIds, 30);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
50
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
51 FileOutputStream out = new FileOutputStream(new File(fileDB));
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
52 GraphMLWriter.outputGraph(graph, out);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
53 out.close();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
54 */
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
55
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
56 // loop(ltv);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
57
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
58 } catch (NumberFormatException e){
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
59 System.out.println("Program exit");
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
60 } catch (Exception e) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
61 e.printStackTrace();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
62 } finally {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
63
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
64 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
65
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
66 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
67
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
68 public static void loop(LinkToVertex ltv) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
69 BufferedReader r = new BufferedReader( new InputStreamReader(System.in), 1);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
70 System.out.print("\nPlease enter Node Id...>");
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
71 System.out.flush();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
72 String s;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
73 while ((s = r.readLine()) != null ) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
74 int nodeId = Integer.parseInt(s);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
75 ltv.printVertexInfo(nodeId);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
76 System.out.print("\nPlease enter Node Id...>");
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
77 System.out.flush();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
78 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
79 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
80
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
81 public static void writeComputeTransition(LinkToVertex ltv,final String nodeIds[], int count) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
82 LinkedList<FileOutputStream> fosList = new LinkedList<FileOutputStream>();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
83 for (String i: nodeIds) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
84 String filename = "./resources/NodeId_"+i+".dat";
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
85 FileOutputStream fos = null;
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
86 fos = new FileOutputStream(filename);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
87
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
88 Vertex v = ltv.getVertex(i);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
89 fos.write( ("# Node ID "+i+" "+ (String)ltv.getPageTitle(v)).getBytes());
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
90 fosList.add(fos);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
91 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
92
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
93 for (int i=0; i<count; i++) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
94 for (Vertex v : ltv.getAllVertices() ) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
95 ltv.computePageRank(v);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
96 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
97
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
98 for (int index=0; index<nodeIds.length; index++){
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
99 FileOutputStream fos = fosList.get(index);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
100 printPageRankLog(fos, ltv, nodeIds[index], i);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
101 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
102 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
103 for (FileOutputStream fos: fosList) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
104 fos.close();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
105 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
106
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
107 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
108
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
109 // Write PageRank in descending order to fos.
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
110 public static void descendingOrder(HashMap<String, WikiPage> wikiHash , FileOutputStream fos) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
111 ArrayList<WikiPage> list = new ArrayList<WikiPage>();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
112 for (String title : wikiHash.keySet()) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
113 WikiPage w = wikiHash.get(title);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
114 list.add(w);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
115 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
116 Collections.sort(list, new Comparator<WikiPage>(){
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
117 public int compare(WikiPage w1, WikiPage w2) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
118 return (int)(w2.getRank()*Math.pow(10, 10)) - (int)(w1.getRank()*Math.pow(10,10));
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
119 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
120 });
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
121
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
122 for (WikiPage w : list) {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
123 w.printInfo(fos);
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
124 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
125 fos.close();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
126
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
127 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
128
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
129 public static void printPageRankLog(FileOutputStream fos, int x, double rank) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
130 fos.write( (x+" "+ rank+"\n").getBytes() );
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
131 fos.flush();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
132 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
133
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
134 public static void printPageRankLog(FileOutputStream fos, LinkToVertex ltv, String nodeId, int x) throws IOException {
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
135 double rank = ltv.getPageRank(ltv.getNode(nodeId));
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
136 fos.write( (x+" "+ rank+"\n").getBytes() );
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
137 fos.flush();
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
138 }
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
139
8ea2212eaee0 add TPReadWikiLink.java
one
parents:
diff changeset
140 }