changeset 13:0ef7268bbbac draft

create descendingOrder(Graph,FileOutputStream) method.
author one
date Sat, 08 Sep 2012 04:12:19 +0900
parents 7e38484474f4
children 86567db31710
files src/pagerank/LinkConvertGraph.java src/pagerank/LinkToVertex.java src/pagerank/TPReadWikiLink.java src/sample/CreateTinkerGraph.java src/sample/GremlinSample.java
diffstat 5 files changed, 176 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/src/pagerank/LinkConvertGraph.java	Thu Sep 06 06:05:24 2012 +0900
+++ b/src/pagerank/LinkConvertGraph.java	Sat Sep 08 04:12:19 2012 +0900
@@ -85,6 +85,15 @@
 		}		
 	}
 	
+	/** Writes one record to os ("title: key", tab-indented links, blank line) as UTF-8, not the platform charset. */
+	void printLinkLog(Map.Entry<String, HashSet<String>> map, FileOutputStream os) throws IOException {
+		StringBuilder record = new StringBuilder("title: ").append(map.getKey()).append("\n");
+		for (String link : map.getValue()) {
+			record.append("\t").append(link).append("\n");
+		}
+		os.write(record.append("\n").toString().getBytes("UTF-8"));
+		os.flush();
+	}
 	
 	
 	
@@ -92,56 +101,57 @@
 	public static void main(String[] args) {
 //		final String filename = "./resources/article.xml";
 		final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
+//		final String fileDB = "./resources/tinkerpopDB";
+
+
+		final long PAGENUM = 100; 
+		final String fileDB = "./resources/tinkerGraph"+ Long.toString(PAGENUM);
+		final String logFile = "./resources/wikiLink"+Long.toString(PAGENUM)+".log";
 		
 		LinkConvertGraph lcg;
 
 		try {
-			
 			lcg = new LinkConvertGraph(filename);
-
 			lcg.parseXml();
 //			lcg.printHash();
-		
-			FileOutputStream fos = new FileOutputStream("./resources/wikiLink.log");
-			lcg.printHash(fos);
 
 			HashMap<String,HashSet<String>> hash = lcg.getHash();
 
-			
-			final String fileDB = "./resources/tinkerpopDB";
-			
 			Graph graph = new TinkerGraph();
-			FileOutputStream out = new FileOutputStream(new File(fileDB));
 			LinkToVertex ltv = new LinkToVertex(graph);
 
-			long countId = 1;
+			FileOutputStream fos = new FileOutputStream(logFile);
+			long countId = 0;
 			for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
+				lcg.printLinkLog(map, fos);
+				if (PAGENUM <= countId) break;
 				String pageTitle = map.getKey();
-				
 				Vertex v;// = graph.addVertex(null);
-				
 				if ( ltv.getId(pageTitle) == null ) {
-					v = ltv.createVertexWithPageTitle(pageTitle,countId);
+					v = ltv.createVertexWithPageTitle(pageTitle);
 					ltv.setPageRank(v, (Double)0.0);
 					countId++;
+					if (PAGENUM <= countId) break;
 				} else {
 					v = ltv.getVertex(pageTitle);
 				}
-
+				
 				for (String linkPageTitle : map.getValue()) {
 					Vertex linkV;
 					if ( ltv.getId(linkPageTitle) == null) {
-						linkV = ltv.createVertexWithPageTitle(linkPageTitle,countId);
+						linkV = ltv.createVertexWithPageTitle(linkPageTitle);
 						countId++;
 						ltv.setPageRank(linkV, (Double)0.0);
 					} else {
 						linkV = ltv.getVertex(linkPageTitle);
 					}
 					ltv.setHasLink(v, linkV);
+					if (PAGENUM <= countId) break;
 				}
 				
 			}
 
+			FileOutputStream out = new FileOutputStream(new File(fileDB));
 			GraphMLWriter.outputGraph(graph, out);
 
 		} catch (FileNotFoundException e) {
--- a/src/pagerank/LinkToVertex.java	Thu Sep 06 06:05:24 2012 +0900
+++ b/src/pagerank/LinkToVertex.java	Sat Sep 08 04:12:19 2012 +0900
@@ -6,6 +6,8 @@
 import com.tinkerpop.blueprints.Edge;
 import com.tinkerpop.blueprints.Graph;
 import com.tinkerpop.blueprints.Vertex;
+import com.tinkerpop.gremlin.java.GremlinPipeline;
+import com.tinkerpop.pipes.util.iterators.SingleIterator;
 
 import pagerank.WikiPage;
 
@@ -14,12 +16,15 @@
 	Graph graph;
 	public final static String PAGE_TITLE = "pageTitle";
 	public final static String PAGE_RANK = "pageRank";
-	// pageIdTable
-	// key: pageTitle value: Vertex ID
+
+	/* pageIdTable
+	 * key: pageTitle value: Vertex ID
+	 */
 	private HashMap<String, Object> pageIdTable = new HashMap<String, Object>();
 
-	// wikiPageHash 
-	// key: pageTitle  value: wikiPage(class)
+	/* wikiPageHash 
+	 * key: pageTitle  value: wikiPage(class)
+	 */
 	private HashMap<String, WikiPage> wikiPageHash = new HashMap<String, WikiPage>();
 	private long AllVertexNumber;
 
@@ -108,7 +113,7 @@
 		}
 	}
 	
-	long searchAllVertices() {
+	public long searchAllVertices() {
 		AllVertexNumber = 0;
 		for (Vertex v : graph.getVertices()) {
 			if ( (v.getProperty(PAGE_TITLE) != null) && 
@@ -221,15 +226,35 @@
 			double pr = (Double)linkV.getProperty(PAGE_RANK);
 			sum += (double) pr / computeOutHasLink(linkV) ;
 		}
-		double tmp = (double) 1 - weight;
-		pageRank = (double) tmp / AllVertexNumber
-				+ (double) sum * weight;
+		pageRank = (double) 1 - weight + (double) sum * weight;
 
 		wiki.setRank(pageRank);
 		v.setProperty(PAGE_RANK, pageRank);
 		return pageRank;
 	}
 	
+	/** Recomputes the PageRank of vertex id from its "HasLink" in-neighbours using Gremlin pipes,
+	 *  stores it on the vertex (and on its WikiPage, when one is registered) and returns it. */
+	public double computePageRankUsingPipes(Object id) {
+		Vertex v = graph.getVertex(id);
+		WikiPage wiki = wikiPageHash.get(v.getProperty(PAGE_TITLE));
+		GremlinPipeline<Vertex,Vertex> pipe = new GremlinPipeline<Vertex,Vertex>();
+		pipe.start(v).in("HasLink");
+		double sum = 0.0;
+		for (Vertex inVer : pipe) {
+			// Each in-neighbour contributes its rank divided by its number of outgoing links.
+			GremlinPipeline<Vertex,Vertex> inPipe = new GremlinPipeline<Vertex,Vertex>();
+			inPipe.start(inVer).out("HasLink");
+			sum += (Double) inVer.getProperty(PAGE_RANK) / inPipe.count();
+		}
+		double pageRank = 1.0 - weight + sum * weight;
+		if (wiki != null) {
+			wiki.setRank(pageRank);
+		}
+		v.setProperty(PAGE_RANK, pageRank);
+		return pageRank;
+	}
+	
 	public void printVertexInfo(int nodeId) {
 		Vertex v = graph.getVertex(nodeId);
 		printInHasLink(v, 1);
--- a/src/pagerank/TPReadWikiLink.java	Thu Sep 06 06:05:24 2012 +0900
+++ b/src/pagerank/TPReadWikiLink.java	Sat Sep 08 04:12:19 2012 +0900
@@ -25,8 +25,11 @@
 
 	public static void main(String[] args) {
 
-		final String fileDB = "./resources/tinkerpopDB";
-		
+//		final String fileDB = "./resources/tinkerpopDB";
+
+		final long PAGENUM = 100; 
+		final String fileDB = "./resources/tinkerGraph"+Long.toString(PAGENUM);	
+		final String pageRankLog = "./resources/wikiPageRank"+Long.toString(PAGENUM)+".log";	
 		
 		try {
 			Graph graph = new TinkerGraph();
@@ -35,28 +38,55 @@
 			in.close();
 			LinkToVertex ltv = new LinkToVertex(graph);
 
-//			ltv.initPageRankAllVertex();
-/*
+			ltv.initPageRankAllVertex();
+
 			final long AllVertexNumber = ltv.searchAllVertices();
 			HashMap<String, WikiPage> wikiHash = ltv.getWikiPageHash();
 			System.out.println("AllVertexNumber = "+AllVertexNumber);
-*/
+
+
+//			String nodeIds[] = {"80", "290", "21", "164", "41972", "103700", "65956", "103700"};
+			String nodeIds[] = {"146","148"};
+			
 /*
-			String nodeIds[] = {"80", "290", "21", "164"};
-			writeComputeTransition(ltv, nodeIds, 50);
+			long start = java.lang.System.currentTimeMillis();
+			writeComputeTransition(ltv, nodeIds, 50, PAGENUM);
+			long end = java.lang.System.currentTimeMillis();
+			long time = end - start;
+			System.out.println(time);
+*/
+			
 
-*/
-/*
-			FileOutputStream fos = new FileOutputStream(new File("./resources/wikiPageRank.log"));
-			descendingOrder(wikiHash, fos);			
+			for (int i=0; i<10; i++) {
+				long start = java.lang.System.currentTimeMillis();
+				for (int j=0; j<10; j++){
+					for (Vertex v : graph.getVertices()) {
+						ltv.computePageRankUsingPipes(v.getId());
+					}
+				}
+				long end = java.lang.System.currentTimeMillis();
+				long time = end - start;
+//				System.out.println(time);
+			}
+/*			
+			long count = 0;
+			for (Vertex v: graph.getVertices()) {
+				count++;
+				System.out.print("No."+count+" ");
+				System.out.print("title: "+v.getProperty("pageTitle"));
+				System.out.print(" pageRank: "+v.getProperty("pageRank"));
+				System.out.println();
+				System.out.flush();
+			}
 */
 
-			
+			FileOutputStream fos = new FileOutputStream(new File(pageRankLog));
+			descendingOrder(graph, fos);			
+			//			descendingOrder(wikiHash, fos);			
 			
 			FileOutputStream out = new FileOutputStream(new File(fileDB));
 			GraphMLWriter.outputGraph(graph, out);
 			out.close();
-
 			
 //			loop(ltv);
 			
@@ -83,21 +113,22 @@
 		}
 	}
 
-	public static void writeComputeTransition(LinkToVertex ltv,final String nodeIds[], int count) throws IOException {
+	public static void writeComputeTransition(LinkToVertex ltv,final String nodeIds[], int count, long pagenum) throws IOException {
 		LinkedList<FileOutputStream> fosList = new LinkedList<FileOutputStream>();
 		for (String id: nodeIds) {
-			String filename = "./resources/NodeId_"+id+".dat";
+			String filename = "./resources/VertexId_"+id+"_num"+Long.toString(pagenum)+".dat";
 			FileOutputStream fos = null;
 			fos = new FileOutputStream(filename);
 
 			Vertex v = ltv.getVertexById(id);
-			fos.write( ("# Node ID "+id+" "+ ltv.getPageTitle(v)+"\n").getBytes());
+			fos.write( ("# Vertex ID "+id+" "+ ltv.getPageTitle(v)+"\n").getBytes());
 			fosList.add(fos);
 		}
 		
 		for (int i=0; i<count; i++) {
 			for (Vertex v : ltv.getAllVertices() ) {
-				ltv.computePageRank(v);
+//				ltv.computePageRank(v);
+				ltv.computePageRankUsingPipes(v.getId());
 			}
 
 			for (int index=0; index<nodeIds.length; index++){
@@ -120,12 +151,37 @@
 		}
 		Collections.sort(list, new Comparator<WikiPage>(){
 				public int compare(WikiPage w1, WikiPage w2) {
-				return (int)(w2.getRank()*Math.pow(10, 10)) - (int)(w1.getRank()*Math.pow(10,10));
+				return Double.compare(w2.getRank(), w1.getRank()); // descending by rank; no int truncation or saturation
 				}
 		});
 		
+		long count = 1;
 		for (WikiPage w : list) {
+			fos.write(("No."+count+"\n").getBytes());
 			w.printInfo(fos);
+			count++;
+		}
+		fos.close();
+		
+	}
+
+	public static void descendingOrder(Graph graph, FileOutputStream fos) throws IOException {
+		ArrayList<WikiPage> list = new ArrayList<WikiPage>();
+		for (Vertex v : graph.getVertices()) {
+			WikiPage w = new WikiPage(v);
+			list.add(w);
+		}
+		Collections.sort(list, new Comparator<WikiPage>(){
+				public int compare(WikiPage w1, WikiPage w2) {
+				return Double.compare(w2.getRank(), w1.getRank()); // descending by rank; no int truncation or saturation
+				}
+		});
+		
+		long count = 1;
+		for (WikiPage w : list) {
+			fos.write(("No."+count+"\n").getBytes());
+			w.printInfo(fos);
+			count++;
 		}
 		fos.close();
 		
--- a/src/sample/CreateTinkerGraph.java	Thu Sep 06 06:05:24 2012 +0900
+++ b/src/sample/CreateTinkerGraph.java	Sat Sep 08 04:12:19 2012 +0900
@@ -11,27 +11,36 @@
 import com.tinkerpop.blueprints.Edge;
 import com.tinkerpop.blueprints.Graph;
 import com.tinkerpop.blueprints.Vertex;
+import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
 import com.tinkerpop.blueprints.util.io.graphml.GraphMLReader;
 import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;
 
 public class CreateTinkerGraph {
 
-	public static final String filename = "./resources/tinkerDB";
+	public static final String filename = "./resources/sampleDB";
 	
 	public static void main(String[] args) {
 
-		TinkerGraph graph = new TinkerGraph("/db");
-		Vertex a = graph.addVertex(null);
-		Vertex b = graph.addVertex(null);
-		a.setProperty("name", "maro");
-		b.setProperty("name", "Peter");
-		Edge e = graph.addEdge(null, a, b, "knows");
-		System.out.println(e.getVertex(Direction.OUT).getProperty("name")
-				+ "--" + e.getLabel() + "-->"
-				+ e.getVertex(Direction.IN).getProperty("name"));
-		graph.shutdown();
+		try {
+//			outputGraph();
+/*			
+			Graph graph = new TinkerGraph();
+			FileInputStream in = new FileInputStream(new File(filename));
+		
+			GraphMLReader.inputGraph(graph, in);
+			
+			for (Vertex v: graph.getVertices()) {
+				System.out.println(v.getId().getClass());
+			}
+*/
+			readGraph();
 
-
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		
+		
 	}
 
 	public static void createGraph() {
--- a/src/sample/GremlinSample.java	Thu Sep 06 06:05:24 2012 +0900
+++ b/src/sample/GremlinSample.java	Sat Sep 08 04:12:19 2012 +0900
@@ -16,8 +16,8 @@
 	
 	public static void main(String[] args) {
 	
-		final String fileDB = "./resources/tinkerpopDB";
-		
+//		final String fileDB = "./resources/tinkerpopDB";
+		final String fileDB = "./resources/tinkerGraph100";		
 		
 		try {
 			Graph graph = new TinkerGraph();
@@ -26,15 +26,29 @@
 			in.close();
 			LinkToVertex ltv = new LinkToVertex(graph);
 
-			String id = "21"; 
-			GremlinPipeline pipe = new GremlinPipeline(); 
-			pipe.start(graph.getVertex(id)).out("HasLink").property("pageTitle");
-			pipe.setStarts(new SingleIterator<Vertex>(graph.getVertex(id)));
+			final long AllVertexNumber = ltv.searchAllVertices();
+			System.out.println("AllVertexNumber = "+ AllVertexNumber);
+
+			for (Vertex o : graph.getVertices()) {
+				System.out.println(o);
+			}
+/*			
+			GremlinPipeline pipe = new GremlinPipeline();
+			pipe.start(graph.getVertex(1));
+*/			
+			
+/*
+			String id = "85956";
+			GremlinPipeline pipe = new GremlinPipeline();
+			pipe.start(graph.getVertex(id)).out("HasLink");//.property("pageTitle");
+			pipe.property("pageTitle");
+//			pipe.start(graph.getVertex(id)).out("HasLink");
+//			pipe.setStarts(new SingleIterator<Vertex>(graph.getVertex(id)));
+
 			for (Object title : pipe) {
-				System.out.println((String)title);				
+				System.out.println(title);
 			}
-			
-		
+*/			
 		} catch (NumberFormatException e){
 			System.out.println("Program exit");
 		} catch (Exception e) {