changeset 1870:44fa0f1320a9 draft

run wordcount with iterate
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Thu, 26 Dec 2013 17:05:49 +0900
parents 5e06a8089625
children c3f7ba33222d
files TaskManager/ChangeLog TaskManager/Gpu/GpuScheduler.cc TaskManager/kernel/ppe/TaskList.h example/multiply/gpu/Multi.cl example/multiply/main.cc example/word_count/gpu/Exec.cl example/word_count/gpu/Exec_Data_Parallel.cl example/word_count/main.cc example/word_count/ppe/Exec.cc example/word_count/ppe/Exec_Data_Parallel.cc
diffstat 10 files changed, 51 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/ChangeLog	Thu Dec 26 15:33:06 2013 +0900
+++ b/TaskManager/ChangeLog	Thu Dec 26 17:05:49 2013 +0900
@@ -8,7 +8,7 @@
 
 	現在の GpuScheduler の pipeline 実行は2並列(cur=0,1)
 	これをn個に拡張する
-	
+
 2013-11-23 Shinji kONO <kono@ie.u-ryukyu.ac.jp>
 
 	Open CL の event の扱い方が良くない
@@ -20,7 +20,7 @@
 	    memout    x n
 	    read_event       x n
 	    write_event      x n
-	    kernel_event  
+	    kernel_event
 	これらを、すべて二重に持つ。必要なら n の分 extension する。
 
 	event は、上書きす前にすべて、release する必要がある。
--- a/TaskManager/Gpu/GpuScheduler.cc	Thu Dec 26 15:33:06 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Thu Dec 26 17:05:49 2013 +0900
@@ -247,7 +247,7 @@
                 ret = clEnqueueWriteBuffer(command_queue, memparam, CL_TRUE, 0,sizeof(memaddr)*nextTask->param_count,
                                            nextTask->param(0), 0, NULL, &memin[cur].event[0]);
                 if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
-
+                
                 ret = clSetKernelArg(kernel[cur], 0, sizeof(memaddr),(void *)&memin[cur].buf[0]);
                 if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
 
@@ -270,7 +270,7 @@
                     param++;
                 }
                 memin[cur].size  = param; // +1 means param
-
+                
                 for(int i = 0; i<nextTask->outData_count;i++) { // set output data
                     ListElement *output_buf = nextTask->outData(i);
                     if (output_buf->size==0) break;
--- a/TaskManager/kernel/ppe/TaskList.h	Thu Dec 26 15:33:06 2013 +0900
+++ b/TaskManager/kernel/ppe/TaskList.h	Thu Dec 26 17:05:49 2013 +0900
@@ -22,8 +22,8 @@
     size_t x,y,z; // 8*3 byte
     unsigned long long task_start_time,task_end_time; // 8*2 byte
     Task tasks[TASK_MAX_SIZE]; // 32*TASK_MAX_SIZE
-
-
+    
+    
     TaskPtr last() { return (TaskPtr)(((memaddr)tasks)+lastTask); }
     void set_last(Task *t) { lastTask = ((memaddr)t) - ((memaddr)tasks); }
     void init() { lastTask = ((memaddr)&tasks[TASK_MAX_SIZE])-(memaddr)(tasks); waiter=this; dim=0;}
--- a/example/multiply/gpu/Multi.cl	Thu Dec 26 15:33:06 2013 +0900
+++ b/example/multiply/gpu/Multi.cl	Thu Dec 26 17:05:49 2013 +0900
@@ -1,10 +1,13 @@
 __kernel void
-multi(__global const long *params,__global const float *A, __global const float*B, __global float *C)
+multi(__global const long *params,__global const float *A, __global const float*B,__global float* C_, __global float *C)
 {
     //    int i=get_global_id(0);
     long length = (long)params[0];
     long id = get_global_id(0);
     //    for(int i=0;i<length;i++) {
-    C[id]=A[id]*B[id];
+    if(length)
+        C[id]=A[id]*B[id];
+    else
+        C[id] = C_[id];
     //}
 }
--- a/example/multiply/main.cc	Thu Dec 26 15:33:06 2013 +0900
+++ b/example/multiply/main.cc	Thu Dec 26 17:05:49 2013 +0900
@@ -70,8 +70,9 @@
 void
 multi_init(TaskManager *manager)
 {
-    HTask *multiply;
-    
+    HTask* multiply;
+    HTask* hoge;
+
     A = new float[length];
     B = new float[length];
     C = new float[length];
@@ -90,7 +91,6 @@
     //    for(int i=0;i<10;i++) {
     multiply = manager->create_task(MULTIPLY_TASK);
     multiply->set_cpu(spe_cpu);
-        
     /**
      * Set of Input Data
      *   add_inData(address of input data, size of input data);
@@ -98,14 +98,25 @@
      */
     multiply->set_inData(0,(memaddr)A, sizeof(float)*length);
     multiply->set_inData(1,(memaddr)B, sizeof(float)*length);
+    multiply->set_inData(2,(memaddr)C,sizeof(float)*length);
     /**
      * Set of OutPut area
      *   add_outData(address of output area, size of output area);
      */
     multiply->set_outData(0,(memaddr)C, sizeof(float)*length);
     // param 0に0~length-1をsetしたtaskをlength個spawnする
-    multiply->set_param(0,(long)length);
+    multiply->set_param(0,(long)1);
     multiply->iterate(length); 
+    
+    hoge = manager->create_task(MULTIPLY_TASK);
+    hoge->set_cpu(spe_cpu);
+    hoge->set_inData(0,(memaddr)A,sizeof(float)*length);
+    hoge->set_inData(1,(memaddr)B,sizeof(float)*length);
+    hoge->set_inData(2,(memaddr)C,sizeof(float)*length);
+    hoge->set_outData(0,(memaddr)C,sizeof(float)*length);
+    hoge->set_param(0,(long)0);
+    hoge->wait_for(multiply);
+    hoge->iterate(length);
     //        multiply->spawn();
     //    }
 }
--- a/example/word_count/gpu/Exec.cl	Thu Dec 26 15:33:06 2013 +0900
+++ b/example/word_count/gpu/Exec.cl	Thu Dec 26 17:05:49 2013 +0900
@@ -12,7 +12,6 @@
 
     head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A);
     word_num -= 1-head_tail_flag[0];
-
     for (; i < length; i++) {
         if (i_data[i] == 0x20) {
             word_flag = 1;
--- a/example/word_count/gpu/Exec_Data_Parallel.cl	Thu Dec 26 15:33:06 2013 +0900
+++ b/example/word_count/gpu/Exec_Data_Parallel.cl	Thu Dec 26 17:05:49 2013 +0900
@@ -1,23 +1,22 @@
 __kernel void
 wordcount_parallel(__constant long *param,
                    __global char *rbuf,
-                   __global unsigned long long *wbuf)
+                   __global unsigned long *wbuf)
 {
-    long task_spwaned = (long)param[0];
-    long division_size = (long)param[1];
-    long length = (long)param[2];
-    long out_size = (long)param[3];
+    long task_spwaned = param[0];
+    long division_size = param[1];
+    long length = param[2];
+    long out_size = param[3];
     
     long allocation = task_spwaned + (long)get_global_id(0);
+    __global char *i_data = rbuf + allocation*division_size;
+    __global unsigned long *o_data = wbuf + allocation*out_size;
+    __global unsigned long *head_tail_flag = o_data+2;
+    long word_flag = 0;
+    long word_num = 0;
+    long line_num = 0;
+    long i = 0;
     
-    __global char *i_data = rbuf + allocation*division_size;
-    __global unsigned long long *o_data = wbuf + allocation*out_size;
-    __global unsigned long long *head_tail_flag = o_data +2;
-    int word_flag = 0;
-    int word_num = 0;
-    int line_num = 0;
-    int i = 0;
-
     head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A);
     word_num -= 1-head_tail_flag[0];
 
@@ -38,7 +37,7 @@
 
     // s->printf("SPE word %d line %d\n",word_num,line_num);
 
-    o_data[0] = (unsigned long long)word_num;
-    o_data[1] = (unsigned long long)line_num;
+    o_data[0] = (unsigned long)word_num;
+    o_data[1] = (unsigned long)line_num;
 
 }
--- a/example/word_count/main.cc	Thu Dec 26 15:33:06 2013 +0900
+++ b/example/word_count/main.cc	Thu Dec 26 17:05:49 2013 +0900
@@ -134,8 +134,11 @@
                 w->size -= size*array_task_num;
                 if(w->size < 0) array_task_num -= 1;
                 h_exec = manager->create_task(TASK_EXEC_DATA_PARALLEL);
+                h_exec->flip();
                 h_exec->set_inData(0,w->file_mmap,w->file_size);
-                h_exec->set_outData(0,w->o_data,w->out_size_);
+                h_exec->set_inData(1,w->o_data,w->out_size_);
+                h_exec->set_outData(0,w->file_mmap,w->file_size);
+                h_exec->set_outData(1,w->o_data,w->out_size_);
                 h_exec->set_param(0,(long)i);
                 h_exec->set_param(1,(long)w->division_size);
                 h_exec->set_param(2,(long)size);
@@ -150,8 +153,11 @@
                 
                 if(w->size < 0) {
                     h_exec = manager->create_task(TASK_EXEC_DATA_PARALLEL);
+                    h_exec->flip();
                     h_exec->set_inData(0,w->file_mmap,w->file_size);
-                    h_exec->set_outData(0,w->o_data,w->out_size_);
+                    h_exec->set_inData(1,w->o_data,w->out_size_);
+                    h_exec->set_outData(0,w->file_mmap,w->file_size);
+                    h_exec->set_outData(1,w->o_data,w->out_size_);
                     h_exec->set_param(0,(long)w->task_spwaned);
                     h_exec->set_param(1,(long)w->division_size);
                     h_exec->set_param(2,(long)(size+w->size));
@@ -160,6 +166,7 @@
                     t_next->wait_for(h_exec);
                     h_exec->set_cpu(spe_cpu);
                     h_exec->iterate(1);
+
                     w->task_num -= 1;
                     w->task_spwaned += 1;
                     array_task_num += 1;
--- a/example/word_count/ppe/Exec.cc	Thu Dec 26 15:33:06 2013 +0900
+++ b/example/word_count/ppe/Exec.cc	Thu Dec 26 17:05:49 2013 +0900
@@ -17,7 +17,7 @@
     int word_num = 0;
     int line_num = 0;
     int i = 0;
-
+    s->printf("%ld\n",o_data);
     head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A);
     word_num -= 1-head_tail_flag[0];
 
--- a/example/word_count/ppe/Exec_Data_Parallel.cc	Thu Dec 26 15:33:06 2013 +0900
+++ b/example/word_count/ppe/Exec_Data_Parallel.cc	Thu Dec 26 17:05:49 2013 +0900
@@ -13,9 +13,8 @@
     long division_size = (long)s->get_param(1);
     long length = (long)s->get_param(2);
     long out_size = (long)s->get_param(3);
-
+    
     long allocation = task_spwaned + (long)s->x;
-    
     char *i_data = (char *)rbuf + allocation*division_size;
     unsigned long long *o_data = (unsigned long long*)wbuf + allocation*out_size;
     unsigned long long *head_tail_flag = o_data +2;