changeset 1:403e35dd9b6d

word count ...
author Yutaka_Kinjyo
date Tue, 12 Jul 2011 17:17:11 +0900
parents 0e6e76dbdb0f
children 1a32564347d5
files WordCount/Makefile WordCount/main.cc WordCount/oclUtils.cc WordCount/oclUtils.h WordCount/word_count.cl
diffstat 5 files changed, 82 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/WordCount/Makefile	Tue Jul 12 11:12:51 2011 +0900
+++ b/WordCount/Makefile	Tue Jul 12 17:17:11 2011 +0900
@@ -1,7 +1,7 @@
 TARGET= word_count
 CC = g++
 WARN = -Wall
-CFLAGS = -isysroot /Developer/SDKs/MacOSX10.6.sdk
+CFLAGS = -g #-isysroot /Developer/SDKs/MacOSX10.6.sdk
 LIBS = -framework OpenCL #-lclsdk
 
 HEADERS = $(shell ls *.h)
--- a/WordCount/main.cc	Tue Jul 12 11:12:51 2011 +0900
+++ b/WordCount/main.cc	Tue Jul 12 17:17:11 2011 +0900
@@ -9,21 +9,19 @@
 #include <OpenCL/opencl.h>
 #include <oclUtils.h>
 
-#define OUT_PARAM_NUM 2
+#define OUT_PARAM_NUM 4
+#define PRINT_PARAM_NUM 2
 
 typedef struct {
     caddr_t file_mmap;
     off_t size;
 } st_mmap_t;
 
-/*与えられたsizeをfix_byte_sizeの倍数にする(丸め込むっていうのかな?)*/
-static int
-fix_byte(int size,int fix_byte_size)
-{
-    size = (size/fix_byte_size)*fix_byte_size  + ((size%fix_byte_size)!= 0)*fix_byte_size;
-    
-    return size;
-}
+typedef struct {        /* same fields, in the same order, as param_t in word_count.cl */
+    int work_num;       /* NOTE(review): presumably the number of work-items -- confirm */
+    int size;           /* NOTE(review): presumably the per-work-item chunk size -- confirm */
+    int remain_size;    /* NOTE(review): presumably the size of the final chunk -- confirm */
+} param_t;
 
 
 static st_mmap_t
@@ -44,12 +42,9 @@
 	fprintf(stderr,"can't fstat %s\n",filename);
     }
 
-    printf("file size %d\n",(int)sb.st_size);
-   
-    /*sizeをページングサイズの倍数にあわせる*/
-    st_mmap.size = fix_byte(sb.st_size,4096);
+    st_mmap.size = sb.st_size;
 
-    printf("fix 4096byte file size %d\n",(int)st_mmap.size);
+    printf("file size %d\n",(int)st_mmap.size);
 
     st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0);
     if (st_mmap.file_mmap == (caddr_t)-1) {
@@ -115,7 +110,7 @@
     cl_uint ret_num_devices = NULL;
     // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる
     // CL_DEVICE_TYPE_DEFAULT はどうなるのか
-    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 
+    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_CPU, 
                           1, &device_id, &ret_num_devices);
 
     oclCheckError(ret, CL_SUCCESS);
@@ -128,8 +123,13 @@
     // カーネルプログラムを読み込む
     cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
                                                    (const size_t *)&source_size, &ret);
+
     ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
 
+    if (ret != CL_SUCCESS) {
+        oclLogBuildInfo(program, device_id);
+    }
+
     oclCheckError(ret, CL_SUCCESS);
 
     //カーネルプログラムをビルド
@@ -149,7 +149,7 @@
 
     // 必要なパラメータのオブジェクト
     cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 
-                                         sizeof(int), NULL, &ret);
+                                         sizeof(param_t), NULL, &ret);
 
     ret = clEnqueueWriteBuffer(command_queue, param_memobj,
                                CL_TRUE, 0, sizeof(int), (int*)(&st_mmap.size),
@@ -166,12 +166,14 @@
 
     // このdivi_size はどうやって決めるよ
     int divi_size = 1024;
-    st_mmap.size / 1024;
-    
+    size_t global_work_size = (st_mmap.size + divi_size - 1) / divi_size;
+    int remain_size = st_mmap.size - global_work_size * divi_size; 
 
-    size_t global_work_size = 4;
+    /*
+     * Output array: per work-item it holds the word count, the line count,
+     * and the split-point flags, laid out as word_num, line_num, head, tail.
+     */
 
-    // 行数、単語数を格納する2のint配列
     int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size;
     cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
                                        out_size * sizeof(char), NULL, &ret);
@@ -191,7 +193,7 @@
      * kernel実行
    * 並列に処理せずに work-item ひとつで動かしたい場合は、clEnqueueNDRangeKernel の簡易版 clEnqueueTask が使える
      *
-    */   
+     */
 
     //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
     // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず
@@ -203,12 +205,14 @@
 
     // 演算結果の読み込み
     ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, 
-                              out_size * sizeof(char), out_data, 0, NULL, NULL);
+                              out_size, out_data, 0, NULL, NULL);
 
     oclCheckError(ret, CL_SUCCESS);
 
+    printf("global_work_size %d\n", (int)global_work_size);
+
     for (int i = 0; i < global_work_size; i++) {
-        for (int j = 0; j < OUT_PARAM_NUM; j++) {
+        for (int j = 0; j < PRINT_PARAM_NUM; j++) {
             printf("%d ", out_data[i*OUT_PARAM_NUM+j]);
         }
         printf("\n");
--- a/WordCount/oclUtils.cc	Tue Jul 12 11:12:51 2011 +0900
+++ b/WordCount/oclUtils.cc	Tue Jul 12 17:17:11 2011 +0900
@@ -83,3 +83,19 @@
     return (index >= 0 && index < errorCount) ? errorString[index] : "Unspecified Error";
 }
 
+//////////////////////////////////////////////////////////////////////////////
+//! Print the OpenCL compiler's build log for the requested program & device
+//!
+//! @param cpProgram    OpenCL program
+//! @param cdDevice     device of interest
+//////////////////////////////////////////////////////////////////////////////
+void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice)
+{
+  // Zero-filled so the buffer is a valid (empty) string even if the query writes nothing.
+  char cBuildLog[10240] = {0};
+  cl_int err = clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG,
+                                     sizeof(cBuildLog), cBuildLog, NULL);
+  if (err != CL_SUCCESS) printf("clGetProgramBuildInfo failed: %d\n", (int)err);
+  printf("Build Log:\n%s\n", cBuildLog);
+}
+
--- a/WordCount/oclUtils.h	Tue Jul 12 11:12:51 2011 +0900
+++ b/WordCount/oclUtils.h	Tue Jul 12 17:17:11 2011 +0900
@@ -46,5 +46,9 @@
     }
 }
 
+
+extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice);
+
+
 #endif
 
--- a/WordCount/word_count.cl	Tue Jul 12 11:12:51 2011 +0900
+++ b/WordCount/word_count.cl	Tue Jul 12 17:17:11 2011 +0900
@@ -1,26 +1,47 @@
-#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
+typedef struct {        /* parameters read by the word_count kernel */
+    int work_num;       /* the work-item with gid == work_num - 1 scans remain_size characters */
+    int size;           /* chunk stride: each work-item starts at gid * size; length of all other chunks */
+    int remain_size;    /* length scanned by that last work-item */
+} param_t;
 
-__kernel void word_count(__global char* text_data,  __global int* size, __global int* out_data)
+__kernel void word_count(__global char* text_data,  __global param_t* param, __global int* out_data)
 {
 
-  //unsigned long long *head_tail_flag = o_data +2;
+    /*
+     *                   out_data  
+     *
+     *  ___int____ ___int____ ___int____ ___int____ 
+     *  __________ __________ __________ __________ 
+     * |          |          |          |          |
+     * | word_num | line_num |   head   |   tail   |
+     * |__________|__________|__________|__________|
+     *
+     */
+
 
   uint gid = get_global_id(0);
+  int index = gid*4;
+  int head = gid*4+2;
+  int tail = gid*4+3;
 
-  int length = *size;
+  int work_num = param->work_num;
+  int size = param->size;
+  int remain_size = param->remain_size;
+  // The last work-item (gid == work_num - 1) uses remain_size as its length; gid starts at 0.
+  int length = ( (work_num - 1) != gid ) ? size : remain_size;
   int word_flag = 0;
   int word_num = 0;
   int line_num = 0;
   int i = 0;
-    
-  int start = gid*length/2;
-  int end = start + length/2;
-  
-  //head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A);
-  //word_num -= 1-head_tail_flag[0];
+  int start = gid*size;
+  int end = start + length;
 
-  for (i = 0; i < length; i++) {
-  //for (i = start; i < end; i++) {
+  out_data[head] = (text_data[0] != 0x20) && (text_data[0] != 0x0A);
+  word_num -= 1-out_data[head];
+
+
+  //for (i = 0; i < length; i++) {
+  for (i = start; i < end; i++) {
     if (text_data[i] == 0x20) {
       word_flag = 1;
     } else if (text_data[i] == 0x0A) {
@@ -34,17 +55,11 @@
 
   word_num += word_flag;
 
-  //head_tail_flag[1] = (i_data[i-1] != 0x20) && (i_data[i-1] != 0x0A);
-  
-  // s->printf("SPE word %d line %d\n",word_num,line_num);
+  out_data[tail] = (text_data[i-1] != 0x20) && (text_data[i-1] != 0x0A);
 
-  int index = gid*2;
-  
   out_data[index] = word_num;
   out_data[index+1] = line_num;
 
-
-  
   return 0;
 
 }