diff WordCount/main.cc @ 1:403e35dd9b6d

word count ...
author Yutaka_Kinjyo
date Tue, 12 Jul 2011 17:17:11 +0900
parents 0e6e76dbdb0f
children 1a32564347d5
line wrap: on
line diff
--- a/WordCount/main.cc	Tue Jul 12 11:12:51 2011 +0900
+++ b/WordCount/main.cc	Tue Jul 12 17:17:11 2011 +0900
@@ -9,21 +9,19 @@
 #include <OpenCL/opencl.h>
 #include <oclUtils.h>
 
-#define OUT_PARAM_NUM 2
+#define OUT_PARAM_NUM 4
+#define PRINT_PARAM_NUM 2
 
 typedef struct {
     caddr_t file_mmap;
     off_t size;
 } st_mmap_t;
 
-/*与えられたsizeをfix_byte_sizeの倍数にする(丸め込むっていうのかな?)*/
-static int
-fix_byte(int size,int fix_byte_size)
-{
-    size = (size/fix_byte_size)*fix_byte_size  + ((size%fix_byte_size)!= 0)*fix_byte_size;
-    
-    return size;
-}
+typedef struct {        /* kernel parameter block; the param buffer is created with sizeof(param_t) */
+    int work_num;       /* presumably the number of work-items (cf. global_work_size) -- TODO confirm against the kernel */
+    int size;           /* presumably the input size in bytes -- TODO confirm */
+    int remain_size;    /* presumably the size of the trailing partial chunk -- TODO confirm */
+} param_t;              /* NOTE(review): no code visible in this diff fills in a param_t; verify the host-side upload */
 
 
 static st_mmap_t
@@ -44,12 +42,9 @@
 	fprintf(stderr,"can't fstat %s\n",filename);
     }
 
-    printf("file size %d\n",(int)sb.st_size);
-   
-    /*sizeをページングサイズの倍数にあわせる*/
-    st_mmap.size = fix_byte(sb.st_size,4096);
+    st_mmap.size = sb.st_size;
 
-    printf("fix 4096byte file size %d\n",(int)st_mmap.size);
+    printf("file size %d\n",(int)st_mmap.size);
 
     st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0);
     if (st_mmap.file_mmap == (caddr_t)-1) {
@@ -115,7 +110,7 @@
     cl_uint ret_num_devices = NULL;
     // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる
     // CL_DEVICE_TYPE_DEFAULT はどうなるのか
-    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 
+    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_CPU, 
                           1, &device_id, &ret_num_devices);
 
     oclCheckError(ret, CL_SUCCESS);
@@ -128,8 +123,13 @@
     // カーネルプログラムを読み込む
     cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
                                                    (const size_t *)&source_size, &ret);
+
     ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
 
+    if (ret != CL_SUCCESS) {
+        oclLogBuildInfo(program, device_id);
+    }
+
     oclCheckError(ret, CL_SUCCESS);
 
     //カーネルプログラムをビルド
@@ -149,7 +149,7 @@
 
     // 必要なパラメータのオブジェクト
     cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 
-                                         sizeof(int), NULL, &ret);
+                                         sizeof(param_t), NULL, &ret);
 
     ret = clEnqueueWriteBuffer(command_queue, param_memobj,
                                CL_TRUE, 0, sizeof(int), (int*)(&st_mmap.size),
@@ -166,12 +166,14 @@
 
     // このdivi_size はどうやって決めるよ
     int divi_size = 1024;
-    st_mmap.size / 1024;
-    
+    size_t global_work_size = (st_mmap.size + divi_size - 1) / divi_size;
+    int remain_size = st_mmap.size - global_work_size * divi_size; 
 
-    size_t global_work_size = 4;
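+    /*
+     * Output array: OUT_PARAM_NUM ints per work-item, holding the word count,
+     * the line count and the split-point flags, laid out as
+     * word_num, line_num, head, tail
+     */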
 
-    // 行数、単語数を格納する2のint配列
     int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size;
     cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
                                        out_size * sizeof(char), NULL, &ret);
@@ -191,7 +193,7 @@
      * kernel実行
    * 並列に処理せずに work-item ひとつで動かしたい場合は、clEnqueueNDRangeKernel の簡易版 clEnqueueTask が使える
      *
-    */   
+     */
 
     //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
     // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず
@@ -203,12 +205,14 @@
 
     // 演算結果の読み込み
     ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, 
-                              out_size * sizeof(char), out_data, 0, NULL, NULL);
+                              out_size, out_data, 0, NULL, NULL);
 
     oclCheckError(ret, CL_SUCCESS);
 
+    printf("global_work_size %d\n", (int)global_work_size);
+
     for (int i = 0; i < global_work_size; i++) {
-        for (int j = 0; j < OUT_PARAM_NUM; j++) {
+        for (int j = 0; j < PRINT_PARAM_NUM; j++) {
             printf("%d ", out_data[i*OUT_PARAM_NUM+j]);
         }
         printf("\n");