changeset 4:641eef31681e

non-blocking clEnqueueReadBuffer
author Yutaka_Kinjyo
date Thu, 28 Jul 2011 10:55:36 +0900
parents 1c0c9299c292
children ef8efbd04df9
files WordCount/Makefile WordCount/main.cc
diffstat 2 files changed, 33 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/WordCount/Makefile	Mon Jul 25 21:19:42 2011 +0900
+++ b/WordCount/Makefile	Thu Jul 28 10:55:36 2011 +0900
@@ -2,6 +2,7 @@
 CC = g++
 WARN = -Wall
 CCFLAGS = -g #-isysroot /Developer/SDKs/MacOSX10.6.sdk
+#CCFLAGS = -g -DBLOKING #-isysroot /Developer/SDKs/MacOSX10.6.sdk
 LIBS = -framework OpenCL #-lclsdk
 
 HEADERS = $(shell ls *.h)
--- a/WordCount/main.cc	Mon Jul 25 21:19:42 2011 +0900
+++ b/WordCount/main.cc	Thu Jul 28 10:55:36 2011 +0900
@@ -62,12 +62,21 @@
 {
 
     char *filename = 0;
+    cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT;
 
     for (int i = 1; argv[i]; ++i) {	
 	if (strcmp(argv[i], "-file") == 0) {
 	    filename = argv[i+1];
-	} else if (strcmp(argv[i], "-help")) {
+	} else if (strcmp(argv[i], "-help") == 0) {
             printf("Usage: ./word_count [-file filename]\n");
+        } else if (strcmp(argv[i], "-type") == 0) {
+
+            if (strcmp(argv[i+1], "gpu") == 0) {
+                dev_type = CL_DEVICE_TYPE_GPU;
+            } else if (strcmp(argv[i+1], "cpu") == 0) {
+                dev_type = CL_DEVICE_TYPE_CPU;
+            }
+
         }
     }
 
@@ -109,7 +118,7 @@
     cl_uint ret_num_devices = NULL;
     // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる
     // CL_DEVICE_TYPE_DEFAULT はどうなるのか
-    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_CPU, 
+    ret = clGetDeviceIDs( platform_id, dev_type, 
                           1, &device_id, &ret_num_devices);
 
     oclCheckError(ret, CL_SUCCESS);
@@ -180,15 +189,12 @@
     param.size        = divi_size;
     param.remain_size = remain_size;
 
-
     ret = clEnqueueWriteBuffer(command_queue, param_memobj,
                                CL_TRUE, 0, sizeof(param_t), &param,
                                0, NULL, NULL);
 
     oclCheckError(ret, CL_SUCCESS);
 
-
-
     // 引数のSet
     // memory object にしなくてもできるsetできるかも
     ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj);
@@ -207,14 +213,34 @@
     // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず
     ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
 
+
     oclCheckError(ret, CL_SUCCESS);
 
     int *out_data = (int*)malloc(out_size);
 
+
+#ifndef BLOKING
+
     // 演算結果の読み込み
+    // CL_TRUE で blocking
     ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, 
                               out_size, out_data, 0, NULL, NULL);
 
+#else 
+
+    cl_event ev;
+
+    // 演算結果の読み込み
+    // CL_FALSE で non-blocking
+    ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_FALSE, 0, 
+                              out_size, out_data, 0, NULL, &ev);
+
+    // event object に関連する処理の完了を wait する
+    // これはeventひとつだけど、listにして、複数 wait できる
+    clWaitForEvents(1, &ev);
+
+#endif
+    
     oclCheckError(ret, CL_SUCCESS);
 
     printf("global_work_size %d\n", (int)global_work_size);
@@ -243,7 +269,7 @@
         } 
     }
 
-    printf("%d %d", word_line_num[0], word_line_num[1]);
+    printf("%d %d \n", word_line_num[0], word_line_num[1]);
 
     clFlush(command_queue);
     clFinish(command_queue);