# HG changeset patch # User Yutaka_Kinjyo # Date 1311818136 -32400 # Node ID 641eef31681eb8714d20dbc613036b6b5e6206ce # Parent 1c0c9299c29202917f2ed90c29d42aa4ff5ac226 non bloking clEnqueuReadBuffer diff -r 1c0c9299c292 -r 641eef31681e WordCount/Makefile --- a/WordCount/Makefile Mon Jul 25 21:19:42 2011 +0900 +++ b/WordCount/Makefile Thu Jul 28 10:55:36 2011 +0900 @@ -2,6 +2,7 @@ CC = g++ WARN = -Wall CCFLAGS = -g #-isysroot /Developer/SDKs/MacOSX10.6.sdk +#CCFLAGS = -g -DBLOKING #-isysroot /Developer/SDKs/MacOSX10.6.sdk LIBS = -framework OpenCL #-lclsdk HEADERS = $(shell ls *.h) diff -r 1c0c9299c292 -r 641eef31681e WordCount/main.cc --- a/WordCount/main.cc Mon Jul 25 21:19:42 2011 +0900 +++ b/WordCount/main.cc Thu Jul 28 10:55:36 2011 +0900 @@ -62,12 +62,21 @@ { char *filename = 0; + cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT; for (int i = 1; argv[i]; ++i) { if (strcmp(argv[i], "-file") == 0) { filename = argv[i+1]; - } else if (strcmp(argv[i], "-help")) { + } else if (strcmp(argv[i], "-help") == 0) { printf("Usage: ./word_count [-file filename]\n"); + } else if (strcmp(argv[i], "-type") == 0) { + + if (strcmp(argv[i+1], "gpu") == 0) { + dev_type = CL_DEVICE_TYPE_GPU; + } else if (strcmp(argv[i+1], "cpu") == 0) { + dev_type = CL_DEVICE_TYPE_CPU; + } + } } @@ -109,7 +118,7 @@ cl_uint ret_num_devices = NULL; // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる // CL_DEVICE_TYPE_DEFAULT はどうなるのか - ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_CPU, + ret = clGetDeviceIDs( platform_id, dev_type, 1, &device_id, &ret_num_devices); oclCheckError(ret, CL_SUCCESS); @@ -180,15 +189,12 @@ param.size = divi_size; param.remain_size = remain_size; - ret = clEnqueueWriteBuffer(command_queue, param_memobj, CL_TRUE, 0, sizeof(param_t), &param, 0, NULL, NULL); oclCheckError(ret, CL_SUCCESS); - - // 引数のSet // memory object にしなくてもできるsetできるかも ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj); @@ -207,14 +213,34 @@ // global_work_size 
は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); + oclCheckError(ret, CL_SUCCESS); int *out_data = (int*)malloc(out_size); + +#ifndef BLOKING + // 演算結果の読み込み + // CL_TRUE で bloking ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, out_size, out_data, 0, NULL, NULL); +#else + + cl_event ev; + + // 演算結果の読み込み + // CL_FALSE で non-bloking + ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_FALSE, 0, + out_size, out_data, 0, NULL, &ev); + + // event object に関連する処理の完了を wait する + // これはeventひとつだけど、listにして、複数 wait できる + clWaitForEvents(1, &ev); + +#endif + oclCheckError(ret, CL_SUCCESS); printf("global_work_size %d\n", (int)global_work_size); @@ -243,7 +269,7 @@ } } - printf("%d %d", word_line_num[0], word_line_num[1]); + printf("%d %d \n", word_line_num[0], word_line_num[1]); clFlush(command_queue); clFinish(command_queue);