# HG changeset patch # User Yutaka_Kinjyo # Date 1310458631 -32400 # Node ID 403e35dd9b6d6d980af8b79e968de0cb334bf192 # Parent 0e6e76dbdb0f45c896b8ad08679b627474d760ab word count ... diff -r 0e6e76dbdb0f -r 403e35dd9b6d WordCount/Makefile --- a/WordCount/Makefile Tue Jul 12 11:12:51 2011 +0900 +++ b/WordCount/Makefile Tue Jul 12 17:17:11 2011 +0900 @@ -1,7 +1,7 @@ TARGET= word_count CC = g++ WARN = -Wall -CFLAGS = -isysroot /Developer/SDKs/MacOSX10.6.sdk +CFLAGS = -g #-isysroot /Developer/SDKs/MacOSX10.6.sdk LIBS = -framework OpenCL #-lclsdk HEADERS = $(shell ls *.h) diff -r 0e6e76dbdb0f -r 403e35dd9b6d WordCount/main.cc --- a/WordCount/main.cc Tue Jul 12 11:12:51 2011 +0900 +++ b/WordCount/main.cc Tue Jul 12 17:17:11 2011 +0900 @@ -9,21 +9,19 @@ #include #include -#define OUT_PARAM_NUM 2 +#define OUT_PARAM_NUM 4 +#define PRINT_PARAM_NUM 2 typedef struct { caddr_t file_mmap; off_t size; } st_mmap_t; -/*与えられたsizeをfix_byte_sizeの倍数にする(丸め込むっていうのかな?)*/ -static int -fix_byte(int size,int fix_byte_size) -{ - size = (size/fix_byte_size)*fix_byte_size + ((size%fix_byte_size)!= 0)*fix_byte_size; - - return size; -} +typedef struct { + int work_num; + int size; + int remain_size; +} param_t; static st_mmap_t @@ -44,12 +42,9 @@ fprintf(stderr,"can't fstat %s\n",filename); } - printf("file size %d\n",(int)sb.st_size); - - /*sizeをページングサイズの倍数にあわせる*/ - st_mmap.size = fix_byte(sb.st_size,4096); + st_mmap.size = sb.st_size; - printf("fix 4096byte file size %d\n",(int)st_mmap.size); + printf("file size %d\n",(int)st_mmap.size); st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0); if (st_mmap.file_mmap == (caddr_t)-1) { @@ -115,7 +110,7 @@ cl_uint ret_num_devices = NULL; // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる // CL_DEVICE_TYPE_DEFAULT はどうなるのか - ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, + ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_CPU, 1, &device_id, &ret_num_devices); oclCheckError(ret, CL_SUCCESS); @@ -128,8 +123,13 @@ // カーネルプログラムを読み込む cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); + ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); + if (ret != CL_SUCCESS) { + oclLogBuildInfo(program, device_id); + } + oclCheckError(ret, CL_SUCCESS); //カーネルプログラムをビルド @@ -149,7 +149,7 @@ // 必要なパラメータのオブジェクト cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, - sizeof(int), NULL, &ret); + sizeof(param_t), NULL, &ret); ret = clEnqueueWriteBuffer(command_queue, param_memobj, CL_TRUE, 0, sizeof(int), (int*)(&st_mmap.size), @@ -166,12 +166,14 @@ // このdivi_size はどうやって決めるよ int divi_size = 1024; - st_mmap.size / 1024; - + size_t global_work_size = (st_mmap.size + divi_size - 1) / divi_size; + int remain_size = st_mmap.size - global_work_size * divi_size; - size_t global_work_size = 4; + /* + * 行数、単語数, 分割地点のフラグを格納する配列 + * word_num, line_num, head, tail + */ - // 行数、単語数を格納する2のint配列 int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size; cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, out_size * sizeof(char), NULL, &ret); @@ -191,7 +193,7 @@ * kernel実行    * 並列に処理せずに work-item ひとつで動かしたい場合は、clEnqueueNDRangeKernel の簡易版 clEnqueueTask が使える * - */   + */ //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL); // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず @@ -203,12 +205,14 @@ // 演算結果の読み込み ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, - out_size * sizeof(char), out_data, 0, NULL, NULL); + out_size, out_data, 0, NULL, NULL); oclCheckError(ret, CL_SUCCESS); + printf("global_work_size %d\n", (int)global_work_size); + for (int i = 0; i < global_work_size; i++) { - for (int j = 0; j < OUT_PARAM_NUM; j++) { + for (int j = 0; j < PRINT_PARAM_NUM; j++) { printf("%d ", out_data[i*OUT_PARAM_NUM+j]); } printf("\n"); diff -r 0e6e76dbdb0f -r 403e35dd9b6d WordCount/oclUtils.cc --- a/WordCount/oclUtils.cc Tue Jul 12 11:12:51 2011 +0900 +++ b/WordCount/oclUtils.cc Tue Jul 12 17:17:11 2011 +0900 @@ -83,3 +83,19 @@ return (index >= 0 && index < errorCount) ? errorString[index] : "Unspecified Error"; } +////////////////////////////////////////////////////////////////////////////// +//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device +//! +//! @param cpProgram OpenCL program +//! @param cdDevice device of interest +////////////////////////////////////////////////////////////////////////////// +void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice) +{ + // write out the build log and ptx, then exit + char cBuildLog[10240]; + clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, + sizeof(cBuildLog), cBuildLog, NULL ); + //shrLog("\n%s\nBuild Log:\n%s\n%s\n", HDASHLINE, cBuildLog, HDASHLINE); + printf("Build Log:\n%s\n", cBuildLog); +} + diff -r 0e6e76dbdb0f -r 403e35dd9b6d WordCount/oclUtils.h --- a/WordCount/oclUtils.h Tue Jul 12 11:12:51 2011 +0900 +++ b/WordCount/oclUtils.h Tue Jul 12 17:17:11 2011 +0900 @@ -46,5 +46,9 @@ } } + +extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice); + + #endif diff -r 0e6e76dbdb0f -r 403e35dd9b6d WordCount/word_count.cl --- a/WordCount/word_count.cl Tue Jul 12 11:12:51 2011 +0900 +++ b/WordCount/word_count.cl Tue Jul 12 17:17:11 2011 +0900 @@ -1,26 +1,47 @@ -#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable +typedef struct { + int work_num; + int size; + int remain_size; +} param_t; -__kernel void word_count(__global char* text_data, __global int* size, __global int* out_data) +__kernel void word_count(__global char* text_data, __global param_t* param, __global int* out_data) { - //unsigned long long *head_tail_flag = o_data +2; + /* + * out_data + * + * ___int____ ___int____ ___int____ ___int____ + * __________ __________ __________ __________ + * | | | | | + * | word_num | line_num | head | tail | + * |__________|__________|__________|__________| + * + */ + uint gid = get_global_id(0); + int index = gid*4; + int head = gid*4+2; + int tail = gid*4+3; - int length = *size; + int work_num = param->work_num; + int size = param->size; + int remain_size = param->remain_size; + // gid が最後なら remain_size を見る。gid は 0 からスタート + int length = ( (work_num - 1) != gid ) ? size : remain_size; int word_flag = 0; int word_num = 0; int line_num = 0; int i = 0; - - int start = gid*length/2; - int end = start + length/2; - - //head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A); - //word_num -= 1-head_tail_flag[0]; + int start = gid*size; + int end = start + length; - for (i = 0; i < length; i++) { - //for (i = start; i < end; i++) { + out_data[head] = (text_data[0] != 0x20) && (text_data[0] != 0x0A); + word_num -= 1-out_data[head]; + + + //for (i = 0; i < length; i++) { + for (i = start; i < end; i++) { if (text_data[i] == 0x20) { word_flag = 1; } else if (text_data[i] == 0x0A) { @@ -34,17 +55,11 @@ word_num += word_flag; - //head_tail_flag[1] = (i_data[i-1] != 0x20) && (i_data[i-1] != 0x0A); - - // s->printf("SPE word %d line %d\n",word_num,line_num); + out_data[tail] = (text_data[i-1] != 0x20) && (text_data[i-1] != 0x0A); - int index = gid*2; - out_data[index] = word_num; out_data[index+1] = line_num; - - return 0; }