Mercurial > hg > Members > yutaka > OpenCL
view WordCount/main.cc @ 5:ef8efbd04df9 default tip
minor change
author | Yutaka_Kinjyo |
---|---|
date | Thu, 28 Jul 2011 11:10:28 +0900 |
parents | 641eef31681e |
children |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/stat.h> #include <sys/mman.h> #include <sys/types.h> #include <fcntl.h> #include <unistd.h> #include <OpenCL/opencl.h> #include <oclUtils.h> #define OUT_PARAM_NUM 4 #define PRINT_PARAM_NUM 2 typedef struct { caddr_t file_mmap; off_t size; } st_mmap_t; typedef struct { int size; int remain_size; } param_t; static st_mmap_t my_mmap(char *filename) { /*マッピングだよ!*/ int fd = -1; int map = MAP_PRIVATE; st_mmap_t st_mmap; struct stat sb; if ((fd=open(filename,O_RDONLY,0666))==0) { fprintf(stderr,"can't open %s\n",filename); } if (fstat(fd,&sb)) { fprintf(stderr,"can't fstat %s\n",filename); } st_mmap.size = sb.st_size; printf("file size %d\n",(int)st_mmap.size); st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0); if (st_mmap.file_mmap == (caddr_t)-1) { fprintf(stderr,"Can't mmap file\n"); perror(NULL); exit(0); } return st_mmap; } int main(int args, char *argv[]) { char *filename = 0; cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT; for (int i = 1; argv[i]; ++i) { if (strcmp(argv[i], "-file") == 0) { filename = argv[i+1]; } else if (strcmp(argv[i], "-help") == 0) { printf("Usage: ./word_count [-file filename]\n"); } else if (strcmp(argv[i], "-type") == 0) { if (strcmp(argv[i+1], "gpu") == 0) { dev_type = CL_DEVICE_TYPE_GPU; } else if (strcmp(argv[i+1], "cpu") == 0) { dev_type = CL_DEVICE_TYPE_CPU; } } } if (filename == 0) { printf("Usage: ./word_count [-file filename]\n"); return 0; } //指定されたファイルをメモリにmap st_mmap_t st_mmap = my_mmap(filename); //kernelファイルの大きさ取得して、メモリ確保 int fd = -1; const char *kernel_filename = "./kernel/word_count.cl"; if ((fd=open(kernel_filename,O_RDONLY,0666))==0) { fprintf(stderr,"can't open %s\n",kernel_filename); } struct stat sb; if (fstat(fd,&sb)) { fprintf(stderr,"can't fstat %s\n",filename); } size_t source_size = sb.st_size; void *source_str = malloc(source_size); int err = read(fd, source_str, source_size); if (err == -1) { fprintf(stderr,"can't read %s\n",filename); } close(fd); cl_platform_id platform_id = NULL; cl_uint ret_num_platforms = NULL; cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); cl_device_id device_id = NULL; cl_uint ret_num_devices = NULL; // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる // CL_DEVICE_TYPE_DEFAULT はどうなるのか ret = clGetDeviceIDs( platform_id, dev_type, 1, &device_id, &ret_num_devices); oclCheckError(ret, CL_SUCCESS); //OpenCLコンテキストの作成 cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); oclCheckError(ret, CL_SUCCESS); cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); oclCheckError(ret, CL_SUCCESS); // カーネルプログラムを読み込む cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); if (ret != CL_SUCCESS) { oclLogBuildInfo(program, device_id); } oclCheckError(ret, CL_SUCCESS); //カーネルプログラムをビルド //Task選択にあたる cl_kernel kernel = clCreateKernel(program, "word_count", &ret); oclCheckError(ret, CL_SUCCESS); //カウントするテキストデータのメモリオブジェクト cl_mem text_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, st_mmap.size, NULL, &ret); ret = clEnqueueWriteBuffer(command_queue, text_memobj, CL_TRUE, 0, st_mmap.size, (char*)st_mmap.file_mmap, 0, NULL, NULL); oclCheckError(ret, CL_SUCCESS); /* * 並列度の計算 * wcするファイルの大きさに合わせる * */ // このdivi_size はどうやって決めるよ int divi_size = 1024; size_t global_work_size = st_mmap.size / divi_size; int remain_size = st_mmap.size - global_work_size * divi_size; if (remain_size > 0) { global_work_size += 1; } /* * 行数、単語数, 分割地点のフラグを格納する配列 * word_num, line_num, head, tail */ int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size; cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, out_size * sizeof(char), NULL, &ret); oclCheckError(ret, CL_SUCCESS); // 必要なパラメータのオブジェクト cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(param_t), NULL, &ret); param_t param; param.size = divi_size; param.remain_size = remain_size; ret = clEnqueueWriteBuffer(command_queue, param_memobj, CL_TRUE, 0, sizeof(param_t), ¶m, 0, NULL, NULL); oclCheckError(ret, CL_SUCCESS); // 引数のSet // memory object にしなくてもできるsetできるかも ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj); ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)¶m_memobj); ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&out_memobj); oclCheckError(ret, CL_SUCCESS); /* * kernel実行 * 並列に処理せずに work-item ひとつで動かしたい場合は、clEnqueueNDRangeKernel の簡易版 clEnqueueTask が使える * */ //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL); // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); oclCheckError(ret, CL_SUCCESS); int *out_data = (int*)malloc(out_size); cl_event ev; // 演算結果の読み込み // CL_FALSE で non-bloking, CL_TURE で bloking ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_FALSE, 0, out_size, out_data, 0, NULL, &ev); // event object に関連する処理の完了を wait する // これはeventひとつだけど、listにして、複数 wait できる clWaitForEvents(1, &ev); oclCheckError(ret, CL_SUCCESS); printf("global_work_size %d\n", (int)global_work_size); int word_line_num[PRINT_PARAM_NUM]; for (int i = 0; i < PRINT_PARAM_NUM; i++) { word_line_num[i] = 0; } for (int i = 0; i < global_work_size; i++) { for (int j = 0; j < PRINT_PARAM_NUM; j++) { word_line_num[j] += out_data[i*OUT_PARAM_NUM+j]; } //flagの判定 if ((i != global_work_size-1) && (out_data[i*OUT_PARAM_NUM+PRINT_PARAM_NUM+1] == 1) && (out_data[i*OUT_PARAM_NUM+PRINT_PARAM_NUM+4] == 0)) { word_line_num[0] += 1; } } printf("%d %d \n", word_line_num[0], word_line_num[1]); clFlush(command_queue); clFinish(command_queue); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseMemObject(text_memobj); clReleaseMemObject(param_memobj); clReleaseMemObject(out_memobj); clReleaseCommandQueue(command_queue); clReleaseContext(context); clReleaseEvent(ev); free(source_str); free(out_data); return 0; }