Mercurial > hg > Members > yutaka > OpenCL
comparison WordCount/main.cc @ 1:403e35dd9b6d
word count ...
author | Yutaka_Kinjyo |
---|---|
date | Tue, 12 Jul 2011 17:17:11 +0900 |
parents | 0e6e76dbdb0f |
children | 1a32564347d5 |
comparison
equal
deleted
inserted
replaced
0:0e6e76dbdb0f | 1:403e35dd9b6d |
---|---|
7 #include <fcntl.h> | 7 #include <fcntl.h> |
8 #include <unistd.h> | 8 #include <unistd.h> |
9 #include <OpenCL/opencl.h> | 9 #include <OpenCL/opencl.h> |
10 #include <oclUtils.h> | 10 #include <oclUtils.h> |
11 | 11 |
12 #define OUT_PARAM_NUM 2 | 12 #define OUT_PARAM_NUM 4 |
| | 13 #define PRINT_PARAM_NUM 2 |
13 | 14 |
14 typedef struct { | 15 typedef struct { |
15 caddr_t file_mmap; | 16 caddr_t file_mmap; |
16 off_t size; | 17 off_t size; |
17 } st_mmap_t; | 18 } st_mmap_t; |
18 | 19 |
19 /*与えられたsizeをfix_byte_sizeの倍数にする(丸め込むっていうのかな?)*/ | 20 typedef struct { |
20 static int | 21 int work_num; |
21 fix_byte(int size,int fix_byte_size) | 22 int size; |
22 { | 23 int remain_size; |
23 size = (size/fix_byte_size)*fix_byte_size + ((size%fix_byte_size)!= 0)*fix_byte_size; | 24 } param_t; |
24 | |
25 return size; | |
26 } | |
27 | 25 |
28 | 26 |
29 static st_mmap_t | 27 static st_mmap_t |
30 my_mmap(char *filename) | 28 my_mmap(char *filename) |
31 { | 29 { |
42 | 40 |
43 if (fstat(fd,&sb)) { | 41 if (fstat(fd,&sb)) { |
44 fprintf(stderr,"can't fstat %s\n",filename); | 42 fprintf(stderr,"can't fstat %s\n",filename); |
45 } | 43 } |
46 | 44 |
47 printf("file size %d\n",(int)sb.st_size); | 45 st_mmap.size = sb.st_size; |
48 | 46 |
49 /*sizeをページングサイズの倍数にあわせる*/ | 47 printf("file size %d\n",(int)st_mmap.size); |
50 st_mmap.size = fix_byte(sb.st_size,4096); | |
51 | |
52 printf("fix 4096byte file size %d\n",(int)st_mmap.size); | |
53 | 48 |
54 st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0); | 49 st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0); |
55 if (st_mmap.file_mmap == (caddr_t)-1) { | 50 if (st_mmap.file_mmap == (caddr_t)-1) { |
56 fprintf(stderr,"Can't mmap file\n"); | 51 fprintf(stderr,"Can't mmap file\n"); |
57 perror(NULL); | 52 perror(NULL); |
113 | 108 |
114 cl_device_id device_id = NULL; | 109 cl_device_id device_id = NULL; |
115 cl_uint ret_num_devices = NULL; | 110 cl_uint ret_num_devices = NULL; |
116 // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる | 111 // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる |
117 // CL_DEVICE_TYPE_DEFAULT はどうなるのか | 112 // CL_DEVICE_TYPE_DEFAULT はどうなるのか |
118 ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, | 113 ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_CPU, |
119 1, &device_id, &ret_num_devices); | 114 1, &device_id, &ret_num_devices); |
120 | 115 |
121 oclCheckError(ret, CL_SUCCESS); | 116 oclCheckError(ret, CL_SUCCESS); |
122 | 117 |
123 //OpenCLコンテキストの作成 | 118 //OpenCLコンテキストの作成 |
126 cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); | 121 cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); |
127 oclCheckError(ret, CL_SUCCESS); | 122 oclCheckError(ret, CL_SUCCESS); |
128 // カーネルプログラムを読み込む | 123 // カーネルプログラムを読み込む |
129 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, | 124 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, |
130 (const size_t *)&source_size, &ret); | 125 (const size_t *)&source_size, &ret); |
| | 126 |
131 ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); | 127 ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); |
| | 128 |
| | 129 if (ret != CL_SUCCESS) { |
| | 130 oclLogBuildInfo(program, device_id); |
| | 131 } |
132 | 132 |
133 oclCheckError(ret, CL_SUCCESS); | 133 oclCheckError(ret, CL_SUCCESS); |
134 | 134 |
135 //カーネルプログラムをビルド | 135 //カーネルプログラムをビルド |
136 //Task選択にあたる | 136 //Task選択にあたる |
147 | 147 |
148 oclCheckError(ret, CL_SUCCESS); | 148 oclCheckError(ret, CL_SUCCESS); |
149 | 149 |
150 // 必要なパラメータのオブジェクト | 150 // 必要なパラメータのオブジェクト |
151 cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, | 151 cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, |
152 sizeof(int), NULL, &ret); | 152 sizeof(param_t), NULL, &ret); |
153 | 153 |
154 ret = clEnqueueWriteBuffer(command_queue, param_memobj, | 154 ret = clEnqueueWriteBuffer(command_queue, param_memobj, |
155 CL_TRUE, 0, sizeof(int), (int*)(&st_mmap.size), | 155 CL_TRUE, 0, sizeof(int), (int*)(&st_mmap.size), |
156 0, NULL, NULL); | 156 0, NULL, NULL); |
157 | 157 |
164 * | 164 * |
165 */ | 165 */ |
166 | 166 |
167 // このdivi_size はどうやって決めるよ | 167 // このdivi_size はどうやって決めるよ |
168 int divi_size = 1024; | 168 int divi_size = 1024; |
169 st_mmap.size / 1024; | 169 size_t global_work_size = (st_mmap.size + divi_size - 1) / divi_size; |
170 | 170 int remain_size = st_mmap.size - global_work_size * divi_size; |
171 | 171 |
172 size_t global_work_size = 4; | 172 /* |
173 | 173 * 行数、単語数, 分割地点のフラグを格納する配列 |
174 // 行数、単語数を格納する2のint配列 | 174 * word_num, line_num, head, tail |
| | 175 */ |
| | 176 |
175 int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size; | 177 int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size; |
176 cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, | 178 cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, |
177 out_size * sizeof(char), NULL, &ret); | 179 out_size * sizeof(char), NULL, &ret); |
178 | 180 |
179 oclCheckError(ret, CL_SUCCESS); | 181 oclCheckError(ret, CL_SUCCESS); |
189 | 191 |
190 /* | 192 /* |
191 * kernel実行 | 193 * kernel実行 |
192 * 並列に処理せずに work-item ひとつで動かしたい場合は、clEnqueueNDRangeKernel の簡易版 clEnqueueTask が使える | 194 * 並列に処理せずに work-item ひとつで動かしたい場合は、clEnqueueNDRangeKernel の簡易版 clEnqueueTask が使える |
193 * | 195 * |
194 */ | 196 */ |
195 | 197 |
196 //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL); | 198 //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL); |
197 // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず | 199 // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず |
198 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); | 200 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); |
199 | 201 |
201 | 203 |
202 int *out_data = (int*)malloc(out_size); | 204 int *out_data = (int*)malloc(out_size); |
203 | 205 |
204 // 演算結果の読み込み | 206 // 演算結果の読み込み |
205 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, | 207 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, |
206 out_size * sizeof(char), out_data, 0, NULL, NULL); | 208 out_size, out_data, 0, NULL, NULL); |
207 | 209 |
208 oclCheckError(ret, CL_SUCCESS); | 210 oclCheckError(ret, CL_SUCCESS); |
| | 211 |
| | 212 printf("global_work_size %d\n", (int)global_work_size); |
209 | 213 |
210 for (int i = 0; i < global_work_size; i++) { | 214 for (int i = 0; i < global_work_size; i++) { |
211 for (int j = 0; j < OUT_PARAM_NUM; j++) { | 215 for (int j = 0; j < PRINT_PARAM_NUM; j++) { |
212 printf("%d ", out_data[i*OUT_PARAM_NUM+j]); | 216 printf("%d ", out_data[i*OUT_PARAM_NUM+j]); |
213 } | 217 } |
214 printf("\n"); | 218 printf("\n"); |
215 } | 219 } |
216 | 220 |