comparison WordCount/main.cc @ 4:641eef31681e

non-blocking clEnqueueReadBuffer
author Yutaka_Kinjyo
date Thu, 28 Jul 2011 10:55:36 +0900
parents 1c0c9299c292
children ef8efbd04df9
comparison
equal deleted inserted replaced
3:1c0c9299c292 4:641eef31681e
60 60
61 int main(int args, char *argv[]) 61 int main(int args, char *argv[])
62 { 62 {
63 63
64 char *filename = 0; 64 char *filename = 0;
65 cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT;
65 66
66 for (int i = 1; argv[i]; ++i) { 67 for (int i = 1; argv[i]; ++i) {
67 if (strcmp(argv[i], "-file") == 0) { 68 if (strcmp(argv[i], "-file") == 0) {
68 filename = argv[i+1]; 69 filename = argv[i+1];
69 } else if (strcmp(argv[i], "-help")) { 70 } else if (strcmp(argv[i], "-help") == 0) {
70 printf("Usage: ./word_count [-file filename]\n"); 71 printf("Usage: ./word_count [-file filename]\n");
72 } else if (strcmp(argv[i], "-type") == 0) {
73
74 if (strcmp(argv[i+1], "gpu") == 0) {
75 dev_type = CL_DEVICE_TYPE_GPU;
76 } else if (strcmp(argv[i+1], "cpu") == 0) {
77 dev_type = CL_DEVICE_TYPE_CPU;
78 }
79
71 } 80 }
72 } 81 }
73 82
74 if (filename == 0) { 83 if (filename == 0) {
75 printf("Usage: ./word_count [-file filename]\n"); 84 printf("Usage: ./word_count [-file filename]\n");
107 116
108 cl_device_id device_id = NULL; 117 cl_device_id device_id = NULL;
109 cl_uint ret_num_devices = NULL; 118 cl_uint ret_num_devices = NULL;
110 // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる 119 // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる
111 // CL_DEVICE_TYPE_DEFAULT はどうなるのか 120 // CL_DEVICE_TYPE_DEFAULT はどうなるのか
112 ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_CPU, 121 ret = clGetDeviceIDs( platform_id, dev_type,
113 1, &device_id, &ret_num_devices); 122 1, &device_id, &ret_num_devices);
114 123
115 oclCheckError(ret, CL_SUCCESS); 124 oclCheckError(ret, CL_SUCCESS);
116 125
117 //OpenCLコンテキストの作成 126 //OpenCLコンテキストの作成
178 187
179 param_t param; 188 param_t param;
180 param.size = divi_size; 189 param.size = divi_size;
181 param.remain_size = remain_size; 190 param.remain_size = remain_size;
182 191
183
184 ret = clEnqueueWriteBuffer(command_queue, param_memobj, 192 ret = clEnqueueWriteBuffer(command_queue, param_memobj,
185 CL_TRUE, 0, sizeof(param_t), &param, 193 CL_TRUE, 0, sizeof(param_t), &param,
186 0, NULL, NULL); 194 0, NULL, NULL);
187 195
188 oclCheckError(ret, CL_SUCCESS); 196 oclCheckError(ret, CL_SUCCESS);
189
190
191 197
192 // 引数のSet 198 // 引数のSet
193 // memory object にしなくてもできるsetできるかも 199 // memory object にしなくてもできるsetできるかも
194 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj); 200 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj);
195 ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&param_memobj); 201 ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&param_memobj);
205 211
206 //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL); 212 //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
207 // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず 213 // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず
208 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); 214 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
209 215
216
210 oclCheckError(ret, CL_SUCCESS); 217 oclCheckError(ret, CL_SUCCESS);
211 218
212 int *out_data = (int*)malloc(out_size); 219 int *out_data = (int*)malloc(out_size);
213 220
221
222 #ifndef BLOKING
223
214 // 演算結果の読み込み 224 // 演算結果の読み込み
225 // CL_TRUE で blocking
215 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, 226 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0,
216 out_size, out_data, 0, NULL, NULL); 227 out_size, out_data, 0, NULL, NULL);
217 228
229 #else
230
231 cl_event ev;
232
233 // 演算結果の読み込み
234 // CL_FALSE で non-blocking
235 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_FALSE, 0,
236 out_size, out_data, 0, NULL, &ev);
237
238 // event object に関連する処理の完了を wait する
239 // これはeventひとつだけど、listにして、複数 wait できる
240 clWaitForEvents(1, &ev);
241
242 #endif
243
218 oclCheckError(ret, CL_SUCCESS); 244 oclCheckError(ret, CL_SUCCESS);
219 245
220 printf("global_work_size %d\n", (int)global_work_size); 246 printf("global_work_size %d\n", (int)global_work_size);
221 247
222 int word_line_num[PRINT_PARAM_NUM]; 248 int word_line_num[PRINT_PARAM_NUM];
241 word_line_num[0] += 1; 267 word_line_num[0] += 1;
242 268
243 } 269 }
244 } 270 }
245 271
246 printf("%d %d", word_line_num[0], word_line_num[1]); 272 printf("%d %d \n", word_line_num[0], word_line_num[1]);
247 273
248 clFlush(command_queue); 274 clFlush(command_queue);
249 clFinish(command_queue); 275 clFinish(command_queue);
250 clReleaseKernel(kernel); 276 clReleaseKernel(kernel);
251 clReleaseProgram(program); 277 clReleaseProgram(program);