Mercurial > hg > Members > yutaka > OpenCL
comparison WordCount/main.cc @ 4:641eef31681e
non-blocking clEnqueueReadBuffer
author | Yutaka_Kinjyo |
---|---|
date | Thu, 28 Jul 2011 10:55:36 +0900 |
parents | 1c0c9299c292 |
children | ef8efbd04df9 |
comparison
equal
deleted
inserted
replaced
3:1c0c9299c292 | 4:641eef31681e |
---|---|
60 | 60 |
61 int main(int args, char *argv[]) | 61 int main(int args, char *argv[]) |
62 { | 62 { |
63 | 63 |
64 char *filename = 0; | 64 char *filename = 0; |
65 cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT; | |
65 | 66 |
66 for (int i = 1; argv[i]; ++i) { | 67 for (int i = 1; argv[i]; ++i) { |
67 if (strcmp(argv[i], "-file") == 0) { | 68 if (strcmp(argv[i], "-file") == 0) { |
68 filename = argv[i+1]; | 69 filename = argv[i+1]; |
69 } else if (strcmp(argv[i], "-help")) { | 70 } else if (strcmp(argv[i], "-help") == 0) { |
70 printf("Usage: ./word_count [-file filename]\n"); | 71 printf("Usage: ./word_count [-file filename]\n"); |
72 } else if (strcmp(argv[i], "-type") == 0) { | |
73 | |
74 if (strcmp(argv[i+1], "gpu") == 0) { | |
75 dev_type = CL_DEVICE_TYPE_GPU; | |
76 } else if (strcmp(argv[i+1], "cpu") == 0) { | |
77 dev_type = CL_DEVICE_TYPE_CPU; | |
78 } | |
79 | |
71 } | 80 } |
72 } | 81 } |
73 | 82 |
74 if (filename == 0) { | 83 if (filename == 0) { |
75 printf("Usage: ./word_count [-file filename]\n"); | 84 printf("Usage: ./word_count [-file filename]\n"); |
107 | 116 |
108 cl_device_id device_id = NULL; | 117 cl_device_id device_id = NULL; |
109 cl_uint ret_num_devices = NULL; | 118 cl_uint ret_num_devices = NULL; |
110 // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる | 119 // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる |
111 // CL_DEVICE_TYPE_DEFAULT はどうなるのか | 120 // CL_DEVICE_TYPE_DEFAULT はどうなるのか |
112 ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_CPU, | 121 ret = clGetDeviceIDs( platform_id, dev_type, |
113 1, &device_id, &ret_num_devices); | 122 1, &device_id, &ret_num_devices); |
114 | 123 |
115 oclCheckError(ret, CL_SUCCESS); | 124 oclCheckError(ret, CL_SUCCESS); |
116 | 125 |
117 //OpenCLコンテキストの作成 | 126 //OpenCLコンテキストの作成 |
178 | 187 |
179 param_t param; | 188 param_t param; |
180 param.size = divi_size; | 189 param.size = divi_size; |
181 param.remain_size = remain_size; | 190 param.remain_size = remain_size; |
182 | 191 |
183 | |
184 ret = clEnqueueWriteBuffer(command_queue, param_memobj, | 192 ret = clEnqueueWriteBuffer(command_queue, param_memobj, |
185 CL_TRUE, 0, sizeof(param_t), &param, | 193 CL_TRUE, 0, sizeof(param_t), &param, |
186 0, NULL, NULL); | 194 0, NULL, NULL); |
187 | 195 |
188 oclCheckError(ret, CL_SUCCESS); | 196 oclCheckError(ret, CL_SUCCESS); |
189 | |
190 | |
191 | 197 |
192 // 引数のSet | 198 // 引数のSet |
193 // memory object にしなくてもできるsetできるかも | 199 // memory object にしなくてもできるsetできるかも |
194 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj); | 200 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj); |
195 ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&param_memobj); | 201 ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&param_memobj); |
205 | 211 |
206 //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL); | 212 //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL); |
207 // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず | 213 // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず |
208 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); | 214 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); |
209 | 215 |
216 | |
210 oclCheckError(ret, CL_SUCCESS); | 217 oclCheckError(ret, CL_SUCCESS); |
211 | 218 |
212 int *out_data = (int*)malloc(out_size); | 219 int *out_data = (int*)malloc(out_size); |
213 | 220 |
221 | |
222 #ifndef BLOKING | |
223 | |
214 // 演算結果の読み込み | 224 // 演算結果の読み込み |
225 // CL_TRUE で blocking | |
215 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, | 226 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0, |
216 out_size, out_data, 0, NULL, NULL); | 227 out_size, out_data, 0, NULL, NULL); |
217 | 228 |
229 #else | |
230 | |
231 cl_event ev; | |
232 | |
233 // 演算結果の読み込み | |
234 // CL_FALSE で non-blocking | |
235 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_FALSE, 0, | |
236 out_size, out_data, 0, NULL, &ev); | |
237 | |
238 // event object に関連する処理の完了を wait する | |
239 // これはeventひとつだけど、listにして、複数 wait できる | |
240 clWaitForEvents(1, &ev); | |
241 | |
242 #endif | |
243 | |
218 oclCheckError(ret, CL_SUCCESS); | 244 oclCheckError(ret, CL_SUCCESS); |
219 | 245 |
220 printf("global_work_size %d\n", (int)global_work_size); | 246 printf("global_work_size %d\n", (int)global_work_size); |
221 | 247 |
222 int word_line_num[PRINT_PARAM_NUM]; | 248 int word_line_num[PRINT_PARAM_NUM]; |
241 word_line_num[0] += 1; | 267 word_line_num[0] += 1; |
242 | 268 |
243 } | 269 } |
244 } | 270 } |
245 | 271 |
246 printf("%d %d", word_line_num[0], word_line_num[1]); | 272 printf("%d %d \n", word_line_num[0], word_line_num[1]); |
247 | 273 |
248 clFlush(command_queue); | 274 clFlush(command_queue); |
249 clFinish(command_queue); | 275 clFinish(command_queue); |
250 clReleaseKernel(kernel); | 276 clReleaseKernel(kernel); |
251 clReleaseProgram(program); | 277 clReleaseProgram(program); |