Mercurial > hg > Members > yutaka > OpenCL
comparison WordCount/main.cc @ 2:1a32564347d5
parallel word count
author | Yutaka_Kinjyo |
---|---|
date | Sun, 24 Jul 2011 01:45:40 +0900 |
parents | 403e35dd9b6d |
children | 1c0c9299c292 |
comparison
equal
deleted
inserted
replaced
1:403e35dd9b6d | 2:1a32564347d5 |
---|---|
9 #include <OpenCL/opencl.h> | 9 #include <OpenCL/opencl.h> |
10 #include <oclUtils.h> | 10 #include <oclUtils.h> |
11 | 11 |
12 #define OUT_PARAM_NUM 4 | 12 #define OUT_PARAM_NUM 4 |
13 #define PRINT_PARAM_NUM 2 | 13 #define PRINT_PARAM_NUM 2 |
14 #define FLAG_PARAM_NUM 2 | |
14 | 15 |
15 typedef struct { | 16 typedef struct { |
16 caddr_t file_mmap; | 17 caddr_t file_mmap; |
17 off_t size; | 18 off_t size; |
18 } st_mmap_t; | 19 } st_mmap_t; |
80 //指定されたファイルをメモリにmap | 81 //指定されたファイルをメモリにmap |
81 st_mmap_t st_mmap = my_mmap(filename); | 82 st_mmap_t st_mmap = my_mmap(filename); |
82 | 83 |
83 //kernelファイルの大きさ取得して、メモリ確保 | 84 //kernelファイルの大きさ取得して、メモリ確保 |
84 int fd = -1; | 85 int fd = -1; |
85 const char *kernel_filename = "./word_count.cl"; | 86 const char *kernel_filename = "./kernel/word_count.cl"; |
86 if ((fd=open(kernel_filename,O_RDONLY,0666))==0) { | 87 if ((fd=open(kernel_filename,O_RDONLY,0666))==0) { |
87 fprintf(stderr,"can't open %s\n",kernel_filename); | 88 fprintf(stderr,"can't open %s\n",kernel_filename); |
88 } | 89 } |
89 | 90 |
90 struct stat sb; | 91 struct stat sb; |
137 cl_kernel kernel = clCreateKernel(program, "word_count", &ret); | 138 cl_kernel kernel = clCreateKernel(program, "word_count", &ret); |
138 oclCheckError(ret, CL_SUCCESS); | 139 oclCheckError(ret, CL_SUCCESS); |
139 | 140 |
140 //カウントするテキストデータのメモリオブジェクト | 141 //カウントするテキストデータのメモリオブジェクト |
141 cl_mem text_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, | 142 cl_mem text_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, |
142 st_mmap.size * sizeof(char), NULL, &ret); | 143 st_mmap.size, NULL, &ret); |
143 | 144 |
144 ret = clEnqueueWriteBuffer(command_queue, text_memobj, | 145 ret = clEnqueueWriteBuffer(command_queue, text_memobj, |
145 CL_TRUE, 0, st_mmap.size, (char*)st_mmap.file_mmap, | 146 CL_TRUE, 0, st_mmap.size, (char*)st_mmap.file_mmap, |
146 0, NULL, NULL); | 147 0, NULL, NULL); |
147 | 148 |
148 oclCheckError(ret, CL_SUCCESS); | 149 oclCheckError(ret, CL_SUCCESS); |
149 | |
150 // 必要なパラメータのオブジェクト | |
151 cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, | |
152 sizeof(param_t), NULL, &ret); | |
153 | |
154 ret = clEnqueueWriteBuffer(command_queue, param_memobj, | |
155 CL_TRUE, 0, sizeof(int), (int*)(&st_mmap.size), | |
156 0, NULL, NULL); | |
157 | |
158 oclCheckError(ret, CL_SUCCESS); | |
159 | |
160 | 150 |
161 /* | 151 /* |
162 * 並列度の計算 | 152 * 並列度の計算 |
163 * wcするファイルの大きさに合わせる | 153 * wcするファイルの大きさに合わせる |
164 * | 154 * |
165 */ | 155 */ |
166 | 156 |
167 // このdivi_size はどうやって決めるよ | 157 // このdivi_size はどうやって決めるよ |
168 int divi_size = 1024; | 158 int divi_size = 1024; |
169 size_t global_work_size = (st_mmap.size + divi_size - 1) / divi_size; | 159 //size_t global_work_size = (st_mmap.size + divi_size - 1) / divi_size; |
160 size_t global_work_size = st_mmap.size / divi_size; | |
170 int remain_size = st_mmap.size - global_work_size * divi_size; | 161 int remain_size = st_mmap.size - global_work_size * divi_size; |
162 if (remain_size > 0) { | |
163 global_work_size += 1; | |
164 } | |
171 | 165 |
172 /* | 166 /* |
173 * 行数、単語数, 分割地点のフラグを格納する配列 | 167 * 行数、単語数, 分割地点のフラグを格納する配列 |
174 * word_num, line_num, head, tail | 168 * word_num, line_num, head, tail |
175 */ | 169 */ |
177 int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size; | 171 int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size; |
178 cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, | 172 cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, |
179 out_size * sizeof(char), NULL, &ret); | 173 out_size * sizeof(char), NULL, &ret); |
180 | 174 |
181 oclCheckError(ret, CL_SUCCESS); | 175 oclCheckError(ret, CL_SUCCESS); |
176 | |
177 // 必要なパラメータのオブジェクト | |
178 cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, | |
179 sizeof(param_t), NULL, &ret); | |
180 | |
181 param_t param; | |
182 param.work_num = global_work_size; | |
183 param.size = divi_size; | |
184 param.remain_size = remain_size; | |
185 | |
186 | |
187 ret = clEnqueueWriteBuffer(command_queue, param_memobj, | |
188 CL_TRUE, 0, sizeof(param_t), &param, | |
189 0, NULL, NULL); | |
190 | |
191 oclCheckError(ret, CL_SUCCESS); | |
192 | |
182 | 193 |
183 | 194 |
184 // 引数のSet | 195 // 引数のSet |
185 // memory object にしなくてもできるsetできるかも | 196 // memory object にしなくてもできるsetできるかも |
186 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj); | 197 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj); |
209 | 220 |
210 oclCheckError(ret, CL_SUCCESS); | 221 oclCheckError(ret, CL_SUCCESS); |
211 | 222 |
212 printf("global_work_size %d\n", (int)global_work_size); | 223 printf("global_work_size %d\n", (int)global_work_size); |
213 | 224 |
225 int word_line_num[PRINT_PARAM_NUM]; | |
226 | |
227 for (int i = 0; i < PRINT_PARAM_NUM; i++) { | |
228 word_line_num[i] = 0; | |
229 } | |
230 | |
214 for (int i = 0; i < global_work_size; i++) { | 231 for (int i = 0; i < global_work_size; i++) { |
232 | |
215 for (int j = 0; j < PRINT_PARAM_NUM; j++) { | 233 for (int j = 0; j < PRINT_PARAM_NUM; j++) { |
234 | |
216 printf("%d ", out_data[i*OUT_PARAM_NUM+j]); | 235 printf("%d ", out_data[i*OUT_PARAM_NUM+j]); |
236 word_line_num[j] += out_data[i*OUT_PARAM_NUM+j]; | |
237 | |
217 } | 238 } |
239 | |
240 if ((i != global_work_size-1) && | |
241 (out_data[i*OUT_PARAM_NUM+PRINT_PARAM_NUM]) && | |
242 (out_data[i*OUT_PARAM_NUM+PRINT_PARAM_NUM+1])) { | |
243 | |
244 word_line_num[0] += 1; | |
245 | |
246 } | |
247 | |
218 printf("\n"); | 248 printf("\n"); |
219 } | 249 |
250 } | |
251 | |
252 printf("%d %d", word_line_num[0], word_line_num[1]); | |
220 | 253 |
221 clFlush(command_queue); | 254 clFlush(command_queue); |
222 clFinish(command_queue); | 255 clFinish(command_queue); |
223 clReleaseKernel(kernel); | 256 clReleaseKernel(kernel); |
224 clReleaseProgram(program); | 257 clReleaseProgram(program); |