0
|
1 #include <stdio.h>
|
|
2 #include <stdlib.h>
|
|
3 #include <string.h>
|
|
4 #include <sys/stat.h>
|
|
5 #include <sys/mman.h>
|
|
6 #include <sys/types.h>
|
|
7 #include <fcntl.h>
|
|
8 #include <unistd.h>
|
|
9 #include <OpenCL/opencl.h>
|
|
10 #include <oclUtils.h>
|
|
11
|
1
|
/* Number of int slots each work-item owns in the kernel output buffer
 * (main() documents them as: word_num, line_num, head, tail). */
#define OUT_PARAM_NUM 4
/* Number of leading slots per work-item that main() sums and prints
 * (the word and line counters). */
#define PRINT_PARAM_NUM 2
|
0
|
14
|
|
/* A file mapped into memory together with its length, as returned by my_mmap(). */
typedef struct {
    caddr_t file_mmap;  /* start address of the mapping (value returned by mmap) */
    off_t size;         /* size of the mapped file in bytes (st_size from fstat) */
} st_mmap_t;
|
|
19
|
1
|
/*
 * Parameter block that main() copies into a device buffer and passes to the
 * kernel as argument 1.
 * NOTE(review): the kernel side presumably declares a struct with the same
 * layout (two 32-bit ints) - confirm against word_count.cl before changing it.
 */
typedef struct {
    int size;         /* bytes of text handled by one work-item (main's divi_size) */
    int remain_size;  /* bytes left over after dividing the file into `size` chunks */
} param_t;
|
0
|
24
|
|
25
|
|
26 static st_mmap_t
|
|
27 my_mmap(char *filename)
|
|
28 {
|
|
29
|
|
30 /*マッピングだよ!*/
|
|
31 int fd = -1;
|
|
32 int map = MAP_PRIVATE;
|
|
33 st_mmap_t st_mmap;
|
|
34 struct stat sb;
|
|
35
|
|
36 if ((fd=open(filename,O_RDONLY,0666))==0) {
|
|
37 fprintf(stderr,"can't open %s\n",filename);
|
|
38 }
|
|
39
|
|
40 if (fstat(fd,&sb)) {
|
|
41 fprintf(stderr,"can't fstat %s\n",filename);
|
|
42 }
|
|
43
|
1
|
44 st_mmap.size = sb.st_size;
|
0
|
45
|
1
|
46 printf("file size %d\n",(int)st_mmap.size);
|
0
|
47
|
|
48 st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0);
|
|
49 if (st_mmap.file_mmap == (caddr_t)-1) {
|
|
50 fprintf(stderr,"Can't mmap file\n");
|
|
51 perror(NULL);
|
|
52 exit(0);
|
|
53 }
|
|
54
|
|
55 return st_mmap;
|
|
56
|
|
57 }
|
|
58
|
|
59
|
|
60
|
|
61 int main(int args, char *argv[])
|
|
62 {
|
|
63
|
|
64 char *filename = 0;
|
4
|
65 cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT;
|
0
|
66
|
|
67 for (int i = 1; argv[i]; ++i) {
|
|
68 if (strcmp(argv[i], "-file") == 0) {
|
|
69 filename = argv[i+1];
|
4
|
70 } else if (strcmp(argv[i], "-help") == 0) {
|
0
|
71 printf("Usage: ./word_count [-file filename]\n");
|
4
|
72 } else if (strcmp(argv[i], "-type") == 0) {
|
|
73
|
|
74 if (strcmp(argv[i+1], "gpu") == 0) {
|
|
75 dev_type = CL_DEVICE_TYPE_GPU;
|
|
76 } else if (strcmp(argv[i+1], "cpu") == 0) {
|
|
77 dev_type = CL_DEVICE_TYPE_CPU;
|
|
78 }
|
|
79
|
0
|
80 }
|
|
81 }
|
|
82
|
|
83 if (filename == 0) {
|
|
84 printf("Usage: ./word_count [-file filename]\n");
|
|
85 return 0;
|
|
86 }
|
|
87
|
|
88 //指定されたファイルをメモリにmap
|
|
89 st_mmap_t st_mmap = my_mmap(filename);
|
|
90
|
|
91 //kernelファイルの大きさ取得して、メモリ確保
|
|
92 int fd = -1;
|
2
|
93 const char *kernel_filename = "./kernel/word_count.cl";
|
0
|
94 if ((fd=open(kernel_filename,O_RDONLY,0666))==0) {
|
|
95 fprintf(stderr,"can't open %s\n",kernel_filename);
|
|
96 }
|
|
97
|
|
98 struct stat sb;
|
|
99 if (fstat(fd,&sb)) {
|
|
100 fprintf(stderr,"can't fstat %s\n",filename);
|
|
101 }
|
|
102
|
|
103 size_t source_size = sb.st_size;
|
|
104 void *source_str = malloc(source_size);
|
|
105
|
|
106 int err = read(fd, source_str, source_size);
|
|
107 if (err == -1) {
|
|
108 fprintf(stderr,"can't read %s\n",filename);
|
|
109 }
|
|
110
|
|
111 close(fd);
|
|
112
|
|
113 cl_platform_id platform_id = NULL;
|
|
114 cl_uint ret_num_platforms = NULL;
|
|
115 cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
|
|
116
|
|
117 cl_device_id device_id = NULL;
|
|
118 cl_uint ret_num_devices = NULL;
|
|
119 // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる
|
|
120 // CL_DEVICE_TYPE_DEFAULT はどうなるのか
|
4
|
121 ret = clGetDeviceIDs( platform_id, dev_type,
|
0
|
122 1, &device_id, &ret_num_devices);
|
|
123
|
|
124 oclCheckError(ret, CL_SUCCESS);
|
|
125
|
|
126 //OpenCLコンテキストの作成
|
|
127 cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
|
|
128 oclCheckError(ret, CL_SUCCESS);
|
|
129 cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
|
|
130 oclCheckError(ret, CL_SUCCESS);
|
|
131 // カーネルプログラムを読み込む
|
|
132 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
|
|
133 (const size_t *)&source_size, &ret);
|
1
|
134
|
0
|
135 ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
|
|
136
|
1
|
137 if (ret != CL_SUCCESS) {
|
|
138 oclLogBuildInfo(program, device_id);
|
|
139 }
|
|
140
|
0
|
141 oclCheckError(ret, CL_SUCCESS);
|
|
142
|
|
143 //カーネルプログラムをビルド
|
|
144 //Task選択にあたる
|
|
145 cl_kernel kernel = clCreateKernel(program, "word_count", &ret);
|
|
146 oclCheckError(ret, CL_SUCCESS);
|
|
147
|
|
148 //カウントするテキストデータのメモリオブジェクト
|
|
149 cl_mem text_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
2
|
150 st_mmap.size, NULL, &ret);
|
0
|
151
|
|
152 ret = clEnqueueWriteBuffer(command_queue, text_memobj,
|
|
153 CL_TRUE, 0, st_mmap.size, (char*)st_mmap.file_mmap,
|
|
154 0, NULL, NULL);
|
|
155
|
|
156 oclCheckError(ret, CL_SUCCESS);
|
|
157
|
|
158 /*
|
|
159 * 並列度の計算
|
|
160 * wcするファイルの大きさに合わせる
|
|
161 *
|
|
162 */
|
|
163
|
|
164 // このdivi_size はどうやって決めるよ
|
|
165 int divi_size = 1024;
|
2
|
166 size_t global_work_size = st_mmap.size / divi_size;
|
1
|
167 int remain_size = st_mmap.size - global_work_size * divi_size;
|
2
|
168 if (remain_size > 0) {
|
|
169 global_work_size += 1;
|
|
170 }
|
0
|
171
|
1
|
172 /*
|
|
173 * 行数、単語数, 分割地点のフラグを格納する配列
|
|
174 * word_num, line_num, head, tail
|
|
175 */
|
0
|
176
|
|
177 int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size;
|
|
178 cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
|
179 out_size * sizeof(char), NULL, &ret);
|
|
180
|
|
181 oclCheckError(ret, CL_SUCCESS);
|
|
182
|
2
|
183 // 必要なパラメータのオブジェクト
|
|
184 cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
|
185 sizeof(param_t), NULL, &ret);
|
|
186
|
|
187 param_t param;
|
|
188 param.size = divi_size;
|
|
189 param.remain_size = remain_size;
|
|
190
|
|
191 ret = clEnqueueWriteBuffer(command_queue, param_memobj,
|
|
192 CL_TRUE, 0, sizeof(param_t), ¶m,
|
|
193 0, NULL, NULL);
|
|
194
|
|
195 oclCheckError(ret, CL_SUCCESS);
|
|
196
|
0
|
197 // 引数のSet
|
|
198 // memory object にしなくてもできるsetできるかも
|
|
199 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj);
|
|
200 ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)¶m_memobj);
|
|
201 ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&out_memobj);
|
|
202
|
|
203 oclCheckError(ret, CL_SUCCESS);
|
|
204
|
|
205 /*
|
|
206 * kernel実行
|
|
207 * 並列に処理せずに work-item ひとつで動かしたい場合は、clEnqueueNDRangeKernel の簡易版 clEnqueueTask が使える
|
|
208 *
|
1
|
209 */
|
0
|
210
|
|
211 //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
|
|
212 // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず
|
|
213 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
|
|
214
|
4
|
215
|
0
|
216 oclCheckError(ret, CL_SUCCESS);
|
|
217
|
|
218 int *out_data = (int*)malloc(out_size);
|
|
219
|
4
|
220
|
|
221 cl_event ev;
|
|
222
|
|
223 // 演算結果の読み込み
|
5
|
224 // CL_FALSE で non-bloking, CL_TURE で bloking
|
4
|
225 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_FALSE, 0,
|
|
226 out_size, out_data, 0, NULL, &ev);
|
|
227
|
|
228 // event object に関連する処理の完了を wait する
|
|
229 // これはeventひとつだけど、listにして、複数 wait できる
|
|
230 clWaitForEvents(1, &ev);
|
|
231
|
|
232
|
0
|
233 oclCheckError(ret, CL_SUCCESS);
|
|
234
|
1
|
235 printf("global_work_size %d\n", (int)global_work_size);
|
|
236
|
2
|
237 int word_line_num[PRINT_PARAM_NUM];
|
|
238
|
|
239 for (int i = 0; i < PRINT_PARAM_NUM; i++) {
|
|
240 word_line_num[i] = 0;
|
|
241 }
|
|
242
|
0
|
243 for (int i = 0; i < global_work_size; i++) {
|
2
|
244
|
1
|
245 for (int j = 0; j < PRINT_PARAM_NUM; j++) {
|
2
|
246
|
|
247 word_line_num[j] += out_data[i*OUT_PARAM_NUM+j];
|
|
248
|
0
|
249 }
|
2
|
250
|
3
|
251 //flagの判定
|
2
|
252 if ((i != global_work_size-1) &&
|
3
|
253 (out_data[i*OUT_PARAM_NUM+PRINT_PARAM_NUM+1] == 1) &&
|
|
254 (out_data[i*OUT_PARAM_NUM+PRINT_PARAM_NUM+4] == 0)) {
|
2
|
255
|
|
256 word_line_num[0] += 1;
|
|
257
|
|
258 }
|
0
|
259 }
|
|
260
|
4
|
261 printf("%d %d \n", word_line_num[0], word_line_num[1]);
|
2
|
262
|
0
|
263 clFlush(command_queue);
|
|
264 clFinish(command_queue);
|
|
265 clReleaseKernel(kernel);
|
|
266 clReleaseProgram(program);
|
|
267 clReleaseMemObject(text_memobj);
|
|
268 clReleaseMemObject(param_memobj);
|
|
269 clReleaseMemObject(out_memobj);
|
|
270 clReleaseCommandQueue(command_queue);
|
|
271 clReleaseContext(context);
|
5
|
272 clReleaseEvent(ev);
|
0
|
273
|
|
274 free(source_str);
|
|
275 free(out_data);
|
|
276
|
|
277 return 0;
|
|
278
|
|
279 }
|