annotate WordCount/main.cc @ 4:641eef31681e

non bloking clEnqueuReadBuffer
author Yutaka_Kinjyo
date Thu, 28 Jul 2011 10:55:36 +0900
parents 1c0c9299c292
children ef8efbd04df9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
1 #include <stdio.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
2 #include <stdlib.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
3 #include <string.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
4 #include <sys/stat.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
5 #include <sys/mman.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
6 #include <sys/types.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
7 #include <fcntl.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
8 #include <unistd.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
9 #include <OpenCL/opencl.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
10 #include <oclUtils.h>
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
11
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
/*
 * Number of int slots reserved per work-item in the kernel output buffer
 * (the host-side comments list them as word_num, line_num, head, tail).
 */
#define OUT_PARAM_NUM 4

/*
 * Number of leading slots of each work-item's output that the host sums
 * and prints; the remaining slots are treated as boundary flags.
 */
#define PRINT_PARAM_NUM 2
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
14
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
/*
 * A file mapped into memory, as produced by my_mmap().
 *
 * file_mmap is declared as plain `char *` rather than the legacy BSD
 * `caddr_t`: the two are the same type wherever caddr_t exists, but
 * caddr_t is not part of POSIX and is unavailable in strict-standard
 * compilation modes.
 */
typedef struct {
    char *file_mmap;   /* start address of the mapping */
    off_t size;        /* length of the mapped file in bytes */
} st_mmap_t;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
19
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
/*
 * Parameters copied byte-for-byte into a device buffer and handed to the
 * kernel as its second argument.
 * NOTE(review): the layout presumably has to match a struct declared in
 * the .cl kernel source - confirm before changing field order or types.
 */
typedef struct {
    int size;          /* bytes of text assigned to each work-item */
    int remain_size;   /* bytes left over after dividing the file into
                          `size`-byte pieces (0 when it divides evenly) */
} param_t;
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
24
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
25
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
26 static st_mmap_t
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
27 my_mmap(char *filename)
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
28 {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
29
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
30 /*マッピングだよ!*/
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
31 int fd = -1;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
32 int map = MAP_PRIVATE;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
33 st_mmap_t st_mmap;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
34 struct stat sb;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
35
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
36 if ((fd=open(filename,O_RDONLY,0666))==0) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
37 fprintf(stderr,"can't open %s\n",filename);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
38 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
39
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
40 if (fstat(fd,&sb)) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
41 fprintf(stderr,"can't fstat %s\n",filename);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
42 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
43
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
44 st_mmap.size = sb.st_size;
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
45
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
46 printf("file size %d\n",(int)st_mmap.size);
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
47
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
48 st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
49 if (st_mmap.file_mmap == (caddr_t)-1) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
50 fprintf(stderr,"Can't mmap file\n");
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
51 perror(NULL);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
52 exit(0);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
53 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
54
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
55 return st_mmap;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
56
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
57 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
58
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
59
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
60
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
61 int main(int args, char *argv[])
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
62 {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
63
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
64 char *filename = 0;
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
65 cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT;
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
66
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
67 for (int i = 1; argv[i]; ++i) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
68 if (strcmp(argv[i], "-file") == 0) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
69 filename = argv[i+1];
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
70 } else if (strcmp(argv[i], "-help") == 0) {
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
71 printf("Usage: ./word_count [-file filename]\n");
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
72 } else if (strcmp(argv[i], "-type") == 0) {
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
73
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
74 if (strcmp(argv[i+1], "gpu") == 0) {
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
75 dev_type = CL_DEVICE_TYPE_GPU;
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
76 } else if (strcmp(argv[i+1], "cpu") == 0) {
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
77 dev_type = CL_DEVICE_TYPE_CPU;
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
78 }
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
79
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
80 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
81 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
82
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
83 if (filename == 0) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
84 printf("Usage: ./word_count [-file filename]\n");
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
85 return 0;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
86 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
87
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
88 //指定されたファイルをメモリにmap
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
89 st_mmap_t st_mmap = my_mmap(filename);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
90
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
91 //kernelファイルの大きさ取得して、メモリ確保
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
92 int fd = -1;
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
93 const char *kernel_filename = "./kernel/word_count.cl";
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
94 if ((fd=open(kernel_filename,O_RDONLY,0666))==0) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
95 fprintf(stderr,"can't open %s\n",kernel_filename);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
96 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
97
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
98 struct stat sb;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
99 if (fstat(fd,&sb)) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
100 fprintf(stderr,"can't fstat %s\n",filename);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
101 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
102
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
103 size_t source_size = sb.st_size;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
104 void *source_str = malloc(source_size);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
105
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
106 int err = read(fd, source_str, source_size);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
107 if (err == -1) {
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
108 fprintf(stderr,"can't read %s\n",filename);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
109 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
110
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
111 close(fd);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
112
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
113 cl_platform_id platform_id = NULL;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
114 cl_uint ret_num_platforms = NULL;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
115 cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
116
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
117 cl_device_id device_id = NULL;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
118 cl_uint ret_num_devices = NULL;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
119 // CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, と指定できる
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
120 // CL_DEVICE_TYPE_DEFAULT はどうなるのか
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
121 ret = clGetDeviceIDs( platform_id, dev_type,
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
122 1, &device_id, &ret_num_devices);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
123
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
124 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
125
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
126 //OpenCLコンテキストの作成
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
127 cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
128 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
129 cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
130 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
131 // カーネルプログラムを読み込む
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
132 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
133 (const size_t *)&source_size, &ret);
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
134
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
135 ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
136
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
137 if (ret != CL_SUCCESS) {
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
138 oclLogBuildInfo(program, device_id);
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
139 }
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
140
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
141 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
142
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
143 //カーネルプログラムをビルド
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
144 //Task選択にあたる
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
145 cl_kernel kernel = clCreateKernel(program, "word_count", &ret);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
146 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
147
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
148 //カウントするテキストデータのメモリオブジェクト
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
149 cl_mem text_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY,
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
150 st_mmap.size, NULL, &ret);
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
151
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
152 ret = clEnqueueWriteBuffer(command_queue, text_memobj,
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
153 CL_TRUE, 0, st_mmap.size, (char*)st_mmap.file_mmap,
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
154 0, NULL, NULL);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
155
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
156 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
157
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
158 /*
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
159 * 並列度の計算
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
160 * wcするファイルの大きさに合わせる
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
161 *
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
162 */
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
163
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
164 // このdivi_size はどうやって決めるよ
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
165 int divi_size = 1024;
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
166 //size_t global_work_size = (st_mmap.size + divi_size - 1) / divi_size;
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
167 size_t global_work_size = st_mmap.size / divi_size;
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
168 int remain_size = st_mmap.size - global_work_size * divi_size;
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
169 if (remain_size > 0) {
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
170 global_work_size += 1;
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
171 }
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
172
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
173 /*
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
174 * 行数、単語数, 分割地点のフラグを格納する配列
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
175 * word_num, line_num, head, tail
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
176 */
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
177
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
178 int out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
179 cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
180 out_size * sizeof(char), NULL, &ret);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
181
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
182 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
183
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
184 // 必要なパラメータのオブジェクト
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
185 cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY,
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
186 sizeof(param_t), NULL, &ret);
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
187
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
188 param_t param;
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
189 param.size = divi_size;
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
190 param.remain_size = remain_size;
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
191
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
192 ret = clEnqueueWriteBuffer(command_queue, param_memobj,
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
193 CL_TRUE, 0, sizeof(param_t), &param,
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
194 0, NULL, NULL);
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
195
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
196 oclCheckError(ret, CL_SUCCESS);
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
197
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
198 // 引数のSet
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
199 // memory object にしなくてもできるsetできるかも
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
200 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
201 ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&param_memobj);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
202 ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&out_memobj);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
203
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
204 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
205
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
206 /*
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
207 * kernel実行
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
208    * 並列に処理せずに work-item ひとつで動かしたい場合は、clEnqueueNDRangeKernel の簡易版 clEnqueueTask が使える
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
209 *
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
210 */
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
211
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
212 //ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
213 // global_work_size は配列。並列動作させる時の次元数にあわせて、配列の次元数も決まるはず
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
214 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
215
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
216
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
217 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
218
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
219 int *out_data = (int*)malloc(out_size);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
220
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
221
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
222 #ifndef BLOKING
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
223
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
224 // 演算結果の読み込み
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
225 // CL_TRUE で bloking
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
226 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_TRUE, 0,
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
227 out_size, out_data, 0, NULL, NULL);
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
228
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
229 #else
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
230
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
231 cl_event ev;
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
232
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
233 // 演算結果の読み込み
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
234 // CL_FALSE で non-bloking
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
235 ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_FALSE, 0,
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
236 out_size, out_data, 0, NULL, &ev);
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
237
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
238 // event object に関連する処理の完了を wait する
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
239 // これはeventひとつだけど、listにして、複数 wait できる
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
240 clWaitForEvents(1, &ev);
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
241
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
242 #endif
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
243
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
244 oclCheckError(ret, CL_SUCCESS);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
245
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
246 printf("global_work_size %d\n", (int)global_work_size);
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
247
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
248 int word_line_num[PRINT_PARAM_NUM];
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
249
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
250 for (int i = 0; i < PRINT_PARAM_NUM; i++) {
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
251 word_line_num[i] = 0;
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
252 }
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
253
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
254 for (int i = 0; i < global_work_size; i++) {
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
255
1
403e35dd9b6d word count ...
Yutaka_Kinjyo
parents: 0
diff changeset
256 for (int j = 0; j < PRINT_PARAM_NUM; j++) {
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
257
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
258 word_line_num[j] += out_data[i*OUT_PARAM_NUM+j];
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
259
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
260 }
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
261
3
1c0c9299c292 get global_work_size
Yutaka_Kinjyo
parents: 2
diff changeset
262 //flagの判定
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
263 if ((i != global_work_size-1) &&
3
1c0c9299c292 get global_work_size
Yutaka_Kinjyo
parents: 2
diff changeset
264 (out_data[i*OUT_PARAM_NUM+PRINT_PARAM_NUM+1] == 1) &&
1c0c9299c292 get global_work_size
Yutaka_Kinjyo
parents: 2
diff changeset
265 (out_data[i*OUT_PARAM_NUM+PRINT_PARAM_NUM+4] == 0)) {
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
266
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
267 word_line_num[0] += 1;
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
268
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
269 }
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
270 }
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
271
4
641eef31681e non bloking clEnqueuReadBuffer
Yutaka_Kinjyo
parents: 3
diff changeset
272 printf("%d %d \n", word_line_num[0], word_line_num[1]);
2
1a32564347d5 parallel word count
Yutaka_Kinjyo
parents: 1
diff changeset
273
0
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
274 clFlush(command_queue);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
275 clFinish(command_queue);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
276 clReleaseKernel(kernel);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
277 clReleaseProgram(program);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
278 clReleaseMemObject(text_memobj);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
279 clReleaseMemObject(param_memobj);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
280 clReleaseMemObject(out_memobj);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
281 clReleaseCommandQueue(command_queue);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
282 clReleaseContext(context);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
283
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
284 free(source_str);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
285 free(out_data);
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
286
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
287 return 0;
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
288
0e6e76dbdb0f add file
Yutaka_Kinjyo
parents:
diff changeset
289 }