Mercurial > hg > Members > yuuhi > OpenCL
annotate fft_fixstart/main.cc @ 10:e38bef2012bc
fix sync bug. and NDrange option
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 11 Feb 2013 06:52:24 +0900 |
parents | ed3d4a769bf3 |
children |
rev | line source |
---|---|
3 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 #include <math.h> | |
4 #include <sys/stat.h> | |
5 #include <fcntl.h> | |
4
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
6 #include <sys/time.h> |
3 | 7 |
8 #ifdef __APPLE__ | |
9 #include <OpenCL/opencl.h> | |
10 #else | |
11 #include <CL/cl.h> | |
12 #endif | |
13 | |
14 #include "pgm.h" | |
15 | |
16 #define PI 3.14159265358979 | |
17 | |
18 #define MAX_SOURCE_SIZE (0x100000) | |
19 | |
20 #define AMP(a, b) (sqrt((a)*(a)+(b)*(b))) | |
21 | |
4
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
22 static double st_time; |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
23 static double ed_time; |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
24 int ndrange_flag; |
4
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
25 |
3 | 26 cl_device_id device_id = NULL; |
27 cl_context context = NULL; | |
28 cl_command_queue queue = NULL; | |
29 cl_program program = NULL; | |
30 cl_device_type device_type = CL_DEVICE_TYPE_GPU; | |
31 | |
32 enum Mode { | |
33 forward = 0, | |
34 inverse = 1 | |
35 }; | |
36 | |
4
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
37 static double |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
38 getTime() |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
39 { |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
40 struct timeval tv; |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
41 gettimeofday(&tv, NULL); |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
42 return tv.tv_sec + (double)tv.tv_usec*1e-6; |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
43 } |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
44 |
3 | 45 int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y) |
46 { | |
47 switch(y) { | |
48 case 1: | |
49 gws[0] = x; | |
50 gws[1] = 1; | |
51 lws[0] = 1; | |
52 lws[1] = 1; | |
53 break; | |
54 default: | |
55 gws[0] = x; | |
56 gws[1] = y; | |
57 lws[0] = 1; | |
58 lws[1] = 1; | |
59 break; | |
60 } | |
61 | |
62 return 0; | |
63 } | |
64 | |
65 int fftCore(cl_mem dst, cl_mem src, cl_mem spin, cl_int m, enum Mode direction) | |
66 { | |
67 cl_int ret; | |
68 | |
69 cl_int iter; | |
70 cl_uint flag; | |
71 | |
72 cl_int n = 1<<m; | |
73 | |
74 cl_event kernelDone; | |
75 | |
76 cl_kernel brev = NULL; | |
77 cl_kernel bfly = NULL; | |
78 cl_kernel norm = NULL; | |
79 | |
80 brev = clCreateKernel(program, "bitReverse", &ret); | |
81 bfly = clCreateKernel(program, "butterfly", &ret); | |
82 norm = clCreateKernel(program, "norm", &ret); | |
83 | |
84 size_t gws[2]; | |
85 size_t lws[2]; | |
86 | |
87 switch (direction) { | |
88 case forward:flag = 0x00000000; break; | |
89 case inverse:flag = 0x80000000; break; | |
90 } | |
91 | |
92 ret = clSetKernelArg(brev, 0, sizeof(cl_mem), (void *)&dst); | |
93 ret = clSetKernelArg(brev, 1, sizeof(cl_mem), (void *)&src); | |
94 ret = clSetKernelArg(brev, 2, sizeof(cl_int), (void *)&m); | |
95 ret = clSetKernelArg(brev, 3, sizeof(cl_int), (void *)&n); | |
96 | |
97 ret = clSetKernelArg(bfly, 0, sizeof(cl_mem), (void *)&dst); | |
98 ret = clSetKernelArg(bfly, 1, sizeof(cl_mem), (void *)&spin); | |
99 ret = clSetKernelArg(bfly, 2, sizeof(cl_int), (void *)&m); | |
100 ret = clSetKernelArg(bfly, 3, sizeof(cl_int), (void *)&n); | |
101 ret = clSetKernelArg(bfly, 5, sizeof(cl_uint), (void *)&flag); | |
102 | |
103 ret = clSetKernelArg(norm, 0, sizeof(cl_mem), (void *)&dst); | |
104 ret = clSetKernelArg(norm, 1, sizeof(cl_int), (void *)&n); | |
105 | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
106 /* Reversee bit ordering */ |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
107 if (ndrange_flag == 1) { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
108 setWorkSize(gws, lws, n, n); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
109 ret = clEnqueueNDRangeKernel(queue, brev, 2, NULL, gws, lws, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
110 } else { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
111 ret = clEnqueueTask(queue, brev, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
112 } |
3 | 113 |
114 /* Perform Butterfly Operations*/ | |
115 setWorkSize(gws, lws, n/2, n); | |
116 for (iter=1; iter <= m; iter++) { | |
117 ret = clSetKernelArg(bfly, 4, sizeof(cl_int), (void *)&iter); | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
118 if (ndrange_flag == 1) { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
119 ret = clEnqueueNDRangeKernel(queue, bfly, 2, NULL, gws, lws, 0, NULL, &kernelDone); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
120 } else { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
121 ret = clEnqueueTask(queue, bfly, 0, NULL, &kernelDone); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
122 } |
3 | 123 ret = clWaitForEvents(1, &kernelDone); |
124 } | |
125 | |
126 if (direction == inverse) { | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
127 if (ndrange_flag == 1) { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
128 setWorkSize(gws, lws, n, n); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
129 ret = clEnqueueNDRangeKernel(queue, norm, 2, NULL, gws, lws, 0, NULL, &kernelDone); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
130 } else { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
131 ret = clEnqueueTask(queue, norm, 0, NULL, &kernelDone); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
132 } |
3 | 133 ret = clWaitForEvents(1, &kernelDone); |
134 } | |
135 | |
136 ret = clReleaseKernel(bfly); | |
137 ret = clReleaseKernel(brev); | |
138 ret = clReleaseKernel(norm); | |
139 | |
140 return 0; | |
141 } | |
142 | |
143 char * | |
144 init(int argc, char**argv){ | |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
145 |
3 | 146 char *filename = 0; |
147 | |
148 for (int i = 1; argv[i]; ++i) { | |
149 if (strcmp(argv[i], "-file") == 0) { | |
150 filename = argv[i+1]; | |
151 } else if (strcmp(argv[i], "-cpu") == 0) { | |
152 device_type = CL_DEVICE_TYPE_CPU; | |
153 } else if (strcmp(argv[i], "-gpu") == 0) { | |
154 device_type = CL_DEVICE_TYPE_GPU; | |
9 | 155 } else if (strcmp(argv[i], "-all") == 0) { |
156 device_type = CL_DEVICE_TYPE_ALL; | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
157 } else if (strcmp(argv[i], "-nd") == 0 ) { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
158 ndrange_flag = 1; |
3 | 159 } |
160 } | |
161 if ( (argc == 1)||(filename==0)) { | |
6 | 162 printf("Usage: ./fft -file [image filename] -cpu or -gpu \n"); |
3 | 163 exit(-1); |
164 } | |
165 | |
166 return filename; | |
167 } | |
168 | |
169 int main(int argc, char** argv) { | |
170 cl_mem xmobj = NULL; | |
171 cl_mem rmobj = NULL; | |
172 cl_mem wmobj = NULL; | |
173 cl_kernel sfac = NULL; | |
174 cl_kernel trns = NULL; | |
175 cl_kernel hpfl = NULL; | |
176 | |
177 cl_platform_id platform_id = NULL; | |
178 | |
179 cl_uint ret_num_devices; | |
180 cl_uint ret_num_platforms; | |
181 | |
182 cl_int ret; | |
183 | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
184 |
3 | 185 cl_float2 *xm; |
186 cl_float2 *rm; | |
187 cl_float2 *wm; | |
188 | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
189 /* |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
190 * typedef union |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
191 * { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
192 * cl_float CL_ALIGNED(8) s[2]; |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
193 * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
194 * __extension__ struct{ cl_float x, y; }; |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
195 * __extension__ struct{ cl_float s0, s1; }; |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
196 * __extension__ struct{ cl_float lo, hi; }; |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
197 * #endif |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
198 * #if defined( __CL_FLOAT2__) |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
199 * __cl_float2 v2; |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
200 * #endif |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
201 * }cl_float2; |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
202 */ |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
203 |
3 | 204 pgm_t ipgm; |
205 pgm_t opgm; | |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
206 |
3 | 207 const char fileName[] = "./fft.cl"; |
208 size_t source_size; | |
209 char *source_str; | |
210 cl_int i, j; | |
211 cl_int n; | |
212 cl_int m; | |
213 | |
214 size_t gws[2]; | |
215 size_t lws[2]; | |
216 | |
217 /* Load kernel source code */ | |
218 int fd = open(fileName, O_RDONLY); | |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
219 |
3 | 220 if (fd<0) { |
221 fprintf(stderr, "Failed to load kernel %s.\n",fileName); | |
222 exit(1); | |
223 } | |
224 struct stat stats; | |
225 fstat(fd, &stats); | |
226 off_t size = stats.st_size; | |
227 if (size<=0) { | |
228 fprintf(stderr, "Failed to load kernel.\n"); | |
229 exit(1); | |
230 } | |
231 source_str = (char*)alloca(size); | |
232 source_size = read(fd, source_str, size); | |
233 close( fd ); | |
234 | |
235 char * pgm_file = init(argc,argv); | |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
236 |
3 | 237 /* Read image */ |
238 int err = readPGM(&ipgm, pgm_file); | |
239 if (err<0) { | |
240 fprintf(stderr, "Failed to read image file.\n"); | |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
241 exit(1); |
3 | 242 } |
243 | |
244 n = ipgm.width; | |
245 m = (cl_int)(log((double)n)/log(2.0)); | |
246 | |
247 xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); | |
248 rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); | |
249 wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2)); | |
250 | |
251 for (i=0; i < n; i++) { | |
252 for (j=0; j < n; j++) { | |
253 ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i]; | |
254 ((float*)xm)[(2*n*j)+2*i+1] = (float)0; | |
255 } | |
256 } | |
257 | |
258 /* Get platform/device */ | |
259 ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); | |
5 | 260 |
261 ret = clGetDeviceIDs( platform_id, device_type, 1, &device_id, &ret_num_devices); | |
3 | 262 |
263 /* Create OpenCL context */ | |
264 context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); | |
265 | |
266 /* Create Command queue */ | |
267 queue = clCreateCommandQueue(context, device_id, 0, &ret); | |
268 | |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
269 st_time = getTime(); |
3 | 270 /* Create Buffer Objects */ |
271 xmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret); | |
272 rmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret); | |
273 wmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret); | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
274 |
3 | 275 /* Transfer data to memory buffer */ |
276 ret = clEnqueueWriteBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL); | |
277 | |
278 /* Create kernel program from source */ | |
279 program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); | |
280 | |
281 /* Build kernel program */ | |
282 ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); | |
283 | |
284 if (ret<0) { | |
285 size_t size; | |
286 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); | |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
287 |
3 | 288 char *log = new char[size]; |
289 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); | |
290 printf("%s ",log); | |
291 exit (ret); | |
292 } | |
293 | |
294 /* Create OpenCL Kernel */ | |
295 sfac = clCreateKernel(program, "spinFact", &ret); | |
296 trns = clCreateKernel(program, "transpose", &ret); | |
297 hpfl = clCreateKernel(program, "highPassFilter", &ret); | |
298 | |
299 /* Create spin factor */ | |
300 ret = clSetKernelArg(sfac, 0, sizeof(cl_mem), (void *)&wmobj); | |
301 ret = clSetKernelArg(sfac, 1, sizeof(cl_int), (void *)&n); | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
302 if (ndrange_flag == 1) { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
303 setWorkSize(gws, lws, n/2, 1); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
304 ret = clEnqueueNDRangeKernel(queue, sfac, 1, NULL, gws, lws, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
305 } else { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
306 ret = clEnqueueTask(queue, sfac, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
307 } |
3 | 308 |
309 /* Butterfly Operation */ | |
310 fftCore(rmobj, xmobj, wmobj, m, forward); | |
311 | |
312 /* Transpose matrix */ | |
313 ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&xmobj); | |
314 ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&rmobj); | |
315 ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n); | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
316 if (ndrange_flag == 1) { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
317 setWorkSize(gws, lws, n, n); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
318 ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
319 } else { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
320 ret = clEnqueueTask(queue, trns, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
321 } |
3 | 322 |
323 /* Butterfly Operation */ | |
324 fftCore(rmobj, xmobj, wmobj, m, forward); | |
325 | |
326 /* Apply high-pass filter */ | |
327 cl_int radius = n/8; | |
328 ret = clSetKernelArg(hpfl, 0, sizeof(cl_mem), (void *)&rmobj); | |
329 ret = clSetKernelArg(hpfl, 1, sizeof(cl_int), (void *)&n); | |
330 ret = clSetKernelArg(hpfl, 2, sizeof(cl_int), (void *)&radius); | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
331 if (ndrange_flag == 1) { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
332 setWorkSize(gws, lws, n, n); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
333 ret = clEnqueueNDRangeKernel(queue, hpfl, 2, NULL, gws, lws, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
334 } else { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
335 ret = clEnqueueTask(queue, hpfl, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
336 } |
3 | 337 |
338 /* Inverse FFT */ | |
339 | |
340 /* Butterfly Operation */ | |
341 fftCore(xmobj, rmobj, wmobj, m, inverse); | |
342 | |
343 /* Transpose matrix */ | |
344 ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&rmobj); | |
345 ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&xmobj); | |
10
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
346 if (ndrange_flag == 1) { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
347 setWorkSize(gws, lws, n, n); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
348 ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
349 } else { |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
350 ret = clEnqueueTask(queue, trns, 0, NULL, NULL); |
e38bef2012bc
fix sync bug. and NDrange option
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
9
diff
changeset
|
351 } |
3 | 352 /* Butterfly Operation */ |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
353 |
3 | 354 fftCore(xmobj, rmobj, wmobj, m, inverse); |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
355 |
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
356 /* Read data from memory buffer */ |
3 | 357 ret = clEnqueueReadBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL); |
358 | |
359 /* */ | |
360 float* ampd; | |
361 ampd = (float*)malloc(n*n*sizeof(float)); | |
362 for (i=0; i < n; i++) { | |
363 for (j=0; j < n; j++) { | |
364 ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1])); | |
365 } | |
366 } | |
367 opgm.width = n; | |
368 opgm.height = n; | |
369 normalizeF2PGM(&opgm, ampd); | |
370 free(ampd); | |
8
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
371 ret = clFlush(queue); |
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
372 ret = clFinish(queue); |
1b8da19bb31c
change time measure point
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
6
diff
changeset
|
373 ed_time = getTime(); |
3 | 374 |
375 /* Write out image */ | |
376 writePGM(&opgm, "output.pgm"); | |
377 | |
378 /* Finalizations*/ | |
379 ret = clFlush(queue); | |
380 ret = clFinish(queue); | |
381 ret = clReleaseKernel(hpfl); | |
382 ret = clReleaseKernel(trns); | |
383 ret = clReleaseKernel(sfac); | |
384 ret = clReleaseProgram(program); | |
385 ret = clReleaseMemObject(xmobj); | |
386 ret = clReleaseMemObject(rmobj); | |
387 ret = clReleaseMemObject(wmobj); | |
388 ret = clReleaseCommandQueue(queue); | |
389 ret = clReleaseContext(context); | |
390 | |
391 destroyPGM(&ipgm); | |
392 destroyPGM(&opgm); | |
393 | |
394 free(wm); | |
395 free(rm); | |
396 free(xm); | |
397 | |
4
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
398 fprintf(stdout, "image out put succeeded.\n"); |
8df0d3128672
add time measurement function
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
3
diff
changeset
|
399 printf("Time: %0.6f\n",ed_time-st_time); |
3 | 400 return 0; |
401 } |