comparison fft_fixstart/main.cc @ 3:f3cfea46e585

add fft_fixstar sample
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Mon, 04 Feb 2013 02:59:58 +0900
parents
children 8df0d3128672
comparison
equal deleted inserted replaced
2:ccea4e6a1945 3:f3cfea46e585
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <sys/stat.h>
5 #include <fcntl.h>
6
7 #ifdef __APPLE__
8 #include <OpenCL/opencl.h>
9 #else
10 #include <CL/cl.h>
11 #endif
12
13 #include "pgm.h"
14
15 #define PI 3.14159265358979
16
17 #define MAX_SOURCE_SIZE (0x100000)
18
19 #define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))
20
21 cl_device_id device_id = NULL;
22 cl_context context = NULL;
23 cl_command_queue queue = NULL;
24 cl_program program = NULL;
25 cl_device_type device_type = CL_DEVICE_TYPE_GPU;
26
27 enum Mode {
28 forward = 0,
29 inverse = 1
30 };
31
32 int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y)
33 {
34 switch(y) {
35 case 1:
36 gws[0] = x;
37 gws[1] = 1;
38 lws[0] = 1;
39 lws[1] = 1;
40 break;
41 default:
42 gws[0] = x;
43 gws[1] = y;
44 lws[0] = 1;
45 lws[1] = 1;
46 break;
47 }
48
49 return 0;
50 }
51
52 int fftCore(cl_mem dst, cl_mem src, cl_mem spin, cl_int m, enum Mode direction)
53 {
54 cl_int ret;
55
56 cl_int iter;
57 cl_uint flag;
58
59 cl_int n = 1<<m;
60
61 cl_event kernelDone;
62
63 cl_kernel brev = NULL;
64 cl_kernel bfly = NULL;
65 cl_kernel norm = NULL;
66
67 brev = clCreateKernel(program, "bitReverse", &ret);
68 bfly = clCreateKernel(program, "butterfly", &ret);
69 norm = clCreateKernel(program, "norm", &ret);
70
71 size_t gws[2];
72 size_t lws[2];
73
74 switch (direction) {
75 case forward:flag = 0x00000000; break;
76 case inverse:flag = 0x80000000; break;
77 }
78
79 ret = clSetKernelArg(brev, 0, sizeof(cl_mem), (void *)&dst);
80 ret = clSetKernelArg(brev, 1, sizeof(cl_mem), (void *)&src);
81 ret = clSetKernelArg(brev, 2, sizeof(cl_int), (void *)&m);
82 ret = clSetKernelArg(brev, 3, sizeof(cl_int), (void *)&n);
83
84 ret = clSetKernelArg(bfly, 0, sizeof(cl_mem), (void *)&dst);
85 ret = clSetKernelArg(bfly, 1, sizeof(cl_mem), (void *)&spin);
86 ret = clSetKernelArg(bfly, 2, sizeof(cl_int), (void *)&m);
87 ret = clSetKernelArg(bfly, 3, sizeof(cl_int), (void *)&n);
88 ret = clSetKernelArg(bfly, 5, sizeof(cl_uint), (void *)&flag);
89
90 ret = clSetKernelArg(norm, 0, sizeof(cl_mem), (void *)&dst);
91 ret = clSetKernelArg(norm, 1, sizeof(cl_int), (void *)&n);
92
93 /* Reverse bit ordering */
94 setWorkSize(gws, lws, n, n);
95 ret = clEnqueueNDRangeKernel(queue, brev, 2, NULL, gws, lws, 0, NULL, NULL);
96
97 /* Perform Butterfly Operations*/
98 setWorkSize(gws, lws, n/2, n);
99 for (iter=1; iter <= m; iter++) {
100 ret = clSetKernelArg(bfly, 4, sizeof(cl_int), (void *)&iter);
101 ret = clEnqueueNDRangeKernel(queue, bfly, 2, NULL, gws, lws, 0, NULL, &kernelDone);
102 ret = clWaitForEvents(1, &kernelDone);
103 }
104
105 if (direction == inverse) {
106 setWorkSize(gws, lws, n, n);
107 ret = clEnqueueNDRangeKernel(queue, norm, 2, NULL, gws, lws, 0, NULL, &kernelDone);
108 ret = clWaitForEvents(1, &kernelDone);
109 }
110
111 ret = clReleaseKernel(bfly);
112 ret = clReleaseKernel(brev);
113 ret = clReleaseKernel(norm);
114
115 return 0;
116 }
117
118 char *
119 init(int argc, char**argv){
120
121 char *filename = 0;
122
123 for (int i = 1; argv[i]; ++i) {
124 if (strcmp(argv[i], "-file") == 0) {
125 filename = argv[i+1];
126 } else if (strcmp(argv[i], "-cpu") == 0) {
127 device_type = CL_DEVICE_TYPE_CPU;
128 } else if (strcmp(argv[i], "-gpu") == 0) {
129 device_type = CL_DEVICE_TYPE_GPU;
130 }
131 }
132 if ( (argc == 1)||(filename==0)) {
133 printf("Usage: ./fft [image filename]\n");
134 exit(-1);
135 }
136
137 return filename;
138 }
139
140 int main(int argc, char** argv) {
141 cl_mem xmobj = NULL;
142 cl_mem rmobj = NULL;
143 cl_mem wmobj = NULL;
144 cl_kernel sfac = NULL;
145 cl_kernel trns = NULL;
146 cl_kernel hpfl = NULL;
147
148 cl_platform_id platform_id = NULL;
149
150 cl_uint ret_num_devices;
151 cl_uint ret_num_platforms;
152
153 cl_int ret;
154
155 cl_float2 *xm;
156 cl_float2 *rm;
157 cl_float2 *wm;
158
159 pgm_t ipgm;
160 pgm_t opgm;
161
162 const char fileName[] = "./fft.cl";
163 size_t source_size;
164 char *source_str;
165 cl_int i, j;
166 cl_int n;
167 cl_int m;
168
169 size_t gws[2];
170 size_t lws[2];
171
172 /* Load kernel source code */
173 int fd = open(fileName, O_RDONLY);
174
175 if (fd<0) {
176 fprintf(stderr, "Failed to load kernel %s.\n",fileName);
177 exit(1);
178 }
179 struct stat stats;
180 fstat(fd, &stats);
181 off_t size = stats.st_size;
182 if (size<=0) {
183 fprintf(stderr, "Failed to load kernel.\n");
184 exit(1);
185 }
186 source_str = (char*)alloca(size);
187 source_size = read(fd, source_str, size);
188 close( fd );
189
190 char * pgm_file = init(argc,argv);
191
192 /* Read image */
193 int err = readPGM(&ipgm, pgm_file);
194 if (err<0) {
195 fprintf(stderr, "Failed to read image file.\n");
196 exit(1);
197 }
198
199 n = ipgm.width;
200 m = (cl_int)(log((double)n)/log(2.0));
201
202 xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
203 rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
204 wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));
205
206 for (i=0; i < n; i++) {
207 for (j=0; j < n; j++) {
208 ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i];
209 ((float*)xm)[(2*n*j)+2*i+1] = (float)0;
210 }
211 }
212
213 /* Get platform/device */
214 ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
215 ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
216
217 /* Create OpenCL context */
218 context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
219
220 /* Create Command queue */
221 queue = clCreateCommandQueue(context, device_id, 0, &ret);
222
223 /* Create Buffer Objects */
224 xmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);
225 rmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);
226 wmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret);
227
228 /* Transfer data to memory buffer */
229 ret = clEnqueueWriteBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL);
230
231 /* Create kernel program from source */
232 program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
233
234 /* Build kernel program */
235 ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
236
237 if (ret<0) {
238 size_t size;
239 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size);
240
241 char *log = new char[size];
242 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL);
243 printf("%s ",log);
244 exit (ret);
245 }
246
247 /* Create OpenCL Kernel */
248 sfac = clCreateKernel(program, "spinFact", &ret);
249 trns = clCreateKernel(program, "transpose", &ret);
250 hpfl = clCreateKernel(program, "highPassFilter", &ret);
251
252 /* Create spin factor */
253 ret = clSetKernelArg(sfac, 0, sizeof(cl_mem), (void *)&wmobj);
254 ret = clSetKernelArg(sfac, 1, sizeof(cl_int), (void *)&n);
255 setWorkSize(gws, lws, n/2, 1);
256 ret = clEnqueueNDRangeKernel(queue, sfac, 1, NULL, gws, lws, 0, NULL, NULL);
257
258 /* Butterfly Operation */
259 fftCore(rmobj, xmobj, wmobj, m, forward);
260
261 /* Transpose matrix */
262 ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&xmobj);
263 ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&rmobj);
264 ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n);
265 setWorkSize(gws, lws, n, n);
266 ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL);
267
268 /* Butterfly Operation */
269 fftCore(rmobj, xmobj, wmobj, m, forward);
270
271 /* Apply high-pass filter */
272 cl_int radius = n/8;
273 ret = clSetKernelArg(hpfl, 0, sizeof(cl_mem), (void *)&rmobj);
274 ret = clSetKernelArg(hpfl, 1, sizeof(cl_int), (void *)&n);
275 ret = clSetKernelArg(hpfl, 2, sizeof(cl_int), (void *)&radius);
276 setWorkSize(gws, lws, n, n);
277 ret = clEnqueueNDRangeKernel(queue, hpfl, 2, NULL, gws, lws, 0, NULL, NULL);
278
279 /* Inverse FFT */
280
281 /* Butterfly Operation */
282 fftCore(xmobj, rmobj, wmobj, m, inverse);
283
284 /* Transpose matrix */
285 ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&rmobj);
286 ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&xmobj);
287 setWorkSize(gws, lws, n, n);
288 ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL);
289
290 /* Butterfly Operation */
291
292 fftCore(xmobj, rmobj, wmobj, m, inverse);
293
294 /* Read data from memory buffer */
295 ret = clEnqueueReadBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL);
296
297 /* */
298 float* ampd;
299 ampd = (float*)malloc(n*n*sizeof(float));
300 for (i=0; i < n; i++) {
301 for (j=0; j < n; j++) {
302 ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1]));
303 }
304 }
305 opgm.width = n;
306 opgm.height = n;
307 normalizeF2PGM(&opgm, ampd);
308 free(ampd);
309
310 /* Write out image */
311 writePGM(&opgm, "output.pgm");
312
313 /* Finalizations*/
314 ret = clFlush(queue);
315 ret = clFinish(queue);
316 ret = clReleaseKernel(hpfl);
317 ret = clReleaseKernel(trns);
318 ret = clReleaseKernel(sfac);
319 ret = clReleaseProgram(program);
320 ret = clReleaseMemObject(xmobj);
321 ret = clReleaseMemObject(rmobj);
322 ret = clReleaseMemObject(wmobj);
323 ret = clReleaseCommandQueue(queue);
324 ret = clReleaseContext(context);
325
326 destroyPGM(&ipgm);
327 destroyPGM(&opgm);
328
329 free(wm);
330 free(rm);
331 free(xm);
332
333 fprintf(stdout, "image out put succeeded.\n");
334 return 0;
335 }