Mercurial > hg > Game > Cerium
annotate example/fft/main.cc @ 2014:8c618e912c88 draft
optimization data transfer. wrong result
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 01 Jul 2014 17:04:01 +0900 |
parents | d43c2b7932ea |
children | 1d7d1e398833 |
rev | line source |
---|---|
1551 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 #include <math.h> | |
4 #include <sys/stat.h> | |
5 #include <fcntl.h> | |
6 #include <sys/time.h> | |
7 #include "TaskManager.h" | |
1727 | 8 #include "GpuScheduler.h" |
1560
3df1868130cb
fix fft ppe example
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1555
diff
changeset
|
9 #include "SchedTask.h" |
1551 | 10 #include "Func.h" |
11 #ifdef __APPLE__ | |
12 #include <OpenCL/opencl.h> | |
13 #else | |
14 #include <CL/cl.h> | |
15 #endif | |
16 #include "pgm.h" | |
17 extern void task_init(); | |
1815 | 18 #ifdef GPU |
1727 | 19 extern void gpu_task_init(); |
1779 | 20 #endif |
// PI and MAX_SOURCE_SIZE are not referenced in the visible part of this file.
1551 | 21 #define PI 3.14159265358979 |
22 | |
23 #define MAX_SOURCE_SIZE (0x100000) | |
24 | |
// Magnitude of the complex value (a, b); output() uses it to turn each
// (real, imaginary) pair into one amplitude sample.
25 #define AMP(a, b) (sqrt((a)*(a)+(b)*(b))) | |
26 | |
// Timestamps in seconds from getTime(): st_time is taken in TMmain() after
// run_start() returns, ed_time in TMend(); TMend() prints their difference.
27 static double st_time; | |
28 static double ed_time; | |
// Forward declaration: TMmain() passes TMend to set_TMend() before TMend is defined.
29 void TMend(TaskManager *); | |
// OpenCL handles, initialised to NULL and not referenced elsewhere in the
// visible part of this file.
30 cl_device_id device_id = NULL; | |
31 cl_context context = NULL; | |
32 cl_command_queue queue = NULL; | |
33 cl_program program = NULL; | |
// Value handed to set_cpu() on every task; init() switches it to GPU_0 for
// "-g" and to ANY_ANY for "-any".
1752 | 34 CPU_TYPE spe_cpu = SPE_ANY; |
1551 | 35 |
// Work buffers allocated in run_start() and freed in output():
// xm and rm hold n*n elements each, wm holds n/2 elements.
1646 | 36 cl_float2* xm; |
37 cl_float2* rm; | |
38 cl_float2* wm; | |
// Input image, filled by readPGM() in TMmain() and destroyed in output().
39 pgm_t ipgm; | |
40 | |
/* Transform direction passed to fftCore(). */
enum Mode {
    forward,    /* 0: forward transform */
    inverse     /* 1: inverse transform; fftCore() appends a NORMALIZATION task */
};
45 | |
/*
 * Return the current time of day in seconds, with the microsecond part
 * reported by gettimeofday() added as a fraction.
 */
static double
getTime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    double whole_seconds = now.tv_sec;
    double fraction = (double)now.tv_usec * 1e-6;
    return whole_seconds + fraction;
}
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
53 |
1646 | 54 void |
55 output() | |
56 { | |
57 int n = ipgm.width; | |
58 float* ampd; | |
59 ampd = (float*)malloc(n*n*sizeof(float)); | |
60 for (int i=0; i < n; i++) { | |
61 for (int j=0; j < n; j++) { | |
62 ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1])); | |
63 } | |
64 } | |
65 pgm_t opgm; | |
66 opgm.width = n; | |
67 opgm.height = n; | |
68 normalizeF2PGM(&opgm, ampd); | |
69 free(ampd); | |
70 | |
71 // Write out image | |
72 writePGM(&opgm, "output.pgm"); | |
73 | |
74 // Finalizations | |
75 destroyPGM(&ipgm); | |
76 destroyPGM(&opgm); | |
77 | |
78 free(wm); | |
79 free(rm); | |
80 free(xm); | |
81 } | |
82 | |
/*
 * Help text. Not referenced in the visible part of this file.
 * NOTE(review): the wording ("Sorted number of data") looks inherited from
 * another example and init() does not parse these options - confirm before
 * relying on it.
 */
const char *usr_help_str =
    "Usage: ./fft [option]\n "
    "options\n"
    "-cpu Number of SPE used (default 1)\n"
    "-l, --length Sorted number of data (default 1200)\n"
    "-h, --help Print this message";
88 | |
89 int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y) | |
90 { | |
91 switch(y) { | |
92 case 1: | |
93 gws[0] = x; | |
94 gws[1] = 1; | |
95 lws[0] = 1; | |
96 lws[1] = 1; | |
97 break; | |
98 default: | |
99 gws[0] = x; | |
100 gws[1] = y; | |
101 lws[0] = 1; | |
102 lws[1] = 1; | |
103 break; | |
104 } | |
105 | |
106 return 0; | |
107 } | |
108 | |
1646 | 109 HTask* |
1663
ce031df3dd32
fix syntax fft gpu
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1661
diff
changeset
|
110 fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, long m, enum Mode direction,HTask* waitTask) |
1551 | 111 { |
1673 | 112 long direction_flag; |
1551 | 113 switch (direction) { |
1643 | 114 case forward:direction_flag = 0x00000000; break; |
115 case inverse:direction_flag = 0x80000000; break; | |
1551 | 116 } |
1663
ce031df3dd32
fix syntax fft gpu
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1661
diff
changeset
|
117 long n = 1<<m; |
1551 | 118 size_t gws[2],lws[2]; |
1633
fbb4757d82ee
refactor GpuScheduler
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1627
diff
changeset
|
119 int length_dst = n*n; |
fbb4757d82ee
refactor GpuScheduler
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1627
diff
changeset
|
120 int length_src = n*n; |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
121 |
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
122 HTask* brev = manager->create_task(BIT_REVERSE); |
1633
fbb4757d82ee
refactor GpuScheduler
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1627
diff
changeset
|
123 setWorkSize(gws,lws,n,n); |
1835 | 124 brev->set_param(0,m); |
125 brev->set_param(1,n); | |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
126 brev->set_inData(0, src, length_src*sizeof(cl_float2)); |
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
127 brev->set_outData(0, dst, length_dst*sizeof(cl_float2)); |
1975
4cf85b48ab9e
running fft with CudaScheduler, but wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1971
diff
changeset
|
128 brev->set_cpu(spe_cpu); |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
129 brev->flip(); |
1646 | 130 brev->wait_for(waitTask); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
131 brev->iterate(gws[0],gws[1]); |
1551 | 132 |
1658 | 133 waitTask = brev; |
134 | |
1643 | 135 setWorkSize(gws,lws,n/2,n); |
1658 | 136 for(int iter=1;iter<=m;iter++) { |
137 HTask* bfly = manager->create_task(BUTTERFLY); | |
1835 | 138 bfly->set_param(0,n); |
139 bfly->set_param(1,direction_flag); | |
140 bfly->set_param(2,(long)iter); | |
1658 | 141 bfly->set_inData(0, dst, length_dst*sizeof(cl_float2)); |
142 bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2)); | |
143 bfly->set_outData(0,dst,length_dst*sizeof(cl_float2)); | |
144 bfly->set_cpu(spe_cpu); | |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
145 bfly->flip(); |
1658 | 146 bfly->wait_for(waitTask); |
147 bfly->iterate(gws[0],gws[1]); | |
148 waitTask = bfly; | |
149 } | |
1551 | 150 |
151 if (direction == inverse) { | |
1658 | 152 setWorkSize(gws,lws,n,n); |
1551 | 153 HTask *norm = manager->create_task(NORMALIZATION); |
1658 | 154 norm->set_inData(0,dst,length_dst*sizeof(cl_float2)); |
1571 | 155 norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); |
1835 | 156 norm->set_param(0,n); |
1551 | 157 norm->set_cpu(spe_cpu); |
1658 | 158 norm->wait_for(waitTask); |
1656 | 159 norm->iterate(gws[0],gws[1]); |
1646 | 160 |
161 waitTask = norm; | |
1551 | 162 } |
1646 | 163 return waitTask; |
1551 | 164 } |
165 | |
166 char * | |
167 init(int argc, char**argv){ | |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
168 |
1551 | 169 char *filename = 0; |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
170 |
1689 | 171 // printf("%s ",argv[4]); |
1551 | 172 for (int i = 1; argv[i]; ++i) { |
173 if (strcmp(argv[i], "-file") == 0) { | |
174 filename = argv[i+1]; | |
175 } else if (strcmp(argv[i], "-g") == 0) { | |
176 spe_cpu = GPU_0; | |
1702
f52904f8f03e
fix example.run ANY_ANY
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1689
diff
changeset
|
177 } else if (strcmp(argv[i], "-any") == 0) { |
f52904f8f03e
fix example.run ANY_ANY
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1689
diff
changeset
|
178 spe_cpu = ANY_ANY; |
1551 | 179 } |
180 } | |
181 if ( (argc == 1)||(filename==0)) { | |
1643 | 182 printf("Usage: ./fft -file [image filename] -cpu or -gpu\n"); |
1551 | 183 exit(-1); |
184 } | |
185 | |
186 return filename; | |
187 } | |
188 | |
189 void | |
190 run_start(TaskManager *manager,pgm_t ipgm) | |
191 { | |
1663
ce031df3dd32
fix syntax fft gpu
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1661
diff
changeset
|
192 long n = ipgm.width; |
ce031df3dd32
fix syntax fft gpu
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1661
diff
changeset
|
193 long m = (cl_int)(log((double)n)/log(2.0)); |
1643 | 194 size_t *gws = new size_t[2]; |
195 size_t *lws = new size_t[2]; | |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
196 |
1646 | 197 xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); |
198 rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); | |
199 wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2)); | |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
200 |
1646 | 201 HTask* waitTask; |
1551 | 202 /* |
203 * [cl_float2] | |
204 * typedef union | |
205 * { | |
206 * cl_float CL_ALIGNED(8) s[2]; | |
207 * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) | |
208 * __extension__ struct{ cl_float x, y; }; | |
209 * __extension__ struct{ cl_float s0, s1; }; | |
210 * __extension__ struct{ cl_float lo, hi; }; | |
211 * #endif | |
212 * #if defined( __CL_FLOAT2__) | |
213 * __cl_float2 v2; | |
214 * #endif | |
215 * } cl_float2; | |
216 */ | |
1643 | 217 for (int i=0; i<n; i++) { |
218 for (int j=0; j < n; j++) { | |
219 ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i]; | |
220 ((float*)xm)[(2*n*j)+2*i+1] = (float)0; | |
1551 | 221 } |
222 } | |
1579 | 223 |
224 // Create spin factor | |
1658 | 225 setWorkSize(gws,lws,n/2,1); |
1643 | 226 int length_w = n / 2; |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
227 HTask* sfac = manager->create_task(SPIN_FACT); |
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
228 sfac->set_outData(0, wm, length_w*sizeof(cl_float2)); |
1835 | 229 sfac->set_param(0,n); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
230 sfac->set_cpu(spe_cpu); |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
231 sfac->flip(); |
1656 | 232 sfac->iterate(gws[0]); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
233 |
1581
8ee897303cd0
fix multi_dimention
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1579
diff
changeset
|
234 // Butterfly Operation |
1646 | 235 waitTask = fftCore(manager, rm, xm, wm, m, forward,sfac); |
1661
19ab54c76d6f
success run to fft
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1658
diff
changeset
|
236 |
1551 | 237 // Transpose matrix |
1643 | 238 int length_r =n*n; |
1656 | 239 setWorkSize(gws,lws,n,n); |
240 HTask* first_trns = manager->create_task(TRANSPOSE); | |
241 first_trns->set_inData(0,rm,length_r*sizeof(cl_float2)); | |
242 first_trns->set_outData(0,xm,length_r*sizeof(cl_float2)); | |
1835 | 243 first_trns->set_param(0,n); |
1971 | 244 first_trns->set_cpu(spe_cpu); |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
245 first_trns->flip(); |
1656 | 246 first_trns->wait_for(waitTask); |
247 first_trns->iterate(gws[0],gws[1]); | |
248 | |
1551 | 249 // Butterfly Operation |
1661
19ab54c76d6f
success run to fft
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1658
diff
changeset
|
250 waitTask = fftCore(manager, rm, xm, wm, m, forward,first_trns); |
1551 | 251 |
252 // Apply high-pass filter | |
253 HTask *hpfl = manager->create_task(HIGH_PASS_FILTER); | |
1643 | 254 cl_int radius = n/8; |
1656 | 255 setWorkSize(gws,lws,n,n); |
1658 | 256 hpfl->set_inData(0,rm,length_r*sizeof(cl_float2)); |
1551 | 257 hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); |
1835 | 258 hpfl->set_param(0,n); |
259 hpfl->set_param(1,(long)radius); | |
1551 | 260 hpfl->set_cpu(spe_cpu); |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
261 hpfl->flip(); |
1656 | 262 hpfl->wait_for(waitTask); |
1643 | 263 hpfl->iterate(gws[0],gws[1]); |
1656 | 264 |
1551 | 265 // Inverse FFT |
266 | |
267 // Butterfly Operation | |
1646 | 268 waitTask = fftCore(manager,xm, rm, wm, m, inverse,hpfl); |
1551 | 269 |
270 // Transpose matrix | |
1643 | 271 setWorkSize(gws,lws,n,n); |
1656 | 272 HTask* second_trns = manager->create_task(TRANSPOSE); |
273 second_trns->set_inData(0,xm,length_r*sizeof(cl_float2)); | |
274 second_trns->set_outData(0,rm,length_r*sizeof(cl_float2)); | |
1835 | 275 second_trns->set_param(0,n); |
1971 | 276 second_trns->set_cpu(spe_cpu); |
2014
8c618e912c88
optimization data transfer. wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2013
diff
changeset
|
277 second_trns->flip(); |
1656 | 278 second_trns->wait_for(waitTask); |
279 second_trns->iterate(gws[0],gws[1]); | |
1551 | 280 |
281 // Butterfly Operation | |
282 | |
1656 | 283 waitTask = fftCore(manager,xm, rm, wm, m, inverse,second_trns); |
1551 | 284 } |
285 | |
286 int TMmain(TaskManager *manager, int argc, char** argv) { | |
287 task_init(); | |
1779 | 288 #ifdef GPU |
1727 | 289 gpu_task_init(); |
1779 | 290 #endif |
1551 | 291 char * pgm_file = init(argc,argv); |
292 /* Read image */ | |
293 int err = readPGM(&ipgm, pgm_file); | |
294 if (err<0) { | |
295 fprintf(stderr, "Failed to read image file.\n"); | |
296 exit(1); | |
1566 | 297 } |
1689 | 298 run_start(manager, ipgm); |
1551 | 299 st_time = getTime(); |
300 manager->set_TMend(TMend); | |
301 return 0; | |
302 } | |
303 | |
304 void | |
305 TMend(TaskManager *manager) | |
306 { | |
1669 | 307 ed_time = getTime(); |
1646 | 308 output(); |
1675 | 309 // fprintf(stdout, "image out put succeeded.\n"); |
310 printf("%0.6f\n",ed_time-st_time); | |
1551 | 311 } |