# HG changeset patch # User Shohei KOKUBO # Date 1405356117 -32400 # Node ID 433043c56a0c13c442d60e0e0a0fb0d280ed8dcf # Parent 1d7d1e3988330e662ba41f00437843858bb75503 fix fft diff -r 1d7d1e398833 -r 433043c56a0c example/fft/cuda/butterfly.cu --- a/example/fft/cuda/butterfly.cu Tue Jul 15 01:19:31 2014 +0900 +++ b/example/fft/cuda/butterfly.cu Tue Jul 15 01:41:57 2014 +0900 @@ -1,6 +1,6 @@ extern "C" { __global__ void - butterfly(long* param, float* x_in, float* w, float* x_out) + butterfly(long* param, float* x, float* w) { unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0); unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1); @@ -22,10 +22,10 @@ float xa[2], xb[2], xbxx[2], xbyy[2], wab[2], wayx[2], wbyx[2], resa[2], resb[2]; - xa[0] = x_in[2*a]; - xa[1] = x_in[2*a+1]; - xb[0] = x_in[2*b]; - xb[1] = x_in[2*b+1]; + xa[0] = x[2*a]; + xa[1] = x[2*a+1]; + xb[0] = x[2*b]; + xb[1] = x[2*b+1]; xbxx[0] = xbxx[1] = xb[0]; xbyy[0] = xbyy[1] = xb[1]; @@ -48,9 +48,9 @@ resb[0] = xa[0] - xbxx[0]*wab[0] + xbyy[0]*wbyx[0]; resb[1] = xa[1] - xbxx[1]*wab[1] + xbyy[1]*wbyx[1]; - x_out[2*a] = resa[0]; - x_out[2*a+1] = resa[1]; - x_out[2*b] = resb[0]; - x_out[2*b+1] = resb[1]; + x[2*a] = resa[0]; + x[2*a+1] = resa[1]; + x[2*b] = resb[0]; + x[2*b+1] = resb[1]; } } diff -r 1d7d1e398833 -r 433043c56a0c example/fft/cuda/norm.cu --- a/example/fft/cuda/norm.cu Tue Jul 15 01:19:31 2014 +0900 +++ b/example/fft/cuda/norm.cu Tue Jul 15 01:41:57 2014 +0900 @@ -1,13 +1,13 @@ extern "C" { __global__ void - norm(long* param, float* in_x,float* out_x) + norm(long* param, float* x) { unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0); unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; //(unsigned long)s->get_param(1); long n = param[0]; - out_x[(nid*n+gid)*2] = in_x[(nid*n+gid)*2] / (float)n; - out_x[(nid*n+gid)*2+1] = in_x[(nid*n+gid)*2+1] / (float)n; + x[(nid*n+gid)*2] = x[(nid*n+gid)*2] / (float)n; + x[(nid*n+gid)*2+1] = x[(nid*n+gid)*2+1] / (float)n; } } diff -r 1d7d1e398833 -r 433043c56a0c example/fft/main.cc --- a/example/fft/main.cc Tue Jul 15 01:19:31 2014 +0900 +++ b/example/fft/main.cc Tue Jul 15 01:41:57 2014 +0900 @@ -107,7 +107,7 @@ } HTask* -fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, long m, enum Mode direction,HTask* waitTask) +fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, long m, enum Mode direction, HTask* waitTask, bool last) { long direction_flag; switch (direction) { @@ -140,7 +140,7 @@ bfly->set_param(2,(long)iter); bfly->set_inData(0, dst, length_dst*sizeof(cl_float2)); bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2)); - bfly->set_outData(0,dst,length_dst*sizeof(cl_float2)); + bfly->set_outData(0, dst,length_dst*sizeof(cl_float2)); bfly->set_cpu(spe_cpu); bfly->flip(); bfly->wait_for(waitTask); @@ -151,7 +151,9 @@ if (direction == inverse) { setWorkSize(gws,lws,n,n); HTask *norm = manager->create_task(NORMALIZATION); - norm->set_inData(0,dst,length_dst*sizeof(cl_float2)); + norm->set_inData(0, dst,length_dst*sizeof(cl_float2)); + if (!last) + norm->flip(); norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); norm->set_param(0,n); norm->set_cpu(spe_cpu); @@ -228,10 +230,11 @@ sfac->set_outData(0, wm, length_w*sizeof(cl_float2)); sfac->set_param(0,n); sfac->set_cpu(spe_cpu); + sfac->flip(); sfac->iterate(gws[0]); // Butterfly Operation - waitTask = fftCore(manager, rm, xm, wm, m, forward,sfac); + waitTask = fftCore(manager, rm, xm, wm, m, forward, sfac, false); // Transpose matrix int length_r =n*n; @@ -246,7 +249,7 @@ first_trns->iterate(gws[0],gws[1]); // Butterfly Operation - waitTask = fftCore(manager, rm, xm, wm, m, forward,first_trns); + waitTask = fftCore(manager, rm, xm, wm, m, forward, first_trns, false); // Apply high-pass filter HTask *hpfl = manager->create_task(HIGH_PASS_FILTER); @@ -264,7 +267,7 @@ // Inverse FFT // Butterfly Operation - waitTask = fftCore(manager,xm, rm, wm, m, inverse,hpfl); + waitTask = fftCore(manager,xm, rm, wm, m, inverse, hpfl, false); // Transpose matrix setWorkSize(gws,lws,n,n); @@ -279,7 +282,7 @@ // Butterfly Operation - waitTask = fftCore(manager,xm, rm, wm, m, inverse, second_trns); + waitTask = fftCore(manager,xm, rm, wm, m, inverse, second_trns, true); } int TMmain(TaskManager *manager, int argc, char** argv) {