view example/cuda_fft/fft.cu @ 2011:faaea4e1ce1c draft

minor change
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Wed, 11 Jun 2014 17:22:17 +0900
parents 6fced32f85fd
children
line wrap: on
line source

extern "C" {
    
#define PI 3.14159265358979323846
#define PI_2 1.57079632679489661923
    
    __global__ void
    bitReverse(float2* dst, float2* src, int m, int n)
    {
        unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
        unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
        
        unsigned int j = gid;
        j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
        j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
        j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
        j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8;
        j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;
        
        j >>= (32-m);
        
        dst[nid*n+j] = src[nid*n+gid];
    }
    
    __global__ void
    butterfly(float2* x, float2* w, int m, int n, int iter, unsigned int flag)
    {
        unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
        unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
        
        int butterflySize = 1 << (iter-1);
        int butterflyGrpDist = 1 << iter;
        int butterflyGrpNum = n >> iter;
        int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist);
        int butterflyGrpOffset = gid & (butterflySize-1);
        
        int a = nid * n + butterflyGrpBase + butterflyGrpOffset;
        int b = a + butterflySize;
        
        int l = butterflyGrpNum * butterflyGrpOffset;
        
        float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb;
        
        xa = x[a];
        xb = x[b];
        xbxx.x = xbxx.y = xb.x;
        xbyy.x = xbyy.y = xb.y;
        
        wab.x = (float)((unsigned int)w[l].x ^ (unsigned int)0x0);
        wab.y = (float)((unsigned int)w[l].y ^ (unsigned int)flag);
        
        wayx.x = (float)((unsigned int)wab.y ^ (unsigned int)0x80000000);
        wayx.y = (float)((unsigned int)wab.x ^ (unsigned int)0x0);
        
        wbyx.x = (float)((unsigned int)wab.y ^ (unsigned int)0x0);
        wbyx.y = (float)((unsigned int)wab.x ^ (unsigned int)0x80000000);
        
        resa.x = xa.x + xbxx.x*wab.x + xbyy.x*wayx.x;
        resa.y = xa.y + xbxx.y*wab.y + xbyy.y*wayx.y;
        
        resb.x = xa.x - xbxx.x*wab.x + xbyy.x*wbyx.x;
        resb.y = xa.y - xbxx.y*wab.y + xbyy.y*wbyx.y;

        x[a] = resa;
        x[b] = resb;
    }

    __global__ void
    highPassFilter(float2* image, int n, int radius)
    {
        unsigned long xgid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
        unsigned long ygid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);

        int2 n_2;
        n_2.x = n_2.y = n>>1;
    
        int2 mask;
        mask.x = mask.y = n-1;

        int2 gid;
        gid.x = (xgid + n_2.x) & mask.x;
        gid.y = (ygid + n_2.y) & mask.y;

        int2 diff;
        diff.x = n_2.x - gid.x;
        diff.y = n_2.y - gid.y;
    
        int2 diff2;
        diff2.x = diff.x * diff.x;
        diff2.y = diff.y * diff.y;

        int dist2 = diff2.x + diff2.y;

        int2 window;

        if (dist2 < radius*radius) {
            window.x = window.y = (int)0L;
        } else {
            window.x = window.y = (int)-1L;
        }

        image[ygid*n+xgid].x = (float)((int)image[ygid*n+xgid].x & window.x);
        image[ygid*n+xgid].y = (float)((int)image[ygid*n+xgid].y & window.y);
    }

    __global__ void
    norm(float2* x, int n)
    {
        unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
        unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; //(unsigned long)s->get_param(1);
        
        x[nid*n+gid].x = x[nid*n+gid].x / (float)n;
        x[nid*n+gid].y = x[nid*n+gid].y / (float)n;
    }

    
    
    __global__ void
    spinFact(float2* w, int n)
    {
        unsigned long i = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
        float2 angle;
        angle.x = (float)(2*i*PI/(float)n);
        angle.y = (float)((2*i*PI/(float)n) + PI_2);

        w[i].x = cos(angle.x);
        w[i].y = cos(angle.y);
    }

    __global__ void 
    transpose(float2* dst, float2* src, int n)
    {
        unsigned long xgid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
        unsigned long ygid = blockIdx.y*blockDim.y*threadIdx.y; // (unsigned long)s->get_param(1);

        unsigned int iid = ygid * n + xgid;
        unsigned int oid = xgid * n + ygid;

        dst[oid] = src[iid];
    }
}