view fft_fixstart/fft.cl @ 12:a664602e1819 default tip

add sample
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Tue, 12 Feb 2013 17:13:26 +0900
parents f3cfea46e585
children
line wrap: on
line source

#define PI 3.14159265358979323846
#define PI_2 1.57079632679489661923

__kernel void spinFact(__global float2* w, int n)
{
    unsigned int i = get_global_id(0);

    float2 angle = (float2)(2*i*PI/(float)n,(2*i*PI/(float)n)+PI_2);
    w[i] = cos(angle);
}

__kernel void bitReverse(__global float2 *dst, __global float2 *src, int m, int n)
{
    unsigned int gid = get_global_id(0);
    unsigned int nid = get_global_id(1);

    unsigned int j = gid;
    j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
    j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
    j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
    j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8;
    j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;

    j >>= (32-m);

    dst[nid*n+j] = src[nid*n+gid];
}

__kernel void norm(__global float2 *x, int n)
{
    unsigned int gid = get_global_id(0);
    unsigned int nid = get_global_id(1);

    x[nid*n+gid] = x[nid*n+gid] / (float2)((float)n, (float)n);
}

__kernel void butterfly(__global float2 *x, __global float2* w, int m, int n, int iter, uint flag)
{
    unsigned int gid = get_global_id(0);
    unsigned int nid = get_global_id(1);

    int butterflySize = 1 << (iter-1);
    int butterflyGrpDist = 1 << iter;
    int butterflyGrpNum = n >> iter;
    int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist);
    int butterflyGrpOffset = gid & (butterflySize-1);

    int a = nid * n + butterflyGrpBase + butterflyGrpOffset;
    int b = a + butterflySize;

    int l = butterflyGrpNum * butterflyGrpOffset;

    float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb;

    xa = x[a];
    xb = x[b];
    xbxx = xb.xx;
    xbyy = xb.yy;

    wab = as_float2(as_uint2(w[l]) ^ (uint2)(0x0, flag));
    wayx = as_float2(as_uint2(wab.yx) ^ (uint2)(0x80000000, 0x0));
    wbyx = as_float2(as_uint2(wab.yx) ^ (uint2)(0x0, 0x80000000));

    resa = xa + xbxx*wab + xbyy*wayx;
    resb = xa - xbxx*wab + xbyy*wbyx;

    x[a] = resa;
    x[b] = resb;
}

__kernel void transpose(__global float2 *dst, __global float2* src, int n)
{
    unsigned int xgid = get_global_id(0);
    unsigned int ygid = get_global_id(1);

    unsigned int iid = ygid * n + xgid;
    unsigned int oid = xgid * n + ygid;

    dst[oid] = src[iid];
}

__kernel void highPassFilter(__global float2* image, int n, int radius)
{
    unsigned int xgid = get_global_id(0);
    unsigned int ygid = get_global_id(1);

    int2 n_2 = (int2)(n>>1, n>>1);
    int2 mask = (int2)(n-1, n-1);

    int2 gid = ((int2)(xgid, ygid) + n_2) & mask;

    int2 diff = n_2 - gid;
    int2 diff2 = diff * diff;
    int dist2 = diff2.x + diff2.y;

    int2 window;

    if (dist2 < radius*radius) {
        window = (int2)(0L, 0L);
    } else {
        window = (int2)(-1L, -1L);
    }
        image[ygid*n+xgid] = as_float2(as_int2(image[ygid*n+xgid]) & window);
}