12
|
/*
 * Circle constants used by spinFact when building phase angles.
 * Both are written as unsuffixed (double-typed) literals:
 *   PI   - pi
 *   PI_2 - pi / 2
 */
#define PI   3.14159265358979323846
#define PI_2 1.57079632679489661923
|
|
3
|
|
/*
 * spinFact - write one cosine pair per work-item into w.
 *
 * For the work-item with index i along dimension 0, let
 * theta = 2*i*PI/n.  The kernel stores
 *     w[i] = (cos(theta), cos(theta + PI_2))
 * and because cos(t + pi/2) == -sin(t) the second component is -sin(theta).
 *
 * w: output buffer, written at index get_global_id(0).
 * n: divisor of the phase; it is cast to float before the division.
 *
 * No bounds check is done here.
 * NOTE(review): presumably the host sizes the launch so that every index
 * lies inside w - confirm at the call site.
 */
__kernel void spinFact(__global float2* w, int n)
{
    const unsigned int idx = get_global_id(0);

    /* Both components are evaluated independently, exactly as written,
     * so the arithmetic type and rounding of each expression is unchanged. */
    float2 phase;
    phase.x = 2*idx*PI/(float)n;
    phase.y = (2*idx*PI/(float)n)+PI_2;

    w[idx] = cos(phase);
}
|
|
11
|
|
/*
 * bitReverse - copy src to dst, permuting each row by bit-reversed index.
 *
 * The dimension-0 index is reversed over all 32 bits and then shifted right
 * by (32 - m), which leaves the reversal of its low m bits.  The element at
 * that position in dst receives the element at the original position in src.
 *
 * dst: destination buffer.
 * src: source buffer.
 * m:   number of index bits kept after the reversal.
 * n:   row stride in elements; row r starts at element r * n.
 *
 * Dimension 1 of the NDRange selects the row.
 */
__kernel void bitReverse(__global float2 *dst, __global float2 *src, int m, int n)
{
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);

    /* Reverse 32 bits by swapping progressively larger neighbouring groups:
     * single bits, pairs, nibbles, bytes and finally the two half-words. */
    unsigned int rev = col;
    rev = ((rev >> 1) & 0x55555555u) | ((rev & 0x55555555u) << 1);
    rev = ((rev >> 2) & 0x33333333u) | ((rev & 0x33333333u) << 2);
    rev = ((rev >> 4) & 0x0F0F0F0Fu) | ((rev & 0x0F0F0F0Fu) << 4);
    rev = ((rev >> 8) & 0x00FF00FFu) | ((rev & 0x00FF00FFu) << 8);
    rev = (rev >> 16) | (rev << 16);

    /* Drop the unused low-order bits so only the reversed m bits remain. */
    rev = rev >> (32 - m);

    const unsigned int rowBase = row * n;
    dst[rowBase + rev] = src[rowBase + col];
}
|
|
28
|
|
/*
 * norm - divide one element of x by n, in place.
 *
 * Each work-item handles the element at row get_global_id(1), column
 * get_global_id(0), using n as the row stride.  Both components of the
 * float2 are divided by (float)n.
 *
 * x: buffer updated in place.
 * n: row stride in elements and also the divisor.
 */
__kernel void norm(__global float2 *x, int n)
{
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);

    const unsigned int idx = row * n + col;
    const float divisor = (float)n;

    x[idx] = x[idx] / (float2)(divisor, divisor);
}
|
|
36
|
|
/*
 * butterfly - perform one radix-2 butterfly of stage `iter`, in place.
 *
 * Each work-item combines two elements of one row of x that lie
 * 2^(iter-1) elements apart:
 *     x[a] = x[a] + x[b] * t
 *     x[b] = x[a] - x[b] * t
 * where the products are complex multiplications (x = real, y = imaginary)
 * and t is read from w and optionally modified by `flag`.
 *
 * x:    data buffer, updated in place; row r starts at element r * n.
 * w:    factor table, read at index (n >> iter) * (offset within the group).
 * m:    not used by this kernel.
 * n:    row stride in elements.
 * iter: stage number; the shifts by (iter - 1) imply iter >= 1.
 * flag: XORed into the bit pattern of the factor's y component.  A value of
 *       0x80000000 flips the float sign bit (negating y), 0 leaves it as is.
 *       NOTE(review): presumably this selects forward vs. inverse transform
 *       - confirm against the host code.
 */
__kernel void butterfly(__global float2 *x, __global float2* w, int m, int n, int iter, uint flag)
{
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);

    const int span   = 1 << (iter-1);   /* distance between the paired elements */
    const int stride = 1 << iter;       /* elements covered by one group */
    const int groups = n >> iter;       /* step through w per position in a group */
    const int lane   = col & (span-1);  /* position inside the group's lower part */
    const int base   = (col >> (iter-1)) * stride;

    const int idxA = row * n + base + lane;
    const int idxB = idxA + span;

    const float2 top = x[idxA];
    const float2 bot = x[idxB];

    /* Sign changes are applied by toggling bit 31 of the float bit pattern. */
    const float2 tw     = as_float2(as_uint2(w[groups * lane]) ^ (uint2)(0u, flag));
    /* (-tw.y,  tw.x): multiplied by bot.y it supplies the imaginary-part terms. */
    const float2 twSwap = as_float2(as_uint2(tw.yx) ^ (uint2)(0x80000000u, 0u));
    /* ( tw.y, -tw.x): the same terms with opposite sign, for the difference. */
    const float2 twSwapNeg = as_float2(as_uint2(tw.yx) ^ (uint2)(0u, 0x80000000u));

    /* Both inputs were loaded above, so the two stores cannot interfere. */
    x[idxA] = top + bot.xx*tw + bot.yy*twSwap;
    x[idxB] = top - bot.xx*tw + bot.yy*twSwapNeg;
}
|
|
70
|
|
/*
 * transpose - copy src into dst with row and column indices exchanged.
 *
 * The work-item at (column, row) = (get_global_id(0), get_global_id(1))
 * reads src[row * n + column] and writes it to dst[column * n + row].
 *
 * dst: destination buffer.
 * src: source buffer.
 * n:   stride in elements, used for both buffers.
 */
__kernel void transpose(__global float2 *dst, __global float2* src, int n)
{
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);

    dst[col * n + row] = src[row * n + col];
}
|
|
81
|
|
/*
 * highPassFilter - zero the elements of image that lie inside a circle.
 *
 * The work-item coordinates are first shifted by n/2 and wrapped with a
 * bitwise AND against (n - 1).  The squared distance between the shifted
 * position and (n/2, n/2) is then compared with radius * radius:
 *   - strictly smaller: the element's bits are ANDed with 0, giving (0, 0);
 *   - otherwise:        the bits are ANDed with all ones, leaving it as is.
 *
 * image:  buffer updated in place; row r starts at element r * n.
 * n:      row stride in elements.  The AND with (n - 1) acts as a modulo
 *         only when n is a power of two - assumed here, TODO confirm.
 * radius: cut-off radius, in elements.
 */
__kernel void highPassFilter(__global float2* image, int n, int radius)
{
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);

    const int halfN = n >> 1;
    const int wrap  = n - 1;

    /* Shift by half the size and wrap around. */
    const int2 shifted = ((int2)(col, row) + (int2)(halfN, halfN)) & (int2)(wrap, wrap);

    const int2 delta  = (int2)(halfN, halfN) - shifted;
    const int2 sq     = delta * delta;
    const int  distSq = sq.x + sq.y;

    /* All-zero bits clear the element, all-one bits keep it unchanged. */
    const int keepBits = (distSq < radius*radius) ? 0 : -1;

    const unsigned int idx = row * n + col;
    image[idx] = as_float2(as_int2(image[idx]) & (int2)(keepBits, keepBits));
}
|