annotate fft_Example/fft_kernelstring.cc @ 2:ccea4e6a1945

add OpenCL example
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Jan 2013 23:19:41 +0900
parents
children 3602b23914ad
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 // File: fft_kernelstring.cpp
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 // Version: <1.0>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 // Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 // in consideration of your agreement to the following terms, and your use,
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 // installation, modification or redistribution of this Apple software
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 // constitutes acceptance of these terms. If you do not agree with these
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 // terms, please do not use, install, modify or redistribute this Apple
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 // software.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 // In consideration of your agreement to abide by the following terms, and
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 // subject to these terms, Apple grants you a personal, non - exclusive
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 // license, under Apple's copyrights in this original Apple software ( the
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 // "Apple Software" ), to use, reproduce, modify and redistribute the Apple
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 // Software, with or without modifications, in source and / or binary forms;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 // provided that if you redistribute the Apple Software in its entirety and
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 // without modifications, you must retain this notice and the following text
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 // and disclaimers in all such redistributions of the Apple Software. Neither
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 // the name, trademarks, service marks or logos of Apple Inc. may be used to
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 // endorse or promote products derived from the Apple Software without specific
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 // prior written permission from Apple. Except as expressly stated in this
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 // notice, no other rights or licenses, express or implied, are granted by
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 // Apple herein, including but not limited to any patent rights that may be
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 // infringed by your derivative works or by other works in which the Apple
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 // Software may be incorporated.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 // The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 // WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 // WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 // PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 // ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 // IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 // CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 // INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 // AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 // UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 // OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 // Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 #include <stdio.h>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 #include <stdlib.h>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 #include <math.h>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 #include <iostream>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 #include <sstream>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 #include <string>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 #include <assert.h>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 #include "fft_internal.h"
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 #include "clFFT.h"
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 using namespace std;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 #define max(A,B) ((A) > (B) ? (A) : (B))
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 #define min(A,B) ((A) < (B) ? (A) : (B))
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 static string
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 num2str(int num)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 char temp[200];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 sprintf(temp, "%d", num);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 return string(temp);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 // For any n, this function decomposes n into factors for loacal memory tranpose
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 // based fft. Factors (radices) are sorted such that the first one (radixArray[0])
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 // is the largest. This base radix determines the number of registers used by each
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 // work item and product of remaining radices determine the size of work group needed.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 // To make things concrete with and example, suppose n = 1024. It is decomposed into
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 // 1024 = 16 x 16 x 4. Hence kernel uses float2 a[16], for local in-register fft and
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 // needs 16 x 4 = 64 work items per work group. So kernel first performance 64 length
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 // 16 ffts (64 work items working in parallel) following by transpose using local
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 // memory followed by again 64 length 16 ffts followed by transpose using local memory
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 // followed by 256 length 4 ffts. For the last step since with size of work group is
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 // 64 and each work item can array for 16 values, 64 work items can compute 256 length
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 // 4 ffts by each work item computing 4 length 4 ffts.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 // Similarly for n = 2048 = 8 x 8 x 8 x 4, each work group has 8 x 8 x 4 = 256 work
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 // iterms which each computes 256 (in-parallel) length 8 ffts in-register, followed
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 // by transpose using local memory, followed by 256 length 8 in-register ffts, followed
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 // by transpose using local memory, followed by 256 length 8 in-register ffts, followed
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 // by transpose using local memory, followed by 512 length 4 in-register ffts. Again,
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 // for the last step, each work item computes two length 4 in-register ffts and thus
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 // 256 work items are needed to compute all 512 ffts.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 // For n = 32 = 8 x 4, 4 work items first compute 4 in-register
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 // lenth 8 ffts, followed by transpose using local memory followed by 8 in-register
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 // length 4 ffts, where each work item computes two length 4 ffts thus 4 work items
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
94 // can compute 8 length 4 ffts. However if work group size of say 64 is choosen,
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 // each work group can compute 64/ 4 = 16 size 32 ffts (batched transform).
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 // Users can play with these parameters to figure what gives best performance on
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 // their particular device i.e. some device have less register space thus using
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 // smaller base radix can avoid spilling ... some has small local memory thus
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 // using smaller work group size may be required etc
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
100
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 getRadixArray(unsigned int n, unsigned int *radixArray, unsigned int *numRadices, unsigned int maxRadix)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 if(maxRadix > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
105 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 maxRadix = min(n, maxRadix);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 unsigned int cnt = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 while(n > maxRadix)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 radixArray[cnt++] = maxRadix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
111 n /= maxRadix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 radixArray[cnt++] = n;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 *numRadices = cnt;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
115 return;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
116 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
117
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 switch(n)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
119 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
120 case 2:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
121 *numRadices = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
122 radixArray[0] = 2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
123 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
124
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
125 case 4:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
126 *numRadices = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 radixArray[0] = 4;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
128 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
129
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 case 8:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 *numRadices = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 radixArray[0] = 8;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
134
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 case 16:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
136 *numRadices = 2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
137 radixArray[0] = 8; radixArray[1] = 2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
138 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
139
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 case 32:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
141 *numRadices = 2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 radixArray[0] = 8; radixArray[1] = 4;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
143 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
144
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
145 case 64:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 *numRadices = 2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
147 radixArray[0] = 8; radixArray[1] = 8;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
148 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
149
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
150 case 128:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
151 *numRadices = 3;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
152 radixArray[0] = 8; radixArray[1] = 4; radixArray[2] = 4;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
153 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
154
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
155 case 256:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
156 *numRadices = 4;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 radixArray[0] = 4; radixArray[1] = 4; radixArray[2] = 4; radixArray[3] = 4;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
159
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
160 case 512:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
161 *numRadices = 3;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
162 radixArray[0] = 8; radixArray[1] = 8; radixArray[2] = 8;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
163 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
164
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
165 case 1024:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
166 *numRadices = 3;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
167 radixArray[0] = 16; radixArray[1] = 16; radixArray[2] = 4;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
168 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
169 case 2048:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
170 *numRadices = 4;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
171 radixArray[0] = 8; radixArray[1] = 8; radixArray[2] = 8; radixArray[3] = 4;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
172 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
173 default:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
174 *numRadices = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
175 return;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
176 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
177 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
178
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
179 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
180 insertHeader(string &kernelString, string &kernelName, clFFT_DataFormat dataFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
181 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
182 if(dataFormat == clFFT_SplitComplexFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
183 kernelString += string("__kernel void ") + kernelName + string("(__global float *in_real, __global float *in_imag, __global float *out_real, __global float *out_imag, int dir, int S)\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
184 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
185 kernelString += string("__kernel void ") + kernelName + string("(__global float2 *in, __global float2 *out, int dir, int S)\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
186 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
187
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
188 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
189 insertVariables(string &kStream, int maxRadix)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
190 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
191 kStream += string(" int i, j, r, indexIn, indexOut, index, tid, bNum, xNum, k, l;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
192 kStream += string(" int s, ii, jj, offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 kStream += string(" float2 w;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
194 kStream += string(" float ang, angf, ang1;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
195 kStream += string(" __local float *lMemStore, *lMemLoad;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
196 kStream += string(" float2 a[") + num2str(maxRadix) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
197 kStream += string(" int lId = get_local_id( 0 );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
198 kStream += string(" int groupId = get_group_id( 0 );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
199 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
200
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
201 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
202 formattedLoad(string &kernelString, int aIndex, int gIndex, clFFT_DataFormat dataFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
203 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
204 if(dataFormat == clFFT_InterleavedComplexFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
205 kernelString += string(" a[") + num2str(aIndex) + string("] = in[") + num2str(gIndex) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
206 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
207 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
208 kernelString += string(" a[") + num2str(aIndex) + string("].x = in_real[") + num2str(gIndex) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
209 kernelString += string(" a[") + num2str(aIndex) + string("].y = in_imag[") + num2str(gIndex) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
210 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
211 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
212
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
213 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
214 formattedStore(string &kernelString, int aIndex, int gIndex, clFFT_DataFormat dataFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
215 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
216 if(dataFormat == clFFT_InterleavedComplexFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
217 kernelString += string(" out[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
218 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
219 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
220 kernelString += string(" out_real[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
221 kernelString += string(" out_imag[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
222 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
223 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
224
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
225 static int
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
226 insertGlobalLoadsAndTranspose(string &kernelString, int N, int numWorkItemsPerXForm, int numXFormsPerWG, int R0, int mem_coalesce_width, clFFT_DataFormat dataFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
227 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
228 int log2NumWorkItemsPerXForm = (int) log2(numWorkItemsPerXForm);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
229 int groupSize = numWorkItemsPerXForm * numXFormsPerWG;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
230 int i, j;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
231 int lMemSize = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
232
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
233 if(numXFormsPerWG > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
234 kernelString += string(" s = S & ") + num2str(numXFormsPerWG - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
235
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
236 if(numWorkItemsPerXForm >= mem_coalesce_width)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
237 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
238 if(numXFormsPerWG > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
239 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
240 kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm-1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
241 kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
242 kernelString += string(" if( !s || (groupId < get_num_groups(0)-1) || (jj < s) ) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
243 kernelString += string(" offset = mad24( mad24(groupId, ") + num2str(numXFormsPerWG) + string(", jj), ") + num2str(N) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
244 if(dataFormat == clFFT_InterleavedComplexFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
245 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
246 kernelString += string(" in += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
247 kernelString += string(" out += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
248 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
249 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
250 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
251 kernelString += string(" in_real += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
252 kernelString += string(" in_imag += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
253 kernelString += string(" out_real += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
254 kernelString += string(" out_imag += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
255 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
256 for(i = 0; i < R0; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
257 formattedLoad(kernelString, i, i*numWorkItemsPerXForm, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
258 kernelString += string(" }\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
259 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
260 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
261 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
262 kernelString += string(" ii = lId;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
263 kernelString += string(" jj = 0;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
264 kernelString += string(" offset = mad24(groupId, ") + num2str(N) + string(", ii);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
265 if(dataFormat == clFFT_InterleavedComplexFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
266 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
267 kernelString += string(" in += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
268 kernelString += string(" out += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
269 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
270 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
271 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
272 kernelString += string(" in_real += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
273 kernelString += string(" in_imag += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
274 kernelString += string(" out_real += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
275 kernelString += string(" out_imag += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
276 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
277 for(i = 0; i < R0; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
278 formattedLoad(kernelString, i, i*numWorkItemsPerXForm, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
279 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
280 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
281 else if( N >= mem_coalesce_width )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
282 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
283 int numInnerIter = N / mem_coalesce_width;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
284 int numOuterIter = numXFormsPerWG / ( groupSize / mem_coalesce_width );
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
285
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
286 kernelString += string(" ii = lId & ") + num2str(mem_coalesce_width - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
287 kernelString += string(" jj = lId >> ") + num2str((int)log2(mem_coalesce_width)) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
288 kernelString += string(" lMemStore = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
289 kernelString += string(" offset = mad24( groupId, ") + num2str(numXFormsPerWG) + string(", jj);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
290 kernelString += string(" offset = mad24( offset, ") + num2str(N) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
291 if(dataFormat == clFFT_InterleavedComplexFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
292 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
293 kernelString += string(" in += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
294 kernelString += string(" out += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
295 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
296 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
297 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
298 kernelString += string(" in_real += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
299 kernelString += string(" in_imag += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
300 kernelString += string(" out_real += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
301 kernelString += string(" out_imag += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
302 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
303
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
304 kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
305 for(i = 0; i < numOuterIter; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
306 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
307 kernelString += string(" if( jj < s ) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
308 for(j = 0; j < numInnerIter; j++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
309 formattedLoad(kernelString, i * numInnerIter + j, j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * N, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
310 kernelString += string(" }\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
311 if(i != numOuterIter - 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
312 kernelString += string(" jj += ") + num2str(groupSize / mem_coalesce_width) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
313 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
314 kernelString += string("}\n ");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
315 kernelString += string("else {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
316 for(i = 0; i < numOuterIter; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
317 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
318 for(j = 0; j < numInnerIter; j++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
319 formattedLoad(kernelString, i * numInnerIter + j, j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * N, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
320 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
321 kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
322
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
323 kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
324 kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
325 kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
326
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
327 for( i = 0; i < numOuterIter; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
328 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
329 for( j = 0; j < numInnerIter; j++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
330 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
331 kernelString += string(" lMemStore[") + num2str(j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * (N + numWorkItemsPerXForm )) + string("] = a[") +
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
332 num2str(i * numInnerIter + j) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
333 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
334 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
335 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
336
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
337 for( i = 0; i < R0; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
338 kernelString += string(" a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
339 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
340
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
341 for( i = 0; i < numOuterIter; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
342 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
343 for( j = 0; j < numInnerIter; j++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
344 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
345 kernelString += string(" lMemStore[") + num2str(j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * (N + numWorkItemsPerXForm )) + string("] = a[") +
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
346 num2str(i * numInnerIter + j) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
347 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
348 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
349 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
350
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
351 for( i = 0; i < R0; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
352 kernelString += string(" a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
353 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
354
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
355 lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
356 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
357 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
358 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
359 kernelString += string(" offset = mad24( groupId, ") + num2str(N * numXFormsPerWG) + string(", lId );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
360 if(dataFormat == clFFT_InterleavedComplexFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
361 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
362 kernelString += string(" in += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
363 kernelString += string(" out += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
364 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
365 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
366 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
367 kernelString += string(" in_real += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
368 kernelString += string(" in_imag += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
369 kernelString += string(" out_real += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
370 kernelString += string(" out_imag += offset;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
371 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
372
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
373 kernelString += string(" ii = lId & ") + num2str(N-1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
374 kernelString += string(" jj = lId >> ") + num2str((int)log2(N)) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
375 kernelString += string(" lMemStore = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
376
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
377 kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
378 for( i = 0; i < R0; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
379 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
380 kernelString += string(" if(jj < s )\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
381 formattedLoad(kernelString, i, i*groupSize, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
382 if(i != R0 - 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
383 kernelString += string(" jj += ") + num2str(groupSize / N) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
384 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
385 kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
386 kernelString += string("else {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
387 for( i = 0; i < R0; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
388 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
389 formattedLoad(kernelString, i, i*groupSize, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
390 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
391 kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
392
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
393 if(numWorkItemsPerXForm > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
394 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
395 kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
396 kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
397 kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
398 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
399 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
400 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
401 kernelString += string(" ii = 0;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
402 kernelString += string(" jj = lId;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
403 kernelString += string(" lMemLoad = sMem + mul24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
404 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
405
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
406
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
407 for( i = 0; i < R0; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
408 kernelString += string(" lMemStore[") + num2str(i * ( groupSize / N ) * ( N + numWorkItemsPerXForm )) + string("] = a[") + num2str(i) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
409 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
410
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
411 for( i = 0; i < R0; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
412 kernelString += string(" a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
413 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
414
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
415 for( i = 0; i < R0; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
416 kernelString += string(" lMemStore[") + num2str(i * ( groupSize / N ) * ( N + numWorkItemsPerXForm )) + string("] = a[") + num2str(i) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
417 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
418
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
419 for( i = 0; i < R0; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
420 kernelString += string(" a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
421 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
422
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
423 lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
424 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
425
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
426 return lMemSize;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
427 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
428
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
429 static int
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
430 insertGlobalStoresAndTranspose(string &kernelString, int N, int maxRadix, int Nr, int numWorkItemsPerXForm, int numXFormsPerWG, int mem_coalesce_width, clFFT_DataFormat dataFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
431 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
432 int groupSize = numWorkItemsPerXForm * numXFormsPerWG;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
433 int i, j, k, ind;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
434 int lMemSize = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
435 int numIter = maxRadix / Nr;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
436 string indent = string("");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
437
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
438 if( numWorkItemsPerXForm >= mem_coalesce_width )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
439 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
440 if(numXFormsPerWG > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
441 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
442 kernelString += string(" if( !s || (groupId < get_num_groups(0)-1) || (jj < s) ) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
443 indent = string(" ");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
444 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
445 for(i = 0; i < maxRadix; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
446 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
447 j = i % numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
448 k = i / numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
449 ind = j * Nr + k;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
450 formattedStore(kernelString, ind, i*numWorkItemsPerXForm, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
451 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
452 if(numXFormsPerWG > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
453 kernelString += string(" }\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
454 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
455 else if( N >= mem_coalesce_width )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
456 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
457 int numInnerIter = N / mem_coalesce_width;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
458 int numOuterIter = numXFormsPerWG / ( groupSize / mem_coalesce_width );
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
459
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
460 kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
461 kernelString += string(" ii = lId & ") + num2str(mem_coalesce_width - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
462 kernelString += string(" jj = lId >> ") + num2str((int)log2(mem_coalesce_width)) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
463 kernelString += string(" lMemStore = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
464
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
465 for( i = 0; i < maxRadix; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
466 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
467 j = i % numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
468 k = i / numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
469 ind = j * Nr + k;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
470 kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
471 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
472 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
473
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
474 for( i = 0; i < numOuterIter; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
475 for( j = 0; j < numInnerIter; j++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
476 kernelString += string(" a[") + num2str(i*numInnerIter + j) + string("].x = lMemStore[") + num2str(j*mem_coalesce_width + i*( groupSize / mem_coalesce_width )*(N + numWorkItemsPerXForm)) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
477 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
478
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
479 for( i = 0; i < maxRadix; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
480 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
481 j = i % numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
482 k = i / numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
483 ind = j * Nr + k;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
484 kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
485 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
486 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
487
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
488 for( i = 0; i < numOuterIter; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
489 for( j = 0; j < numInnerIter; j++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
490 kernelString += string(" a[") + num2str(i*numInnerIter + j) + string("].y = lMemStore[") + num2str(j*mem_coalesce_width + i*( groupSize / mem_coalesce_width )*(N + numWorkItemsPerXForm)) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
491 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
492
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
493 kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
494 for(i = 0; i < numOuterIter; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
495 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
496 kernelString += string(" if( jj < s ) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
497 for(j = 0; j < numInnerIter; j++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
498 formattedStore(kernelString, i*numInnerIter + j, j*mem_coalesce_width + i*(groupSize/mem_coalesce_width)*N, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
499 kernelString += string(" }\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
500 if(i != numOuterIter - 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
501 kernelString += string(" jj += ") + num2str(groupSize / mem_coalesce_width) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
502 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
503 kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
504 kernelString += string("else {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
505 for(i = 0; i < numOuterIter; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
506 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
507 for(j = 0; j < numInnerIter; j++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
508 formattedStore(kernelString, i*numInnerIter + j, j*mem_coalesce_width + i*(groupSize/mem_coalesce_width)*N, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
509 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
510 kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
511
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
512 lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
513 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
514 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
515 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
516 kernelString += string(" lMemLoad = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
517
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
518 kernelString += string(" ii = lId & ") + num2str(N - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
519 kernelString += string(" jj = lId >> ") + num2str((int) log2(N)) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
520 kernelString += string(" lMemStore = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
521
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
522 for( i = 0; i < maxRadix; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
523 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
524 j = i % numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
525 k = i / numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
526 ind = j * Nr + k;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
527 kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
528 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
529 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
530
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
531 for( i = 0; i < maxRadix; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
532 kernelString += string(" a[") + num2str(i) + string("].x = lMemStore[") + num2str(i*( groupSize / N )*( N + numWorkItemsPerXForm )) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
533 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
534
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
535 for( i = 0; i < maxRadix; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
536 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
537 j = i % numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
538 k = i / numIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
539 ind = j * Nr + k;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
540 kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
541 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
542 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
543
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
544 for( i = 0; i < maxRadix; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
545 kernelString += string(" a[") + num2str(i) + string("].y = lMemStore[") + num2str(i*( groupSize / N )*( N + numWorkItemsPerXForm )) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
546 kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
547
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
548 kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
549 for( i = 0; i < maxRadix; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
550 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
551 kernelString += string(" if(jj < s ) {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
552 formattedStore(kernelString, i, i*groupSize, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
553 kernelString += string(" }\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
554 if( i != maxRadix - 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
555 kernelString += string(" jj +=") + num2str(groupSize / N) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
556 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
557 kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
558 kernelString += string("else {\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
559 for( i = 0; i < maxRadix; i++ )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
560 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
561 formattedStore(kernelString, i, i*groupSize, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
562 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
563 kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
564
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
565 lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
566 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
567
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
568 return lMemSize;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
569 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
570
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
571 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
572 insertfftKernel(string &kernelString, int Nr, int numIter)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
573 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
574 int i;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
575 for(i = 0; i < numIter; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
576 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
577 kernelString += string(" fftKernel") + num2str(Nr) + string("(a+") + num2str(i*Nr) + string(", dir);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
578 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
579 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
580
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
581 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
582 insertTwiddleKernel(string &kernelString, int Nr, int numIter, int Nprev, int len, int numWorkItemsPerXForm)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
583 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
584 int z, k;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
585 int logNPrev = (int)log2(Nprev);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
586
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
587 for(z = 0; z < numIter; z++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
588 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
589 if(z == 0)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
590 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
591 if(Nprev > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
592 kernelString += string(" angf = (float) (ii >> ") + num2str(logNPrev) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
593 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
594 kernelString += string(" angf = (float) ii;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
595 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
596 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
597 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
598 if(Nprev > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
599 kernelString += string(" angf = (float) ((") + num2str(z*numWorkItemsPerXForm) + string(" + ii) >>") + num2str(logNPrev) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
600 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
601 kernelString += string(" angf = (float) (") + num2str(z*numWorkItemsPerXForm) + string(" + ii);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
602 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
603
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
604 for(k = 1; k < Nr; k++) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
605 int ind = z*Nr + k;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
606 //float fac = (float) (2.0 * M_PI * (double) k / (double) len);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
607 kernelString += string(" ang = dir * ( 2.0f * M_PI * ") + num2str(k) + string(".0f / ") + num2str(len) + string(".0f )") + string(" * angf;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
608 kernelString += string(" w = (float2)(native_cos(ang), native_sin(ang));\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
609 kernelString += string(" a[") + num2str(ind) + string("] = complexMul(a[") + num2str(ind) + string("], w);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
610 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
611 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
612 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
613
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
614 static int
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
615 getPadding(int numWorkItemsPerXForm, int Nprev, int numWorkItemsReq, int numXFormsPerWG, int Nr, int numBanks, int *offset, int *midPad)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
616 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
617 if((numWorkItemsPerXForm <= Nprev) || (Nprev >= numBanks))
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
618 *offset = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
619 else {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
620 int numRowsReq = ((numWorkItemsPerXForm < numBanks) ? numWorkItemsPerXForm : numBanks) / Nprev;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
621 int numColsReq = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
622 if(numRowsReq > Nr)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
623 numColsReq = numRowsReq / Nr;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
624 numColsReq = Nprev * numColsReq;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
625 *offset = numColsReq;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
626 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
627
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
628 if(numWorkItemsPerXForm >= numBanks || numXFormsPerWG == 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
629 *midPad = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
630 else {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
631 int bankNum = ( (numWorkItemsReq + *offset) * Nr ) & (numBanks - 1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
632 if( bankNum >= numWorkItemsPerXForm )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
633 *midPad = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
634 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
635 *midPad = numWorkItemsPerXForm - bankNum;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
636 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
637
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
638 int lMemSize = ( numWorkItemsReq + *offset) * Nr * numXFormsPerWG + *midPad * (numXFormsPerWG - 1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
639 return lMemSize;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
640 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
641
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
642
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
643 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
644 insertLocalStores(string &kernelString, int numIter, int Nr, int numWorkItemsPerXForm, int numWorkItemsReq, int offset, string &comp)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
645 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
646 int z, k;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
647
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
648 for(z = 0; z < numIter; z++) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
649 for(k = 0; k < Nr; k++) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
650 int index = k*(numWorkItemsReq + offset) + z*numWorkItemsPerXForm;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
651 kernelString += string(" lMemStore[") + num2str(index) + string("] = a[") + num2str(z*Nr + k) + string("].") + comp + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
652 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
653 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
654 kernelString += string(" barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
655 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
656
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
657 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
658 insertLocalLoads(string &kernelString, int n, int Nr, int Nrn, int Nprev, int Ncurr, int numWorkItemsPerXForm, int numWorkItemsReq, int offset, string &comp)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
659 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
660 int numWorkItemsReqN = n / Nrn;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
661 int interBlockHNum = max( Nprev / numWorkItemsPerXForm, 1 );
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
662 int interBlockHStride = numWorkItemsPerXForm;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
663 int vertWidth = max(numWorkItemsPerXForm / Nprev, 1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
664 vertWidth = min( vertWidth, Nr);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
665 int vertNum = Nr / vertWidth;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
666 int vertStride = ( n / Nr + offset ) * vertWidth;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
667 int iter = max( numWorkItemsReqN / numWorkItemsPerXForm, 1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
668 int intraBlockHStride = (numWorkItemsPerXForm / (Nprev*Nr)) > 1 ? (numWorkItemsPerXForm / (Nprev*Nr)) : 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
669 intraBlockHStride *= Nprev;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
670
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
671 int stride = numWorkItemsReq / Nrn;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
672 int i;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
673 for(i = 0; i < iter; i++) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
674 int ii = i / (interBlockHNum * vertNum);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
675 int zz = i % (interBlockHNum * vertNum);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
676 int jj = zz % interBlockHNum;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
677 int kk = zz / interBlockHNum;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
678 int z;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
679 for(z = 0; z < Nrn; z++) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
680 int st = kk * vertStride + jj * interBlockHStride + ii * intraBlockHStride + z * stride;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
681 kernelString += string(" a[") + num2str(i*Nrn + z) + string("].") + comp + string(" = lMemLoad[") + num2str(st) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
682 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
683 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
684 kernelString += string(" barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
685 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
686
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
687 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
688 insertLocalLoadIndexArithmatic(string &kernelString, int Nprev, int Nr, int numWorkItemsReq, int numWorkItemsPerXForm, int numXFormsPerWG, int offset, int midPad)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
689 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
690 int Ncurr = Nprev * Nr;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
691 int logNcurr = (int)log2(Ncurr);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
692 int logNprev = (int)log2(Nprev);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
693 int incr = (numWorkItemsReq + offset) * Nr + midPad;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
694
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
695 if(Ncurr < numWorkItemsPerXForm)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
696 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
697 if(Nprev == 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
698 kernelString += string(" j = ii & ") + num2str(Ncurr - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
699 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
700 kernelString += string(" j = (ii & ") + num2str(Ncurr - 1) + string(") >> ") + num2str(logNprev) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
701
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
702 if(Nprev == 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
703 kernelString += string(" i = ii >> ") + num2str(logNcurr) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
704 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
705 kernelString += string(" i = mad24(ii >> ") + num2str(logNcurr) + string(", ") + num2str(Nprev) + string(", ii & ") + num2str(Nprev - 1) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
706 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
707 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
708 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
709 if(Nprev == 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
710 kernelString += string(" j = ii;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
711 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
712 kernelString += string(" j = ii >> ") + num2str(logNprev) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
713 if(Nprev == 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
714 kernelString += string(" i = 0;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
715 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
716 kernelString += string(" i = ii & ") + num2str(Nprev - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
717 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
718
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
719 if(numXFormsPerWG > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
720 kernelString += string(" i = mad24(jj, ") + num2str(incr) + string(", i);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
721
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
722 kernelString += string(" lMemLoad = sMem + mad24(j, ") + num2str(numWorkItemsReq + offset) + string(", i);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
723 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
724
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
725 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
726 insertLocalStoreIndexArithmatic(string &kernelString, int numWorkItemsReq, int numXFormsPerWG, int Nr, int offset, int midPad)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
727 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
728 if(numXFormsPerWG == 1) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
729 kernelString += string(" lMemStore = sMem + ii;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
730 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
731 else {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
732 kernelString += string(" lMemStore = sMem + mad24(jj, ") + num2str((numWorkItemsReq + offset)*Nr + midPad) + string(", ii);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
733 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
734 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
735
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
736
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
737 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
738 createLocalMemfftKernelString(cl_fft_plan *plan)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
739 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
740 unsigned int radixArray[10];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
741 unsigned int numRadix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
742
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
743 unsigned int n = plan->n.x;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
744
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
745 assert(n <= plan->max_work_item_per_workgroup * plan->max_radix && "signal lenght too big for local mem fft\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
746
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
747 getRadixArray(n, radixArray, &numRadix, 0);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
748 assert(numRadix > 0 && "no radix array supplied\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
749
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
750 if(n/radixArray[0] > plan->max_work_item_per_workgroup)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
751 getRadixArray(n, radixArray, &numRadix, plan->max_radix);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
752
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
753 assert(radixArray[0] <= plan->max_radix && "max radix choosen is greater than allowed\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
754 assert(n/radixArray[0] <= plan->max_work_item_per_workgroup && "required work items per xform greater than maximum work items allowed per work group for local mem fft\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
755
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
756 unsigned int tmpLen = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
757 unsigned int i;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
758 for(i = 0; i < numRadix; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
759 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
760 assert( radixArray[i] && !( (radixArray[i] - 1) & radixArray[i] ) );
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
761 tmpLen *= radixArray[i];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
762 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
763 assert(tmpLen == n && "product of radices choosen doesnt match the length of signal\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
764
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
765 int offset, midPad;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
766 string localString(""), kernelName("");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
767
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
768 clFFT_DataFormat dataFormat = plan->format;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
769 string *kernelString = plan->kernel_string;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
770
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
771
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
772 cl_fft_kernel_info **kInfo = &plan->kernel_info;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
773 int kCount = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
774
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
775 while(*kInfo)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
776 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
777 kInfo = &(*kInfo)->next;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
778 kCount++;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
779 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
780
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
781 kernelName = string("fft") + num2str(kCount);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
782
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
783 *kInfo = (cl_fft_kernel_info *) malloc(sizeof(cl_fft_kernel_info));
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
784 (*kInfo)->kernel = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
785 (*kInfo)->lmem_size = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
786 (*kInfo)->num_workgroups = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
787 (*kInfo)->num_workitems_per_workgroup = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
788 (*kInfo)->dir = cl_fft_kernel_x;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
789 (*kInfo)->in_place_possible = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
790 (*kInfo)->next = NULL;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
791 (*kInfo)->kernel_name = (char *) malloc(sizeof(char)*(kernelName.size()+1));
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
792 strcpy((*kInfo)->kernel_name, kernelName.c_str());
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
793
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
794 unsigned int numWorkItemsPerXForm = n / radixArray[0];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
795 unsigned int numWorkItemsPerWG = numWorkItemsPerXForm <= 64 ? 64 : numWorkItemsPerXForm;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
796 assert(numWorkItemsPerWG <= plan->max_work_item_per_workgroup);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
797 int numXFormsPerWG = numWorkItemsPerWG / numWorkItemsPerXForm;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
798 (*kInfo)->num_workgroups = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
799 (*kInfo)->num_xforms_per_workgroup = numXFormsPerWG;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
800 (*kInfo)->num_workitems_per_workgroup = numWorkItemsPerWG;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
801
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
802 unsigned int *N = radixArray;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
803 unsigned int maxRadix = N[0];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
804 unsigned int lMemSize = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
805
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
806 insertVariables(localString, maxRadix);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
807
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
808 lMemSize = insertGlobalLoadsAndTranspose(localString, n, numWorkItemsPerXForm, numXFormsPerWG, maxRadix, plan->min_mem_coalesce_width, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
809 (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? lMemSize : (*kInfo)->lmem_size;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
810
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
811 string xcomp = string("x");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
812 string ycomp = string("y");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
813
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
814 unsigned int Nprev = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
815 unsigned int len = n;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
816 unsigned int r;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
817 for(r = 0; r < numRadix; r++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
818 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
819 int numIter = N[0] / N[r];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
820 int numWorkItemsReq = n / N[r];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
821 int Ncurr = Nprev * N[r];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
822 insertfftKernel(localString, N[r], numIter);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
823
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
824 if(r < (numRadix - 1)) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
825 insertTwiddleKernel(localString, N[r], numIter, Nprev, len, numWorkItemsPerXForm);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
826 lMemSize = getPadding(numWorkItemsPerXForm, Nprev, numWorkItemsReq, numXFormsPerWG, N[r], plan->num_local_mem_banks, &offset, &midPad);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
827 (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? lMemSize : (*kInfo)->lmem_size;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
828 insertLocalStoreIndexArithmatic(localString, numWorkItemsReq, numXFormsPerWG, N[r], offset, midPad);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
829 insertLocalLoadIndexArithmatic(localString, Nprev, N[r], numWorkItemsReq, numWorkItemsPerXForm, numXFormsPerWG, offset, midPad);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
830 insertLocalStores(localString, numIter, N[r], numWorkItemsPerXForm, numWorkItemsReq, offset, xcomp);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
831 insertLocalLoads(localString, n, N[r], N[r+1], Nprev, Ncurr, numWorkItemsPerXForm, numWorkItemsReq, offset, xcomp);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
832 insertLocalStores(localString, numIter, N[r], numWorkItemsPerXForm, numWorkItemsReq, offset, ycomp);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
833 insertLocalLoads(localString, n, N[r], N[r+1], Nprev, Ncurr, numWorkItemsPerXForm, numWorkItemsReq, offset, ycomp);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
834 Nprev = Ncurr;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
835 len = len / N[r];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
836 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
837 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
838
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
839 lMemSize = insertGlobalStoresAndTranspose(localString, n, maxRadix, N[numRadix - 1], numWorkItemsPerXForm, numXFormsPerWG, plan->min_mem_coalesce_width, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
840 (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? lMemSize : (*kInfo)->lmem_size;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
841
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
842 insertHeader(*kernelString, kernelName, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
843 *kernelString += string("{\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
844 if((*kInfo)->lmem_size)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
845 *kernelString += string(" __local float sMem[") + num2str((*kInfo)->lmem_size) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
846 *kernelString += localString;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
847 *kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
848 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
849
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
850 // For n larger than what can be computed using local memory fft, global transposes
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
851 // multiple kernel launces is needed. For these sizes, n can be decomposed using
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
852 // much larger base radices i.e. say n = 262144 = 128 x 64 x 32. Thus three kernel
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
853 // launches will be needed, first computing 64 x 32, length 128 ffts, second computing
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
854 // 128 x 32 length 64 ffts, and finally a kernel computing 128 x 64 length 32 ffts.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
855 // Each of these base radices can futher be divided into factors so that each of these
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
856 // base ffts can be computed within one kernel launch using in-register ffts and local
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
857 // memory transposes i.e for the first kernel above which computes 64 x 32 ffts on length
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
858 // 128, 128 can be decomposed into 128 = 16 x 8 i.e. 8 work items can compute 8 length
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
859 // 16 ffts followed by transpose using local memory followed by each of these eight
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
860 // work items computing 2 length 8 ffts thus computing 16 length 8 ffts in total. This
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
861 // means only 8 work items are needed for computing one length 128 fft. If we choose
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
862 // work group size of say 64, we can compute 64/8 = 8 length 128 ffts within one
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
863 // work group. Since we need to compute 64 x 32 length 128 ffts in first kernel, this
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
864 // means we need to launch 64 x 32 / 8 = 256 work groups with 64 work items in each
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
865 // work group where each work group is computing 8 length 128 ffts where each length
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
866 // 128 fft is computed by 8 work items. Same logic can be applied to other two kernels
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
867 // in this example. Users can play with difference base radices and difference
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
868 // decompositions of base radices to generates different kernels and see which gives
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
869 // best performance. Following function is just fixed to use 128 as base radix
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
870
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
871 void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
872 getGlobalRadixInfo(int n, int *radix, int *R1, int *R2, int *numRadices)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
873 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
874 int baseRadix = min(n, 128);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
875
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
876 int numR = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
877 int N = n;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
878 while(N > baseRadix)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
879 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
880 N /= baseRadix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
881 numR++;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
882 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
883
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
884 for(int i = 0; i < numR; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
885 radix[i] = baseRadix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
886
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
887 radix[numR] = N;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
888 numR++;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
889 *numRadices = numR;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
890
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
891 for(int i = 0; i < numR; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
892 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
893 int B = radix[i];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
894 if(B <= 8)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
895 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
896 R1[i] = B;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
897 R2[i] = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
898 continue;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
899 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
900
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
901 int r1 = 2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
902 int r2 = B / r1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
903 while(r2 > r1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
904 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
905 r1 *=2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
906 r2 = B / r1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
907 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
908 R1[i] = r1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
909 R2[i] = r2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
910 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
911 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
912
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
913 static void
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
914 createGlobalFFTKernelString(cl_fft_plan *plan, int n, int BS, cl_fft_kernel_dir dir, int vertBS)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
915 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
916 int i, j, k, t;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
917 int radixArr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
918 int R1Arr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
919 int R2Arr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
920 int radix, R1, R2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
921 int numRadices;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
922
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
923 int maxThreadsPerBlock = plan->max_work_item_per_workgroup;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
924 int maxArrayLen = plan->max_radix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
925 int batchSize = plan->min_mem_coalesce_width;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
926 clFFT_DataFormat dataFormat = plan->format;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
927 int vertical = (dir == cl_fft_kernel_x) ? 0 : 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
928
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
929 getGlobalRadixInfo(n, radixArr, R1Arr, R2Arr, &numRadices);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
930
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
931 int numPasses = numRadices;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
932
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
933 string localString(""), kernelName("");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
934 string *kernelString = plan->kernel_string;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
935 cl_fft_kernel_info **kInfo = &plan->kernel_info;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
936 int kCount = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
937
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
938 while(*kInfo)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
939 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
940 kInfo = &(*kInfo)->next;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
941 kCount++;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
942 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
943
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
944 int N = n;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
945 int m = (int)log2(n);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
946 int Rinit = vertical ? BS : 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
947 batchSize = vertical ? min(BS, batchSize) : batchSize;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
948 int passNum;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
949
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
950 for(passNum = 0; passNum < numPasses; passNum++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
951 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
952
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
953 localString.clear();
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
954 kernelName.clear();
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
955
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
956 radix = radixArr[passNum];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
957 R1 = R1Arr[passNum];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
958 R2 = R2Arr[passNum];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
959
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
960 int strideI = Rinit;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
961 for(i = 0; i < numPasses; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
962 if(i != passNum)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
963 strideI *= radixArr[i];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
964
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
965 int strideO = Rinit;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
966 for(i = 0; i < passNum; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
967 strideO *= radixArr[i];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
968
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
969 int threadsPerXForm = R2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
970 batchSize = R2 == 1 ? plan->max_work_item_per_workgroup : batchSize;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
971 batchSize = min(batchSize, strideI);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
972 int threadsPerBlock = batchSize * threadsPerXForm;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
973 threadsPerBlock = min(threadsPerBlock, maxThreadsPerBlock);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
974 batchSize = threadsPerBlock / threadsPerXForm;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
975 assert(R2 <= R1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
976 assert(R1*R2 == radix);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
977 assert(R1 <= maxArrayLen);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
978 assert(threadsPerBlock <= maxThreadsPerBlock);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
979
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
980 int numIter = R1 / R2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
981 int gInInc = threadsPerBlock / batchSize;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
982
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
983
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
984 int lgStrideO = (int)log2(strideO);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
985 int numBlocksPerXForm = strideI / batchSize;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
986 int numBlocks = numBlocksPerXForm;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
987 if(!vertical)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
988 numBlocks *= BS;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
989 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
990 numBlocks *= vertBS;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
991
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
992 kernelName = string("fft") + num2str(kCount);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
993 *kInfo = (cl_fft_kernel_info *) malloc(sizeof(cl_fft_kernel_info));
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
994 (*kInfo)->kernel = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
995 if(R2 == 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
996 (*kInfo)->lmem_size = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
997 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
998 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
999 if(strideO == 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1000 (*kInfo)->lmem_size = (radix + 1)*batchSize;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1001 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1002 (*kInfo)->lmem_size = threadsPerBlock*R1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1003 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1004 (*kInfo)->num_workgroups = numBlocks;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1005 (*kInfo)->num_xforms_per_workgroup = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1006 (*kInfo)->num_workitems_per_workgroup = threadsPerBlock;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1007 (*kInfo)->dir = dir;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1008 if( (passNum == (numPasses - 1)) && (numPasses & 1) )
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1009 (*kInfo)->in_place_possible = 1;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1010 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1011 (*kInfo)->in_place_possible = 0;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1012 (*kInfo)->next = NULL;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1013 (*kInfo)->kernel_name = (char *) malloc(sizeof(char)*(kernelName.size()+1));
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1014 strcpy((*kInfo)->kernel_name, kernelName.c_str());
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1015
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1016 insertVariables(localString, R1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1017
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1018 if(vertical)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1019 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1020 localString += string("xNum = groupId >> ") + num2str((int)log2(numBlocksPerXForm)) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1021 localString += string("groupId = groupId & ") + num2str(numBlocksPerXForm - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1022 localString += string("indexIn = mad24(groupId, ") + num2str(batchSize) + string(", xNum << ") + num2str((int)log2(n*BS)) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1023 localString += string("tid = mul24(groupId, ") + num2str(batchSize) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1024 localString += string("i = tid >> ") + num2str(lgStrideO) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1025 localString += string("j = tid & ") + num2str(strideO - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1026 int stride = radix*Rinit;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1027 for(i = 0; i < passNum; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1028 stride *= radixArr[i];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1029 localString += string("indexOut = mad24(i, ") + num2str(stride) + string(", j + ") + string("(xNum << ") + num2str((int) log2(n*BS)) + string("));\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1030 localString += string("bNum = groupId;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1031 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1032 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1033 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1034 int lgNumBlocksPerXForm = (int)log2(numBlocksPerXForm);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1035 localString += string("bNum = groupId & ") + num2str(numBlocksPerXForm - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1036 localString += string("xNum = groupId >> ") + num2str(lgNumBlocksPerXForm) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1037 localString += string("indexIn = mul24(bNum, ") + num2str(batchSize) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1038 localString += string("tid = indexIn;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1039 localString += string("i = tid >> ") + num2str(lgStrideO) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1040 localString += string("j = tid & ") + num2str(strideO - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1041 int stride = radix*Rinit;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1042 for(i = 0; i < passNum; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1043 stride *= radixArr[i];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1044 localString += string("indexOut = mad24(i, ") + num2str(stride) + string(", j);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1045 localString += string("indexIn += (xNum << ") + num2str(m) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1046 localString += string("indexOut += (xNum << ") + num2str(m) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1047 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1048
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1049 // Load Data
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1050 int lgBatchSize = (int)log2(batchSize);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1051 localString += string("tid = lId;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1052 localString += string("i = tid & ") + num2str(batchSize - 1) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1053 localString += string("j = tid >> ") + num2str(lgBatchSize) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1054 localString += string("indexIn += mad24(j, ") + num2str(strideI) + string(", i);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1055
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1056 if(dataFormat == clFFT_SplitComplexFormat)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1057 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1058 localString += string("in_real += indexIn;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1059 localString += string("in_imag += indexIn;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1060 for(j = 0; j < R1; j++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1061 localString += string("a[") + num2str(j) + string("].x = in_real[") + num2str(j*gInInc*strideI) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1062 for(j = 0; j < R1; j++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1063 localString += string("a[") + num2str(j) + string("].y = in_imag[") + num2str(j*gInInc*strideI) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1064 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1065 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1066 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1067 localString += string("in += indexIn;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1068 for(j = 0; j < R1; j++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1069 localString += string("a[") + num2str(j) + string("] = in[") + num2str(j*gInInc*strideI) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1070 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1071
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1072 localString += string("fftKernel") + num2str(R1) + string("(a, dir);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1073
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1074 if(R2 > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1075 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1076 // twiddle
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1077 for(k = 1; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1078 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1079 localString += string("ang = dir*(2.0f*M_PI*") + num2str(k) + string("/") + num2str(radix) + string(")*j;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1080 localString += string("w = (float2)(native_cos(ang), native_sin(ang));\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1081 localString += string("a[") + num2str(k) + string("] = complexMul(a[") + num2str(k) + string("], w);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1082 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1083
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1084 // shuffle
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1085 numIter = R1 / R2;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1086 localString += string("indexIn = mad24(j, ") + num2str(threadsPerBlock*numIter) + string(", i);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1087 localString += string("lMemStore = sMem + tid;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1088 localString += string("lMemLoad = sMem + indexIn;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1089 for(k = 0; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1090 localString += string("lMemStore[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1091 localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1092 for(k = 0; k < numIter; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1093 for(t = 0; t < R2; t++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1094 localString += string("a[") + num2str(k*R2+t) + string("].x = lMemLoad[") + num2str(t*batchSize + k*threadsPerBlock) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1095 localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1096 for(k = 0; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1097 localString += string("lMemStore[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1098 localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1099 for(k = 0; k < numIter; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1100 for(t = 0; t < R2; t++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1101 localString += string("a[") + num2str(k*R2+t) + string("].y = lMemLoad[") + num2str(t*batchSize + k*threadsPerBlock) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1102 localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1103
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1104 for(j = 0; j < numIter; j++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1105 localString += string("fftKernel") + num2str(R2) + string("(a + ") + num2str(j*R2) + string(", dir);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1106 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1107
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1108 // twiddle
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1109 if(passNum < (numPasses - 1))
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1110 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1111 localString += string("l = ((bNum << ") + num2str(lgBatchSize) + string(") + i) >> ") + num2str(lgStrideO) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1112 localString += string("k = j << ") + num2str((int)log2(R1/R2)) + string(";\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1113 localString += string("ang1 = dir*(2.0f*M_PI/") + num2str(N) + string(")*l;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1114 for(t = 0; t < R1; t++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1115 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1116 localString += string("ang = ang1*(k + ") + num2str((t%R2)*R1 + (t/R2)) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1117 localString += string("w = (float2)(native_cos(ang), native_sin(ang));\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1118 localString += string("a[") + num2str(t) + string("] = complexMul(a[") + num2str(t) + string("], w);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1119 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1120 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1121
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1122 // Store Data
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1123 if(strideO == 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1124 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1125
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1126 localString += string("lMemStore = sMem + mad24(i, ") + num2str(radix + 1) + string(", j << ") + num2str((int)log2(R1/R2)) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1127 localString += string("lMemLoad = sMem + mad24(tid >> ") + num2str((int)log2(radix)) + string(", ") + num2str(radix+1) + string(", tid & ") + num2str(radix-1) + string(");\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1128
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1129 for(i = 0; i < R1/R2; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1130 for(j = 0; j < R2; j++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1131 localString += string("lMemStore[ ") + num2str(i + j*R1) + string("] = a[") + num2str(i*R2+j) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1132 localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1133 if(threadsPerBlock >= radix)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1134 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1135 for(i = 0; i < R1; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1136 localString += string("a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i*(radix+1)*(threadsPerBlock/radix)) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1137 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1138 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1139 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1140 int innerIter = radix/threadsPerBlock;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1141 int outerIter = R1/innerIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1142 for(i = 0; i < outerIter; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1143 for(j = 0; j < innerIter; j++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1144 localString += string("a[") + num2str(i*innerIter+j) + string("].x = lMemLoad[") + num2str(j*threadsPerBlock + i*(radix+1)) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1145 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1146 localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1147
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1148 for(i = 0; i < R1/R2; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1149 for(j = 0; j < R2; j++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1150 localString += string("lMemStore[ ") + num2str(i + j*R1) + string("] = a[") + num2str(i*R2+j) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1151 localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1152 if(threadsPerBlock >= radix)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1153 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1154 for(i = 0; i < R1; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1155 localString += string("a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i*(radix+1)*(threadsPerBlock/radix)) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1156 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1157 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1158 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1159 int innerIter = radix/threadsPerBlock;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1160 int outerIter = R1/innerIter;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1161 for(i = 0; i < outerIter; i++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1162 for(j = 0; j < innerIter; j++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1163 localString += string("a[") + num2str(i*innerIter+j) + string("].y = lMemLoad[") + num2str(j*threadsPerBlock + i*(radix+1)) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1164 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1165 localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1166
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1167 localString += string("indexOut += tid;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1168 if(dataFormat == clFFT_SplitComplexFormat) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1169 localString += string("out_real += indexOut;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1170 localString += string("out_imag += indexOut;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1171 for(k = 0; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1172 localString += string("out_real[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1173 for(k = 0; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1174 localString += string("out_imag[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1175 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1176 else {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1177 localString += string("out += indexOut;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1178 for(k = 0; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1179 localString += string("out[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1180 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1181
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1182 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1183 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1184 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1185 localString += string("indexOut += mad24(j, ") + num2str(numIter*strideO) + string(", i);\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1186 if(dataFormat == clFFT_SplitComplexFormat) {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1187 localString += string("out_real += indexOut;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1188 localString += string("out_imag += indexOut;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1189 for(k = 0; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1190 localString += string("out_real[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("].x;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1191 for(k = 0; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1192 localString += string("out_imag[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("].y;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1193 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1194 else {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1195 localString += string("out += indexOut;\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1196 for(k = 0; k < R1; k++)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1197 localString += string("out[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1198 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1199 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1200
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1201 insertHeader(*kernelString, kernelName, dataFormat);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1202 *kernelString += string("{\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1203 if((*kInfo)->lmem_size)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1204 *kernelString += string(" __local float sMem[") + num2str((*kInfo)->lmem_size) + string("];\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1205 *kernelString += localString;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1206 *kernelString += string("}\n");
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1207
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1208 N /= radix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1209 kInfo = &(*kInfo)->next;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1210 kCount++;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1211 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1212 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1213
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1214 void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1215 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1216 unsigned int radixArray[10];
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1217 unsigned int numRadix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1218
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1219 switch(dir)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1220 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1221 case cl_fft_kernel_x:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1222 if(plan->n.x > plan->max_localmem_fft_size)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1223 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1224 createGlobalFFTKernelString(plan, plan->n.x, 1, cl_fft_kernel_x, 1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1225 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1226 else if(plan->n.x > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1227 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1228 getRadixArray(plan->n.x, radixArray, &numRadix, 0);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1229 if(plan->n.x / radixArray[0] <= plan->max_work_item_per_workgroup)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1230 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1231 createLocalMemfftKernelString(plan);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1232 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1233 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1234 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1235 getRadixArray(plan->n.x, radixArray, &numRadix, plan->max_radix);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1236 if(plan->n.x / radixArray[0] <= plan->max_work_item_per_workgroup)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1237 createLocalMemfftKernelString(plan);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1238 else
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1239 createGlobalFFTKernelString(plan, plan->n.x, 1, cl_fft_kernel_x, 1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1240 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1241 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1242 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1243
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1244 case cl_fft_kernel_y:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1245 if(plan->n.y > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1246 createGlobalFFTKernelString(plan, plan->n.y, plan->n.x, cl_fft_kernel_y, 1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1247 break;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1248
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1249 case cl_fft_kernel_z:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1250 if(plan->n.z > 1)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1251 createGlobalFFTKernelString(plan, plan->n.z, plan->n.x*plan->n.y, cl_fft_kernel_z, 1);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1252 default:
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1253 return;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1254 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1255 }
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1256