annotate fft_Example/fft_internal.h @ 2:ccea4e6a1945

add OpenCL example
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Jan 2013 23:19:41 +0900
parents
children ea2e7ce9d5bb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 // File: fft_internal.h
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 // Version: <1.0>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 // Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 // in consideration of your agreement to the following terms, and your use,
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 // installation, modification or redistribution of this Apple software
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 // constitutes acceptance of these terms. If you do not agree with these
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 // terms, please do not use, install, modify or redistribute this Apple
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 // software.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 // In consideration of your agreement to abide by the following terms, and
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 // subject to these terms, Apple grants you a personal, non - exclusive
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 // license, under Apple's copyrights in this original Apple software ( the
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 // "Apple Software" ), to use, reproduce, modify and redistribute the Apple
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 // Software, with or without modifications, in source and / or binary forms;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 // provided that if you redistribute the Apple Software in its entirety and
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 // without modifications, you must retain this notice and the following text
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 // and disclaimers in all such redistributions of the Apple Software. Neither
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 // the name, trademarks, service marks or logos of Apple Inc. may be used to
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 // endorse or promote products derived from the Apple Software without specific
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 // prior written permission from Apple. Except as expressly stated in this
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 // notice, no other rights or licenses, express or implied, are granted by
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 // Apple herein, including but not limited to any patent rights that may be
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 // infringed by your derivative works or by other works in which the Apple
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 // Software may be incorporated.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 // The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 // WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 // WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 // PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 // ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 // IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 // CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 // INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 // AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 // UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 // OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 // Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 //
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 #ifndef __CLFFT_INTERNAL_H
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 #define __CLFFT_INTERNAL_H
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 #include "clFFT.h"
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 #include <iostream>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 #include <string>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 #include <sstream>
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 using namespace std;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 typedef enum kernel_dir_t
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 cl_fft_kernel_x,
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 cl_fft_kernel_y,
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 cl_fft_kernel_z
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 }cl_fft_kernel_dir;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 typedef struct kernel_info_t
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 cl_kernel kernel;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 char *kernel_name;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 unsigned lmem_size;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 unsigned num_workgroups;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 unsigned num_xforms_per_workgroup;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 unsigned num_workitems_per_workgroup;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 cl_fft_kernel_dir dir;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 int in_place_possible;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 kernel_info_t *next;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 }cl_fft_kernel_info;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
78
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 typedef struct
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 {
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 // context in which fft resources are created and kernels are executed
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 cl_context context;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
83
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 // size of signal
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 clFFT_Dim3 n;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
86
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 // dimension of transform ... must be either 1D, 2D or 3D
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 clFFT_Dimension dim;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
89
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 // data format ... must be either interleaved or planar
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 clFFT_DataFormat format;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
92
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 // string containing kernel source. Generated at runtime based on
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
94 // n, dim, format and other parameters
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 string *kernel_string;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
96
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 // CL program containing source and kernel for this particular
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 // n, dim, data format
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 cl_program program;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
100
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 // linked list of kernels which need to be executed for this fft
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 cl_fft_kernel_info *kernel_info;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
103
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 // number of kernels
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
105 int num_kernels;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
106
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 // twist kernel for virtualizing fft of very large sizes that do not
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 // fit in GPU global memory
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 cl_kernel twist_kernel;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
110
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
111 // flag indicating if temporary intermediate buffer is needed or not.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 // this depends on fft kernels being executed and if transform is
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 // in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 // one that does not require global transpose does not need temporary buffer)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
115 // 2D 1024x1024 out-of-place fft however does require intermediate buffer.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
116 // If temp buffer is needed, its allocation is lazy i.e. it is not allocated
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
117 // until it is needed
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 cl_int temp_buffer_needed;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
119
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
120 // Batch size is runtime parameter and size of temporary buffer (if needed)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
121 // depends on batch size. Allocation of temporary buffer is lazy i.e. it is
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
122 // only created when needed. Once it is created at first call of clFFT_Executexxx
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
123 // it is not allocated next time unless clFFT_Executexxx is called with
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
124 // batch size different than the first call. last_batch_size caches the last
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
125 // batch size with which this plan is used so that we don't keep allocating/deallocating
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
126 // temp buffer if same batch size is used again and again.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 unsigned last_batch_size;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
128
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
129 // temporary buffer for interleaved plan
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 cl_mem tempmemobj;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
131
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 // temporary buffer for planar plan. Only one of tempmemobj or
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 // (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending on the
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
134 // data format of plan (planar or interleaved)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 cl_mem tempmemobj_real, tempmemobj_imag;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
136
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
137 // Maximum size of signal for which local memory transpose based
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
138 // fft is sufficient i.e. no global mem transpose (communication)
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
139 // is needed
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 unsigned max_localmem_fft_size;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
141
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 // Maximum work items per work group allowed. This, along with max_radix below controls
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
143 // maximum local memory being used by fft kernels of this plan. Set to 256 by default
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
144 unsigned max_work_item_per_workgroup;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
145
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 // Maximum base radix for local memory fft ... this controls the maximum register
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
147 // space used by work items. Currently defaults to 16
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
148 unsigned max_radix;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
149
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
150 // Device dependent parameter that tells how many work-items need to read consecutive
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
151 // values to make sure global memory access by work-items of a work-group result in
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
152 // coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
153 unsigned min_mem_coalesce_width;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
154
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
155 // Number of local memory banks. This is used to generate kernel with local memory
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
156 // transposes with appropriate padding to avoid bank conflicts to local memory
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 // e.g. on NVidia it is 16.
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 unsigned num_local_mem_banks;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
159 }cl_fft_plan;
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
160
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
161 void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir);
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
162
ccea4e6a1945 add OpenCL example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
163 #endif