Members/yuuhi/OpenCL: fft_Example/fft

comparison fft_Example/fft_internal.h @ 7:ea2e7ce9d5bb

add sample.pgm

author	Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date	Tue, 05 Feb 2013 15:19:02 +0900
parents	ccea4e6a1945
children

comparison

equal deleted inserted replaced

-:db074091ed0b
+:ea2e7ce9d5bb
 typedef struct
 {
 	// context in which fft resources are created and kernels are executed
 	cl_context              context;
-	// size of signal
+// size of signal
-	clFFT_Dim3              n;
+clFFT_Dim3              n;
-	// dimension of transform ... must be either 1D, 2D or 3D
+// dimension of transform ... must be either 1D, 2D or 3D
-	clFFT_Dimension			dim;
+clFFT_Dimension            dim;
-	// data format ... must be either interleaved or planar
+// data format ... must be either interleaved or planar
-	clFFT_DataFormat		format;
+clFFT_DataFormat        format;
-	// string containing kernel source. Generated at runtime based on
+// string containing kernel source. Generated at runtime based on
-	// n, dim, format and other parameters
+// n, dim, format and other parameters
-	string                  *kernel_string;
+string                  *kernel_string;
-	// CL program containing source and kernel this particular
+// CL program containing source and kernel this particular
-	// n, dim, data format
+// n, dim, data format
-	cl_program				program;
+cl_program                program;
-	// linked list of kernels which needs to be executed for this fft
+// linked list of kernels which needs to be executed for this fft
-	cl_fft_kernel_info		*kernel_info;
+cl_fft_kernel_info        *kernel_info;
-	// number of kernels
+// number of kernels
-	int                     num_kernels;
+int                     num_kernels;
-	// twist kernel for virtualizing fft of very large sizes that do not
+// twist kernel for virtualizing fft of very large sizes that do not
-	// fit in GPU global memory
+// fit in GPU global memory
-	cl_kernel				twist_kernel;
+cl_kernel                twist_kernel;
-	// flag indicating if temporary intermediate buffer is needed or not.
+// flag indicating if temporary intermediate buffer is needed or not.
-	// this depends on fft kernels being executed and if transform is
+// this depends on fft kernels being executed and if transform is
-	// in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
+// in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
-	// one that does not require global transpose do not need temporary buffer)
+// one that does not require global transpose do not need temporary buffer)
-	// 2D 1024x1024 out-of-place fft however do require intermediate buffer.
+// 2D 1024x1024 out-of-place fft however do require intermediate buffer.
-	// If temp buffer is needed, its allocation is lazy i.e. it is not allocated
+// If temp buffer is needed, its allocation is lazy i.e. it is not allocated
-	// until it is needed
+// until it is needed
-	cl_int                  temp_buffer_needed;
+cl_int                  temp_buffer_needed;
-	// Batch size is runtime parameter and size of temporary buffer (if needed)
+// Batch size is runtime parameter and size of temporary buffer (if needed)
-	// depends on batch size. Allocation of temporary buffer is lazy i.e. its
+// depends on batch size. Allocation of temporary buffer is lazy i.e. its
-	// only created when needed. Once its created at first call of clFFT_Executexxx
+// only created when needed. Once its created at first call of clFFT_Executexxx
-	// it is not allocated next time if next time clFFT_Executexxx is called with
+// it is not allocated next time if next time clFFT_Executexxx is called with
-	// batch size different than the first call. last_batch_size caches the last
+// batch size different than the first call. last_batch_size caches the last
-	// batch size with which this plan is used so that we dont keep allocating/deallocating
+// batch size with which this plan is used so that we dont keep allocating/deallocating
-	// temp buffer if same batch size is used again and again.
+// temp buffer if same batch size is used again and again.
-	unsigned                  last_batch_size;
+unsigned                  last_batch_size;
-	// temporary buffer for interleaved plan
+// temporary buffer for interleaved plan
-	cl_mem   				tempmemobj;
+cl_mem                   tempmemobj;
-	// temporary buffer for planar plan. Only one of tempmemobj or
+// temporary buffer for planar plan. Only one of tempmemobj or
-	// (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
+// (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
-	// on the data format of the plan (planar or interleaved)
+// on the data format of the plan (planar or interleaved)
-	cl_mem                  tempmemobj_real, tempmemobj_imag;
+cl_mem                  tempmemobj_real, tempmemobj_imag;
-	// Maximum size of signal for which local memory transposed based
+// Maximum size of signal for which local memory transposed based
-	// fft is sufficient i.e. no global mem transpose (communication)
+// fft is sufficient i.e. no global mem transpose (communication)
-	// is needed
+// is needed
-	unsigned					max_localmem_fft_size;
+unsigned                    max_localmem_fft_size;
-	// Maximum work items per work group allowed. This, along with max_radix below controls
+// Maximum work items per work group allowed. This, along with max_radix below controls
-	// maximum local memory being used by fft kernels of this plan. Set to 256 by default
+// maximum local memory being used by fft kernels of this plan. Set to 256 by default
-	unsigned                  max_work_item_per_workgroup;
+unsigned                  max_work_item_per_workgroup;
-	// Maximum base radix for local memory fft ... this controls the maximum register
+// Maximum base radix for local memory fft ... this controls the maximum register
-	// space used by work items. Currently defaults to 16
+// space used by work items. Currently defaults to 16
-	unsigned                  max_radix;
+unsigned                  max_radix;
-	// Device-dependent parameter that tells how many work-items need to read consecutive
+// Device-dependent parameter that tells how many work-items need to read consecutive
-	// values to make sure global memory access by work-items of a work-group result in
+// values to make sure global memory access by work-items of a work-group result in
-	// coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
+// coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
-	unsigned                  min_mem_coalesce_width;
+unsigned                  min_mem_coalesce_width;
-	// Number of local memory banks. This is used to generate kernel with local memory
+// Number of local memory banks. This is used to generate kernel with local memory
-	// transposes with appropriate padding to avoid bank conflicts to local memory
+// transposes with appropriate padding to avoid bank conflicts to local memory
-	// e.g. on NVidia it is 16.
+// e.g. on NVidia it is 16.
-	unsigned                  num_local_mem_banks;
+unsigned                  num_local_mem_banks;
 }cl_fft_plan;
 void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir);
 #endif

Mercurial > hg > Members > yuuhi > OpenCL

comparison fft_Example/fft_internal.h @ 7:ea2e7ce9d5bb