comparison hello_World_Example/hello.cc @ 2:ccea4e6a1945

add OpenCL example
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Jan 2013 23:19:41 +0900
parents
children
comparison
equal deleted inserted replaced
1:b511640282d2 2:ccea4e6a1945
1 //
2 // File: hello.cc
3 //
4 // Abstract: A simple "Hello World" compute example showing basic usage of OpenCL which
5 // calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
6 // floating point values.
7 //
8 //
9 // Version: <1.0>
10 //
11 // Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
12 // in consideration of your agreement to the following terms, and your use,
13 // installation, modification or redistribution of this Apple software
14 // constitutes acceptance of these terms. If you do not agree with these
15 // terms, please do not use, install, modify or redistribute this Apple
16 // software.
17 //
18 // In consideration of your agreement to abide by the following terms, and
19 // subject to these terms, Apple grants you a personal, non - exclusive
20 // license, under Apple's copyrights in this original Apple software ( the
21 // "Apple Software" ), to use, reproduce, modify and redistribute the Apple
22 // Software, with or without modifications, in source and / or binary forms;
23 // provided that if you redistribute the Apple Software in its entirety and
24 // without modifications, you must retain this notice and the following text
25 // and disclaimers in all such redistributions of the Apple Software. Neither
26 // the name, trademarks, service marks or logos of Apple Inc. may be used to
27 // endorse or promote products derived from the Apple Software without specific
28 // prior written permission from Apple. Except as expressly stated in this
29 // notice, no other rights or licenses, express or implied, are granted by
30 // Apple herein, including but not limited to any patent rights that may be
31 // infringed by your derivative works or by other works in which the Apple
32 // Software may be incorporated.
33 //
34 // The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
35 // WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
36 // WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
37 // PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
38 // ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
39 //
40 // IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
41 // CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
43 // INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
44 // AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
45 // UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
46 // OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47 //
48 // Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
49 //
50
51 ////////////////////////////////////////////////////////////////////////////////
52
53 #include <fcntl.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <math.h>
58 #include <unistd.h>
59 #include <sys/types.h>
60 #include <sys/stat.h>
61 #include <OpenCL/opencl.h>
62
63 ////////////////////////////////////////////////////////////////////////////////
64
// Fixed number of float elements processed by this example; a compile-time
// constant keeps the host arrays statically sized.
#define DATA_SIZE 1024
68
69 ////////////////////////////////////////////////////////////////////////////////
70
71 // Simple compute kernel which computes the square of an input array
72 //
73 const char *KernelSource = "\n" \
74 "__kernel void square( \n" \
75 " __global float* input, \n" \
76 " __global float* output, \n" \
77 " const unsigned int count) \n" \
78 "{ \n" \
79 " int i = get_global_id(0); \n" \
80 " if(i < count) \n" \
81 " output[i] = input[i] * input[i]; \n" \
82 "} \n" \
83 "\n";
84
85 ////////////////////////////////////////////////////////////////////////////////
86
87 int main(int argc, char** argv)
88 {
89 int err; // error code returned from api calls
90
91 float data[DATA_SIZE]; // original data set given to device
92 float results[DATA_SIZE]; // results returned from device
93 unsigned int correct; // number of correct results returned
94
95 size_t global; // global domain size for our calculation
96 size_t local; // local domain size for our calculation
97
98 cl_device_id device_id; // compute device id
99 cl_context context; // compute context
100 cl_command_queue commands; // compute command queue
101 cl_program program; // compute program
102 cl_kernel kernel; // compute kernel
103
104 cl_mem input; // device memory used for the input array
105 cl_mem output; // device memory used for the output array
106
107 // Fill our data set with random float values
108 //
109 int i = 0;
110 unsigned int count = DATA_SIZE;
111 for(i = 0; i < count; i++)
112 data[i] = rand() / (float)RAND_MAX;
113
114 // Connect to a compute device
115 //
116 int gpu = 1;
117 err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
118 if (err != CL_SUCCESS)
119 {
120 printf("Error: Failed to create a device group!\n");
121 return EXIT_FAILURE;
122 }
123
124 // Create a compute context
125 //
126 context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
127 if (!context)
128 {
129 printf("Error: Failed to create a compute context!\n");
130 return EXIT_FAILURE;
131 }
132
133 // Create a command commands
134 //
135 commands = clCreateCommandQueue(context, device_id, 0, &err);
136 if (!commands)
137 {
138 printf("Error: Failed to create a command commands!\n");
139 return EXIT_FAILURE;
140 }
141
142 // Create the compute program from the source buffer
143 //
144 program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
145 if (!program)
146 {
147 printf("Error: Failed to create compute program!\n");
148 return EXIT_FAILURE;
149 }
150
151 // Build the program executable
152 //
153 err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
154 if (err != CL_SUCCESS)
155 {
156 size_t len;
157 char buffer[2048];
158
159 printf("Error: Failed to build program executable!\n");
160 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
161 printf("%s\n", buffer);
162 exit(1);
163 }
164
165 // Create the compute kernel in the program we wish to run
166 //
167 kernel = clCreateKernel(program, "square", &err);
168 if (!kernel || err != CL_SUCCESS)
169 {
170 printf("Error: Failed to create compute kernel!\n");
171 exit(1);
172 }
173
174 // Create the input and output arrays in device memory for our calculation
175 //
176 input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
177 output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
178 if (!input || !output)
179 {
180 printf("Error: Failed to allocate device memory!\n");
181 exit(1);
182 }
183
184 // Write our data set into the input array in device memory
185 //
186 err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
187 if (err != CL_SUCCESS)
188 {
189 printf("Error: Failed to write to source array!\n");
190 exit(1);
191 }
192
193 // Set the arguments to our compute kernel
194 //
195 err = 0;
196 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
197 err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
198 err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
199 if (err != CL_SUCCESS)
200 {
201 printf("Error: Failed to set kernel arguments! %d\n", err);
202 exit(1);
203 }
204
205 // Get the maximum work group size for executing the kernel on the device
206 //
207 err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
208 if (err != CL_SUCCESS)
209 {
210 printf("Error: Failed to retrieve kernel work group info! %d\n", err);
211 exit(1);
212 }
213
214 // Execute the kernel over the entire range of our 1d input data set
215 // using the maximum number of work group items for this device
216 //
217 global = count;
218 err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
219 if (err)
220 {
221 printf("Error: Failed to execute kernel!\n");
222 return EXIT_FAILURE;
223 }
224
225 // Wait for the command commands to get serviced before reading back results
226 //
227 clFinish(commands);
228
229 // Read back the results from the device to verify the output
230 //
231 err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );
232 if (err != CL_SUCCESS)
233 {
234 printf("Error: Failed to read output array! %d\n", err);
235 exit(1);
236 }
237
238 // Validate our results
239 //
240 correct = 0;
241 for(i = 0; i < count; i++)
242 {
243 if(results[i] == data[i] * data[i])
244 correct++;
245 }
246
247 // Print a brief summary detailing the results
248 //
249 printf("Computed '%d/%d' correct values!\n", correct, count);
250
251 // Shutdown and cleanup
252 //
253 clReleaseMemObject(input);
254 clReleaseMemObject(output);
255 clReleaseProgram(program);
256 clReleaseKernel(kernel);
257 clReleaseCommandQueue(commands);
258 clReleaseContext(context);
259
260 return 0;
261 }
262