2
|
1 //
|
|
2 // File: hello.c
|
|
3 //
|
|
4 // Abstract: A simple "Hello World" compute example showing basic usage of OpenCL which
|
|
5 // calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
|
|
6 // floating point values.
|
|
7 //
|
|
8 //
|
|
9 // Version: <1.0>
|
|
10 //
|
|
11 // Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
|
|
12 // in consideration of your agreement to the following terms, and your use,
|
|
13 // installation, modification or redistribution of this Apple software
|
|
14 // constitutes acceptance of these terms. If you do not agree with these
|
|
15 // terms, please do not use, install, modify or redistribute this Apple
|
|
16 // software.
|
|
17 //
|
|
18 // In consideration of your agreement to abide by the following terms, and
|
|
19 // subject to these terms, Apple grants you a personal, non - exclusive
|
|
20 // license, under Apple's copyrights in this original Apple software ( the
|
|
21 // "Apple Software" ), to use, reproduce, modify and redistribute the Apple
|
|
22 // Software, with or without modifications, in source and / or binary forms;
|
|
23 // provided that if you redistribute the Apple Software in its entirety and
|
|
24 // without modifications, you must retain this notice and the following text
|
|
25 // and disclaimers in all such redistributions of the Apple Software. Neither
|
|
26 // the name, trademarks, service marks or logos of Apple Inc. may be used to
|
|
27 // endorse or promote products derived from the Apple Software without specific
|
|
28 // prior written permission from Apple. Except as expressly stated in this
|
|
29 // notice, no other rights or licenses, express or implied, are granted by
|
|
30 // Apple herein, including but not limited to any patent rights that may be
|
|
31 // infringed by your derivative works or by other works in which the Apple
|
|
32 // Software may be incorporated.
|
|
33 //
|
|
34 // The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
|
|
35 // WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
|
|
36 // WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
|
|
37 // PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
|
|
38 // ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
|
|
39 //
|
|
40 // IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
|
|
41 // CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
42 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
43 // INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
|
|
44 // AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
|
|
45 // UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
|
|
46 // OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
47 //
|
|
48 // Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
|
|
49 //
|
|
50
|
|
51 ////////////////////////////////////////////////////////////////////////////////
|
|
52
|
|
53 #include <fcntl.h>
|
|
54 #include <stdio.h>
|
|
55 #include <stdlib.h>
|
|
56 #include <string.h>
|
|
57 #include <math.h>
|
|
58 #include <unistd.h>
|
|
59 #include <sys/types.h>
|
|
60 #include <sys/stat.h>
|
|
61 #include <OpenCL/opencl.h>
|
|
62
|
|
63 ////////////////////////////////////////////////////////////////////////////////
|
|
64
|
|
// Number of float elements processed by the sample. A static size is used for
// simplicity: main() sizes its stack-allocated host arrays with this value.
//
#define DATA_SIZE (1024)
|
|
68
|
|
69 ////////////////////////////////////////////////////////////////////////////////
|
|
70
|
|
// OpenCL C source of the "square" compute kernel, passed to
// clCreateProgramWithSource() by main().
//
// Each work-item takes its global id i and, when i < count, stores
// input[i] * input[i] in output[i]. Work-items with i >= count do nothing.
//
const char *KernelSource = "\n"
"__kernel void square( \n"
" __global float* input, \n"
" __global float* output, \n"
" const unsigned int count) \n"
"{ \n"
" int i = get_global_id(0); \n"
" if(i < count) \n"
" output[i] = input[i] * input[i]; \n"
"} \n"
"\n";
|
|
84
|
|
85 ////////////////////////////////////////////////////////////////////////////////
|
|
86
|
|
87 int main(int argc, char** argv)
|
|
88 {
|
|
89 int err; // error code returned from api calls
|
|
90
|
|
91 float data[DATA_SIZE]; // original data set given to device
|
|
92 float results[DATA_SIZE]; // results returned from device
|
|
93 unsigned int correct; // number of correct results returned
|
|
94
|
|
95 size_t global; // global domain size for our calculation
|
|
96 size_t local; // local domain size for our calculation
|
|
97
|
|
98 cl_device_id device_id; // compute device id
|
|
99 cl_context context; // compute context
|
|
100 cl_command_queue commands; // compute command queue
|
|
101 cl_program program; // compute program
|
|
102 cl_kernel kernel; // compute kernel
|
|
103
|
|
104 cl_mem input; // device memory used for the input array
|
|
105 cl_mem output; // device memory used for the output array
|
|
106
|
|
107 // Fill our data set with random float values
|
|
108 //
|
|
109 int i = 0;
|
|
110 unsigned int count = DATA_SIZE;
|
|
111 for(i = 0; i < count; i++)
|
|
112 data[i] = rand() / (float)RAND_MAX;
|
|
113
|
|
114 // Connect to a compute device
|
|
115 //
|
|
116 int gpu = 1;
|
|
117 err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
|
|
118 if (err != CL_SUCCESS)
|
|
119 {
|
|
120 printf("Error: Failed to create a device group!\n");
|
|
121 return EXIT_FAILURE;
|
|
122 }
|
|
123
|
|
124 // Create a compute context
|
|
125 //
|
|
126 context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
|
|
127 if (!context)
|
|
128 {
|
|
129 printf("Error: Failed to create a compute context!\n");
|
|
130 return EXIT_FAILURE;
|
|
131 }
|
|
132
|
|
133 // Create a command commands
|
|
134 //
|
|
135 commands = clCreateCommandQueue(context, device_id, 0, &err);
|
|
136 if (!commands)
|
|
137 {
|
|
138 printf("Error: Failed to create a command commands!\n");
|
|
139 return EXIT_FAILURE;
|
|
140 }
|
|
141
|
|
142 // Create the compute program from the source buffer
|
|
143 //
|
|
144 program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
|
|
145 if (!program)
|
|
146 {
|
|
147 printf("Error: Failed to create compute program!\n");
|
|
148 return EXIT_FAILURE;
|
|
149 }
|
|
150
|
|
151 // Build the program executable
|
|
152 //
|
|
153 err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
|
154 if (err != CL_SUCCESS)
|
|
155 {
|
|
156 size_t len;
|
|
157 char buffer[2048];
|
|
158
|
|
159 printf("Error: Failed to build program executable!\n");
|
|
160 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
|
|
161 printf("%s\n", buffer);
|
|
162 exit(1);
|
|
163 }
|
|
164
|
|
165 // Create the compute kernel in the program we wish to run
|
|
166 //
|
|
167 kernel = clCreateKernel(program, "square", &err);
|
|
168 if (!kernel || err != CL_SUCCESS)
|
|
169 {
|
|
170 printf("Error: Failed to create compute kernel!\n");
|
|
171 exit(1);
|
|
172 }
|
|
173
|
|
174 // Create the input and output arrays in device memory for our calculation
|
|
175 //
|
|
176 input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
|
|
177 output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
|
|
178 if (!input || !output)
|
|
179 {
|
|
180 printf("Error: Failed to allocate device memory!\n");
|
|
181 exit(1);
|
|
182 }
|
|
183
|
|
184 // Write our data set into the input array in device memory
|
|
185 //
|
|
186 err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
|
|
187 if (err != CL_SUCCESS)
|
|
188 {
|
|
189 printf("Error: Failed to write to source array!\n");
|
|
190 exit(1);
|
|
191 }
|
|
192
|
|
193 // Set the arguments to our compute kernel
|
|
194 //
|
|
195 err = 0;
|
|
196 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
|
|
197 err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
|
|
198 err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
|
|
199 if (err != CL_SUCCESS)
|
|
200 {
|
|
201 printf("Error: Failed to set kernel arguments! %d\n", err);
|
|
202 exit(1);
|
|
203 }
|
|
204
|
|
205 // Get the maximum work group size for executing the kernel on the device
|
|
206 //
|
|
207 err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
|
|
208 if (err != CL_SUCCESS)
|
|
209 {
|
|
210 printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
|
211 exit(1);
|
|
212 }
|
|
213
|
|
214 // Execute the kernel over the entire range of our 1d input data set
|
|
215 // using the maximum number of work group items for this device
|
|
216 //
|
|
217 global = count;
|
|
218 err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
|
219 if (err)
|
|
220 {
|
|
221 printf("Error: Failed to execute kernel!\n");
|
|
222 return EXIT_FAILURE;
|
|
223 }
|
|
224
|
|
225 // Wait for the command commands to get serviced before reading back results
|
|
226 //
|
|
227 clFinish(commands);
|
|
228
|
|
229 // Read back the results from the device to verify the output
|
|
230 //
|
|
231 err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );
|
|
232 if (err != CL_SUCCESS)
|
|
233 {
|
|
234 printf("Error: Failed to read output array! %d\n", err);
|
|
235 exit(1);
|
|
236 }
|
|
237
|
|
238 // Validate our results
|
|
239 //
|
|
240 correct = 0;
|
|
241 for(i = 0; i < count; i++)
|
|
242 {
|
|
243 if(results[i] == data[i] * data[i])
|
|
244 correct++;
|
|
245 }
|
|
246
|
|
247 // Print a brief summary detailing the results
|
|
248 //
|
|
249 printf("Computed '%d/%d' correct values!\n", correct, count);
|
|
250
|
|
251 // Shutdown and cleanup
|
|
252 //
|
|
253 clReleaseMemObject(input);
|
|
254 clReleaseMemObject(output);
|
|
255 clReleaseProgram(program);
|
|
256 clReleaseKernel(kernel);
|
|
257 clReleaseCommandQueue(commands);
|
|
258 clReleaseContext(context);
|
|
259
|
|
260 return 0;
|
|
261 }
|
|
262
|