Mercurial > hg > Members > yuuhi > OpenCL
comparison hello_World_Example/hello.cc @ 2:ccea4e6a1945
add OpenCL example
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Jan 2013 23:19:41 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:b511640282d2 | 2:ccea4e6a1945 |
---|---|
1 // | |
2 // File: hello.c | |
3 // | |
4 // Abstract: A simple "Hello World" compute example showing basic usage of OpenCL which | |
5 // calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of | |
6 // floating point values. | |
7 // | |
8 // | |
9 // Version: <1.0> | |
10 // | |
11 // Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") | |
12 // in consideration of your agreement to the following terms, and your use, | |
13 // installation, modification or redistribution of this Apple software | |
14 // constitutes acceptance of these terms. If you do not agree with these | |
15 // terms, please do not use, install, modify or redistribute this Apple | |
16 // software. | |
17 // | |
18 // In consideration of your agreement to abide by the following terms, and | |
19 // subject to these terms, Apple grants you a personal, non - exclusive | |
20 // license, under Apple's copyrights in this original Apple software ( the | |
21 // "Apple Software" ), to use, reproduce, modify and redistribute the Apple | |
22 // Software, with or without modifications, in source and / or binary forms; | |
23 // provided that if you redistribute the Apple Software in its entirety and | |
24 // without modifications, you must retain this notice and the following text | |
25 // and disclaimers in all such redistributions of the Apple Software. Neither | |
26 // the name, trademarks, service marks or logos of Apple Inc. may be used to | |
27 // endorse or promote products derived from the Apple Software without specific | |
28 // prior written permission from Apple. Except as expressly stated in this | |
29 // notice, no other rights or licenses, express or implied, are granted by | |
30 // Apple herein, including but not limited to any patent rights that may be | |
31 // infringed by your derivative works or by other works in which the Apple | |
32 // Software may be incorporated. | |
33 // | |
34 // The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO | |
35 // WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED | |
36 // WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A | |
37 // PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION | |
38 // ALONE OR IN COMBINATION WITH YOUR PRODUCTS. | |
39 // | |
40 // IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR | |
41 // CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
42 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
43 // INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION | |
44 // AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER | |
45 // UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR | |
46 // OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
47 // | |
48 // Copyright ( C ) 2008 Apple Inc. All Rights Reserved. | |
49 // | |
50 | |
51 //////////////////////////////////////////////////////////////////////////////// | |
52 | |
53 #include <fcntl.h> | |
54 #include <stdio.h> | |
55 #include <stdlib.h> | |
56 #include <string.h> | |
57 #include <math.h> | |
58 #include <unistd.h> | |
59 #include <sys/types.h> | |
60 #include <sys/stat.h> | |
61 #include <OpenCL/opencl.h> | |
62 | |
63 //////////////////////////////////////////////////////////////////////////////// | |
64 | |
65 // Use a static data size for simplicity | |
66 // | |
67 #define DATA_SIZE (1024) | |
68 | |
69 //////////////////////////////////////////////////////////////////////////////// | |
70 | |
// OpenCL C source of the "square" kernel, handed to the OpenCL runtime as text.
// Each work-item whose global id i is below count stores input[i] * input[i]
// into output[i]; work-items at or beyond count do nothing.
//
// Adjacent string literals are concatenated by the compiler, so the pieces
// below form one single string without any line-continuation backslashes.
const char *KernelSource =
    "\n"
    "__kernel void square( \n"
    " __global float* input, \n"
    " __global float* output, \n"
    " const unsigned int count) \n"
    "{ \n"
    " int i = get_global_id(0); \n"
    " if(i < count) \n"
    " output[i] = input[i] * input[i]; \n"
    "} \n"
    "\n";
84 | |
85 //////////////////////////////////////////////////////////////////////////////// | |
86 | |
87 int main(int argc, char** argv) | |
88 { | |
89 int err; // error code returned from api calls | |
90 | |
91 float data[DATA_SIZE]; // original data set given to device | |
92 float results[DATA_SIZE]; // results returned from device | |
93 unsigned int correct; // number of correct results returned | |
94 | |
95 size_t global; // global domain size for our calculation | |
96 size_t local; // local domain size for our calculation | |
97 | |
98 cl_device_id device_id; // compute device id | |
99 cl_context context; // compute context | |
100 cl_command_queue commands; // compute command queue | |
101 cl_program program; // compute program | |
102 cl_kernel kernel; // compute kernel | |
103 | |
104 cl_mem input; // device memory used for the input array | |
105 cl_mem output; // device memory used for the output array | |
106 | |
107 // Fill our data set with random float values | |
108 // | |
109 int i = 0; | |
110 unsigned int count = DATA_SIZE; | |
111 for(i = 0; i < count; i++) | |
112 data[i] = rand() / (float)RAND_MAX; | |
113 | |
114 // Connect to a compute device | |
115 // | |
116 int gpu = 1; | |
117 err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); | |
118 if (err != CL_SUCCESS) | |
119 { | |
120 printf("Error: Failed to create a device group!\n"); | |
121 return EXIT_FAILURE; | |
122 } | |
123 | |
124 // Create a compute context | |
125 // | |
126 context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); | |
127 if (!context) | |
128 { | |
129 printf("Error: Failed to create a compute context!\n"); | |
130 return EXIT_FAILURE; | |
131 } | |
132 | |
133 // Create a command commands | |
134 // | |
135 commands = clCreateCommandQueue(context, device_id, 0, &err); | |
136 if (!commands) | |
137 { | |
138 printf("Error: Failed to create a command commands!\n"); | |
139 return EXIT_FAILURE; | |
140 } | |
141 | |
142 // Create the compute program from the source buffer | |
143 // | |
144 program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err); | |
145 if (!program) | |
146 { | |
147 printf("Error: Failed to create compute program!\n"); | |
148 return EXIT_FAILURE; | |
149 } | |
150 | |
151 // Build the program executable | |
152 // | |
153 err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); | |
154 if (err != CL_SUCCESS) | |
155 { | |
156 size_t len; | |
157 char buffer[2048]; | |
158 | |
159 printf("Error: Failed to build program executable!\n"); | |
160 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); | |
161 printf("%s\n", buffer); | |
162 exit(1); | |
163 } | |
164 | |
165 // Create the compute kernel in the program we wish to run | |
166 // | |
167 kernel = clCreateKernel(program, "square", &err); | |
168 if (!kernel || err != CL_SUCCESS) | |
169 { | |
170 printf("Error: Failed to create compute kernel!\n"); | |
171 exit(1); | |
172 } | |
173 | |
174 // Create the input and output arrays in device memory for our calculation | |
175 // | |
176 input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL); | |
177 output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL); | |
178 if (!input || !output) | |
179 { | |
180 printf("Error: Failed to allocate device memory!\n"); | |
181 exit(1); | |
182 } | |
183 | |
184 // Write our data set into the input array in device memory | |
185 // | |
186 err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL); | |
187 if (err != CL_SUCCESS) | |
188 { | |
189 printf("Error: Failed to write to source array!\n"); | |
190 exit(1); | |
191 } | |
192 | |
193 // Set the arguments to our compute kernel | |
194 // | |
195 err = 0; | |
196 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input); | |
197 err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); | |
198 err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); | |
199 if (err != CL_SUCCESS) | |
200 { | |
201 printf("Error: Failed to set kernel arguments! %d\n", err); | |
202 exit(1); | |
203 } | |
204 | |
205 // Get the maximum work group size for executing the kernel on the device | |
206 // | |
207 err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); | |
208 if (err != CL_SUCCESS) | |
209 { | |
210 printf("Error: Failed to retrieve kernel work group info! %d\n", err); | |
211 exit(1); | |
212 } | |
213 | |
214 // Execute the kernel over the entire range of our 1d input data set | |
215 // using the maximum number of work group items for this device | |
216 // | |
217 global = count; | |
218 err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); | |
219 if (err) | |
220 { | |
221 printf("Error: Failed to execute kernel!\n"); | |
222 return EXIT_FAILURE; | |
223 } | |
224 | |
225 // Wait for the command commands to get serviced before reading back results | |
226 // | |
227 clFinish(commands); | |
228 | |
229 // Read back the results from the device to verify the output | |
230 // | |
231 err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL ); | |
232 if (err != CL_SUCCESS) | |
233 { | |
234 printf("Error: Failed to read output array! %d\n", err); | |
235 exit(1); | |
236 } | |
237 | |
238 // Validate our results | |
239 // | |
240 correct = 0; | |
241 for(i = 0; i < count; i++) | |
242 { | |
243 if(results[i] == data[i] * data[i]) | |
244 correct++; | |
245 } | |
246 | |
247 // Print a brief summary detailing the results | |
248 // | |
249 printf("Computed '%d/%d' correct values!\n", correct, count); | |
250 | |
251 // Shutdown and cleanup | |
252 // | |
253 clReleaseMemObject(input); | |
254 clReleaseMemObject(output); | |
255 clReleaseProgram(program); | |
256 clReleaseKernel(kernel); | |
257 clReleaseCommandQueue(commands); | |
258 clReleaseContext(context); | |
259 | |
260 return 0; | |
261 } | |
262 |