/*
    Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


/*
 * Include file for Offload API.
 */

#ifndef OFFLOAD_H_INCLUDED
#define OFFLOAD_H_INCLUDED

#ifdef __cplusplus
#if defined(LINUX) || defined(FREEBSD)
#include <bits/functexcept.h>
#endif
#include <new>
#endif

#include <stddef.h>
#include <omp.h>

/*
 * Fixed-width 64-bit integer types used by the declarations below.
 * When TARGET_WINNT is defined they are declared locally instead of
 * being taken from <stdint.h>.
 */
#ifdef TARGET_WINNT
// <stdint.h> is incompatible on Windows.
typedef unsigned long long int  uint64_t;
typedef signed long long int    int64_t;
#else
#include <stdint.h>
#endif // TARGET_WINNT

55 #ifdef __cplusplus
|
|
56 extern "C" {
|
|
57 #endif
|
|
58
|
|
/* Attribute that expands to __declspec(target(mic)). */
#define TARGET_ATTRIBUTE __declspec(target(mic))

/*
 *  The target architecture.
 */
typedef enum TARGET_TYPE {
    TARGET_NONE,    /* Undefined target */
    TARGET_HOST,    /* Host used as target */
    TARGET_MIC      /* MIC target */
} TARGET_TYPE;

/*
 *  The default target type.
 */
#define DEFAULT_TARGET_TYPE TARGET_MIC

/*
 *  The default target number.
 */
#define DEFAULT_TARGET_NUMBER 0

/*
 *  Offload status.
 */
typedef enum {
    OFFLOAD_SUCCESS = 0,
    OFFLOAD_DISABLED,           /* offload is disabled */
    OFFLOAD_UNAVAILABLE,        /* card is not available */
    OFFLOAD_OUT_OF_MEMORY,      /* not enough memory on device */
    OFFLOAD_PROCESS_DIED,       /* target process has died */
    OFFLOAD_ERROR               /* unspecified error */
} _Offload_result;

/* Status record: result code, device number and transfer byte counts. */
typedef struct {
    _Offload_result result;     /* result, see above */
    int             device_number;  /* device number */
    size_t          data_sent;      /* number of bytes sent to the target */
    size_t          data_received;  /* number of bytes received by host */
} _Offload_status;

/* Stream handle type used by the _Offload_stream_* interfaces. */
typedef int64_t _Offload_stream;

/*
 * Sets only the result field of status object x to OFFLOAD_DISABLED;
 * the other fields are left untouched.
 */
#define OFFLOAD_STATUS_INIT(x) \
    ((x).result = OFFLOAD_DISABLED)

/*
 * Aggregate initializer for _Offload_status: result OFFLOAD_DISABLED,
 * device_number -1, both byte counters 0.
 */
#define OFFLOAD_STATUS_INITIALIZER \
    { OFFLOAD_DISABLED, -1, 0, 0 }

/* Offload runtime interfaces */

/* Device queries; each takes no arguments and returns an int. */
extern int _Offload_number_of_devices(void);
extern int _Offload_get_device_number(void);
extern int _Offload_get_physical_device_number(void);

113 /* Offload stream runtime interfaces */
|
|
114
|
|
115 extern _Offload_stream _Offload_stream_create(
|
|
116 int device, // MIC device number
|
|
117 int number_of_cpus // Cores allocated to the stream
|
|
118 );
|
|
119
|
|
120 extern int _Offload_stream_destroy(
|
|
121 int device, // MIC device number
|
|
122 _Offload_stream stream // stream handle
|
|
123 );
|
|
124
|
|
125 extern int _Offload_stream_delete(
|
|
126 _Offload_stream handle // stream handle
|
|
127 );
|
|
128
|
|
129 extern int _Offload_stream_completed(
|
|
130 int device, // MIC device number
|
|
131 _Offload_stream handle // stream handle
|
|
132 );
|
|
133
|
|
134 extern int _Offload_device_streams_completed(
|
|
135 int device // MIC device number
|
|
136 );
|
|
137
|
|
138 extern int _Offload_stream_is_empty(
|
|
139 _Offload_stream handle // stream handle
|
|
140 );
|
|
141
|
|
/*
 * _Offload_shared_malloc/free are only supported when offload is enabled
 * else they are defined to malloc and free
 *
 * The fallback macros deliberately carry no trailing semicolon so that
 * they expand to a single expression, exactly like a call to the
 * functions declared in the __INTEL_OFFLOAD branch.  This keeps
 * constructs such as "if (p) _Offload_shared_free(p); else ..." valid
 * in both configurations.
 */
#ifdef __INTEL_OFFLOAD
extern void* _Offload_shared_malloc(size_t size);
extern void  _Offload_shared_free(void *ptr);
extern void* _Offload_shared_aligned_malloc(size_t size, size_t align);
extern void  _Offload_shared_aligned_free(void *ptr);
#else
#include <malloc.h>
#define _Offload_shared_malloc(size)                 malloc(size)
#define _Offload_shared_free(ptr)                    free(ptr)
#if defined(_WIN32)
#define _Offload_shared_aligned_malloc(size, align)  _aligned_malloc(size, align)
#define _Offload_shared_aligned_free(ptr)            _aligned_free(ptr)
#else
/* Note the argument order: memalign takes the alignment first. */
#define _Offload_shared_aligned_malloc(size, align)  memalign(align, size)
#define _Offload_shared_aligned_free(ptr)            free(ptr)
#endif
#endif


extern int _Offload_signaled(int index, void *signal);
extern void _Offload_report(int val);

/*
 * Takes a target number and a CPU address; every remaining argument is
 * a pointer through which a value can be handed back to the caller.
 * NOTE(review): presumably these are outputs describing the associated
 * MIC buffer - confirm against the implementation.
 */
extern int _Offload_find_associated_mic_memory(
    int          target,
    const void*  cpu_addr,
    void**       cpu_base_addr,
    uint64_t*    buf_length,
    void**       mic_addr,
    uint64_t*    mic_buf_start_offset,
    int*         is_static
);

177 /* OpenMP API */
|
|
178
|
|
179 extern void omp_set_default_device(int num) __GOMP_NOTHROW;
|
|
180 extern int omp_get_default_device(void) __GOMP_NOTHROW;
|
|
181 extern int omp_get_num_devices(void) __GOMP_NOTHROW;
|
|
182
|
|
183 // OpenMP 4.5 APIs
|
|
184
|
|
185 /*! \fn omp_get_initial_device
|
|
186 \brief Return the device id of the initial device.
|
|
187 \return Returns the device id of the initial device.
|
|
188 */
|
|
189 extern int omp_get_initial_device(
|
|
190 void
|
|
191 ) __GOMP_NOTHROW;
|
|
192
|
|
193 /*! \fn omp_target_alloc
|
|
194 \brief Allocate memory in the device data environment.
|
|
195 \param size Number of bytes to allocate.
|
|
196 \param device_num The device number on which to allocate.
|
|
197 \return Returns a pointer to the allocated memory.
|
|
198 */
|
|
199 extern void* omp_target_alloc(
|
|
200 size_t size,
|
|
201 int device_num
|
|
202 ) __GOMP_NOTHROW;
|
|
203
|
|
204 /*! \fn omp_target_free
|
|
205 \brief Free memory in the device data environment.
|
|
206 \param device_ptr Address of allocated device memory.
|
|
207 \param device_num The device number on which to free.
|
|
208 */
|
|
209 extern void omp_target_free(
|
|
210 void *device_ptr,
|
|
211 int device_num
|
|
212 ) __GOMP_NOTHROW;
|
|
213
|
|
214 /*! \fn omp_target_is_present
|
|
215 \brief Test whether a host pointer has corresponding storage on a device.
|
|
216 \param device_ptr Address of allocated device memory.
|
|
217 \param device_num The device number on which to test..
|
|
218 \return true if storage is found, false otherwise.
|
|
219 */
|
|
220 extern int omp_target_is_present(
|
|
221 void *ptr,
|
|
222 int device_num
|
|
223 ) __GOMP_NOTHROW;
|
|
224
|
|
225 /*! \fn omp_target_memcpy
|
|
226 \brief Copy memory between host/device pointers.
|
|
227 \param dst Address of destination memory.
|
|
228 \param src Address of source memory.
|
|
229 \param length Number of bytes to copy.
|
|
230 \param dst_offset Destination offset in bytes.
|
|
231 \param src_offset Source offset in bytes.
|
|
232 \param dst_device Destination device number.
|
|
233 \param src_device Source device number.
|
|
234 \return 0 on success, 1 otherwise.
|
|
235 */
|
|
236 extern int omp_target_memcpy(
|
|
237 void *dst,
|
|
238 void *src,
|
|
239 size_t length,
|
|
240 size_t dst_offset,
|
|
241 size_t src_offset,
|
|
242 int dst_device,
|
|
243 int src_device
|
|
244 ) __GOMP_NOTHROW;
|
|
245
|
|
246 /*! \fn omp_target_memcpy_rect
|
|
247 \brief Copy a rectangular subsection from
|
|
248 \brief one multi-dimensional array to another.
|
|
249 \param dst Address of destination array.
|
|
250 \param src Address of source array.
|
|
251 \param element_size Number of bytes in each array element.
|
|
252 \param num_dims Number of dimensions.
|
|
253 \param volume Array of element counts to copy in each dimension.
|
|
254 \param dst_offsets Destination offsets array.
|
|
255 \param src_offsets Source offsets array.
|
|
256 \param dst_dims Destination array dimensions array.
|
|
257 \param src_dims Source array dimensions array.
|
|
258 \param dst_device Destination device number.
|
|
259 \param src_device Source device number.
|
|
260 \return 0 on success, 1 otherwise.
|
|
261 */
|
|
262 extern int omp_target_memcpy_rect(
|
|
263 void *dst,
|
|
264 void *src,
|
|
265 size_t element_size,
|
|
266 int num_dims,
|
|
267 const size_t *volume,
|
|
268 const size_t *dst_offsets,
|
|
269 const size_t *src_offsets,
|
|
270 const size_t *dst_dimensions,
|
|
271 const size_t *src_dimensions,
|
|
272 int dst_device,
|
|
273 int src_device
|
|
274 ) __GOMP_NOTHROW;
|
|
275
|
|
276 /*! \fn omp_target_associate_ptr
|
|
277 \brief Map a device pointer to a host pointer.
|
|
278 \param host_ptr The host pointer.
|
|
279 \param device_ptr The device pointer.
|
|
280 \param size Number of bytes to map.
|
|
281 \param device_offset Offset on device of mapped memory.
|
|
282 \param device_num Device number.
|
|
283 \return 0 on success, 1 otherwise.
|
|
284 */
|
|
285 extern int omp_target_associate_ptr(
|
|
286 void *host_ptr,
|
|
287 void *device_ptr,
|
|
288 size_t size,
|
|
289 size_t device_offset,
|
|
290 int device_num
|
|
291 ) __GOMP_NOTHROW;
|
|
292
|
|
293 /*! \fn omp_target_disassociate_ptr
|
|
294 \brief Remove a host pointer to device pointer association.
|
|
295 \param ptr The host pointer to disassociate.
|
|
296 \param device_num Device number.
|
|
297 \return 0 on success, 1 otherwise.
|
|
298 */
|
|
299 extern int omp_target_disassociate_ptr(
|
|
300 void *host_ptr,
|
|
301 int device_num
|
|
302 ) __GOMP_NOTHROW;
|
|
303
|
|
304 // End of OpenMP 4.5 APIs
|
|
305
|
|
306 /* OpenMP API wrappers */
|
|
307
|
|
308 /* Set num_threads on target */
|
|
309 extern void omp_set_num_threads_target(
|
|
310 TARGET_TYPE target_type,
|
|
311 int target_number,
|
|
312 int num_threads
|
|
313 );
|
|
314
|
|
315 /* Get max_threads from target */
|
|
316 extern int omp_get_max_threads_target(
|
|
317 TARGET_TYPE target_type,
|
|
318 int target_number
|
|
319 );
|
|
320
|
|
321 /* Get num_procs from target */
|
|
322 extern int omp_get_num_procs_target(
|
|
323 TARGET_TYPE target_type,
|
|
324 int target_number
|
|
325 );
|
|
326
|
|
327 /* Set dynamic on target */
|
|
328 extern void omp_set_dynamic_target(
|
|
329 TARGET_TYPE target_type,
|
|
330 int target_number,
|
|
331 int num_threads
|
|
332 );
|
|
333
|
|
334 /* Get dynamic from target */
|
|
335 extern int omp_get_dynamic_target(
|
|
336 TARGET_TYPE target_type,
|
|
337 int target_number
|
|
338 );
|
|
339
|
|
340 /* Set nested on target */
|
|
341 extern void omp_set_nested_target(
|
|
342 TARGET_TYPE target_type,
|
|
343 int target_number,
|
|
344 int nested
|
|
345 );
|
|
346
|
|
347 /* Get nested from target */
|
|
348 extern int omp_get_nested_target(
|
|
349 TARGET_TYPE target_type,
|
|
350 int target_number
|
|
351 );
|
|
352
|
|
353 extern void omp_set_num_threads_target(
|
|
354 TARGET_TYPE target_type,
|
|
355 int target_number,
|
|
356 int num_threads
|
|
357 );
|
|
358
|
|
359 extern int omp_get_max_threads_target(
|
|
360 TARGET_TYPE target_type,
|
|
361 int target_number
|
|
362 );
|
|
363
|
|
364 extern int omp_get_num_procs_target(
|
|
365 TARGET_TYPE target_type,
|
|
366 int target_number
|
|
367 );
|
|
368
|
|
369 extern void omp_set_dynamic_target(
|
|
370 TARGET_TYPE target_type,
|
|
371 int target_number,
|
|
372 int num_threads
|
|
373 );
|
|
374
|
|
375 extern int omp_get_dynamic_target(
|
|
376 TARGET_TYPE target_type,
|
|
377 int target_number
|
|
378 );
|
|
379
|
|
380 extern void omp_set_nested_target(
|
|
381 TARGET_TYPE target_type,
|
|
382 int target_number,
|
|
383 int num_threads
|
|
384 );
|
|
385
|
|
386 extern int omp_get_nested_target(
|
|
387 TARGET_TYPE target_type,
|
|
388 int target_number
|
|
389 );
|
|
390
|
|
391 extern void omp_set_schedule_target(
|
|
392 TARGET_TYPE target_type,
|
|
393 int target_number,
|
|
394 omp_sched_t kind,
|
|
395 int modifier
|
|
396 );
|
|
397
|
|
398 extern void omp_get_schedule_target(
|
|
399 TARGET_TYPE target_type,
|
|
400 int target_number,
|
|
401 omp_sched_t *kind,
|
|
402 int *modifier
|
|
403 );
|
|
404
|
|
405 /* lock API functions */
|
|
406
|
|
407 typedef struct {
|
|
408 omp_lock_t lock;
|
|
409 } omp_lock_target_t;
|
|
410
|
|
411 extern void omp_init_lock_target(
|
|
412 TARGET_TYPE target_type,
|
|
413 int target_number,
|
|
414 omp_lock_target_t *lock
|
|
415 );
|
|
416
|
|
417 extern void omp_destroy_lock_target(
|
|
418 TARGET_TYPE target_type,
|
|
419 int target_number,
|
|
420 omp_lock_target_t *lock
|
|
421 );
|
|
422
|
|
423 extern void omp_set_lock_target(
|
|
424 TARGET_TYPE target_type,
|
|
425 int target_number,
|
|
426 omp_lock_target_t *lock
|
|
427 );
|
|
428
|
|
429 extern void omp_unset_lock_target(
|
|
430 TARGET_TYPE target_type,
|
|
431 int target_number,
|
|
432 omp_lock_target_t *lock
|
|
433 );
|
|
434
|
|
435 extern int omp_test_lock_target(
|
|
436 TARGET_TYPE target_type,
|
|
437 int target_number,
|
|
438 omp_lock_target_t *lock
|
|
439 );
|
|
440
|
|
441 /* nested lock API functions */
|
|
442
|
|
443 typedef struct {
|
|
444 omp_nest_lock_t lock;
|
|
445 } omp_nest_lock_target_t;
|
|
446
|
|
447 extern void omp_init_nest_lock_target(
|
|
448 TARGET_TYPE target_type,
|
|
449 int target_number,
|
|
450 omp_nest_lock_target_t *lock
|
|
451 );
|
|
452
|
|
453 extern void omp_destroy_nest_lock_target(
|
|
454 TARGET_TYPE target_type,
|
|
455 int target_number,
|
|
456 omp_nest_lock_target_t *lock
|
|
457 );
|
|
458
|
|
459 extern void omp_set_nest_lock_target(
|
|
460 TARGET_TYPE target_type,
|
|
461 int target_number,
|
|
462 omp_nest_lock_target_t *lock
|
|
463 );
|
|
464
|
|
465 extern void omp_unset_nest_lock_target(
|
|
466 TARGET_TYPE target_type,
|
|
467 int target_number,
|
|
468 omp_nest_lock_target_t *lock
|
|
469 );
|
|
470
|
|
471 extern int omp_test_nest_lock_target(
|
|
472 TARGET_TYPE target_type,
|
|
473 int target_number,
|
|
474 omp_nest_lock_target_t *lock
|
|
475 );
|
|
476
|
|
477 #ifdef __cplusplus
|
|
478 } /* extern "C" */
|
|
479
|
|
480 /* Namespace for the shared_allocator. */
|
|
481 namespace __offload {
|
|
482 /* This follows the specification for std::allocator. */
|
|
483 /* Forward declaration of the class template. */
|
|
484 template <typename T>
|
|
485 class shared_allocator;
|
|
486
|
|
487 /* Specialization for shared_allocator<void>. */
|
|
488 template <>
|
|
489 class shared_allocator<void> {
|
|
490 public:
|
|
491 typedef void *pointer;
|
|
492 typedef const void *const_pointer;
|
|
493 typedef void value_type;
|
|
494 template <class U> struct rebind { typedef shared_allocator<U> other; };
|
|
495 };
|
|
496
|
|
497 /* Definition of shared_allocator<T>. */
|
|
498 template <class T>
|
|
499 class shared_allocator {
|
|
500 public:
|
|
501 typedef size_t size_type;
|
|
502 typedef ptrdiff_t difference_type;
|
|
503 typedef T *pointer;
|
|
504 typedef const T *const_pointer;
|
|
505 typedef T &reference;
|
|
506 typedef const T &const_reference;
|
|
507 typedef T value_type;
|
|
508 template <class U> struct rebind { typedef shared_allocator<U> other; };
|
|
509 shared_allocator() throw() { }
|
|
510 shared_allocator(const shared_allocator&) throw() { }
|
|
511 template <class U> shared_allocator(const shared_allocator<U>&) throw() { }
|
|
512 ~shared_allocator() throw() { }
|
|
513 pointer address(reference x) const { return &x; }
|
|
514 const_pointer address(const_reference x) const { return &x; }
|
|
515 pointer allocate(
|
|
516 size_type, shared_allocator<void>::const_pointer hint = 0);
|
|
517 void deallocate(pointer p, size_type n);
|
|
518 size_type max_size() const throw() {
|
|
519 return size_type(-1)/sizeof(T);
|
|
520 } /* max_size */
|
|
521 void construct(pointer p, const T& arg) {
|
|
522 ::new (p) T(arg);
|
|
523 } /* construct */
|
|
524 void destroy(pointer p) {
|
|
525 p->~T();
|
|
526 } /* destroy */
|
|
527 };
|
|
528
|
|
529 /* Definition for allocate. */
|
|
530 template <class T>
|
|
531 typename shared_allocator<T>::pointer
|
|
532 shared_allocator<T>::allocate(shared_allocator<T>::size_type s,
|
|
533 shared_allocator<void>::const_pointer) {
|
|
534 /* Allocate from shared memory. */
|
|
535 void *ptr = _Offload_shared_malloc(s*sizeof(T));
|
|
536 #if (defined(_WIN32) || defined(_WIN64)) /* Windows */
|
|
537 if (ptr == 0) throw std::bad_alloc();
|
|
538 #else
|
|
539 if (ptr == 0) std::__throw_bad_alloc();
|
|
540 #endif
|
|
541 return static_cast<pointer>(ptr);
|
|
542 } /* allocate */
|
|
543
|
|
544 template <class T>
|
|
545 void shared_allocator<T>::deallocate(pointer p,
|
|
546 shared_allocator<T>::size_type) {
|
|
547 /* Free the shared memory. */
|
|
548 _Offload_shared_free(p);
|
|
549 } /* deallocate */
|
|
550
|
|
551 template <typename _T1, typename _T2>
|
|
552 inline bool operator==(const shared_allocator<_T1> &,
|
|
553 const shared_allocator<_T2> &) throw() {
|
|
554 return true;
|
|
555 } /* operator== */
|
|
556
|
|
557 template <typename _T1, typename _T2>
|
|
558 inline bool operator!=(const shared_allocator<_T1> &,
|
|
559 const shared_allocator<_T2> &) throw() {
|
|
560 return false;
|
|
561 } /* operator!= */
|
|
562 } /* __offload */
|
|
563 #endif /* __cplusplus */
|
|
564
|
|
565 #endif /* OFFLOAD_H_INCLUDED */
|