111
|
1 /*
|
|
2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
|
|
3
|
|
4 Redistribution and use in source and binary forms, with or without
|
|
5 modification, are permitted provided that the following conditions
|
|
6 are met:
|
|
7
|
|
8 * Redistributions of source code must retain the above copyright
|
|
9 notice, this list of conditions and the following disclaimer.
|
|
10 * Redistributions in binary form must reproduce the above copyright
|
|
11 notice, this list of conditions and the following disclaimer in the
|
|
12 documentation and/or other materials provided with the distribution.
|
|
13 * Neither the name of Intel Corporation nor the names of its
|
|
14 contributors may be used to endorse or promote products derived
|
|
15 from this software without specific prior written permission.
|
|
16
|
|
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
28 */
|
|
29
|
|
30
|
|
31 #include "compiler_if_host.h"
|
|
32
|
|
33 #include <malloc.h>
|
|
34 #ifndef TARGET_WINNT
|
|
35 #include <alloca.h>
|
|
36 #endif // TARGET_WINNT
|
|
37
|
|
38 // Global counter on host.
|
|
39 // This variable is used if P2OPT_offload_do_data_persistence == 2.
|
|
40 // The variable is used to identify offload constructs contained in one procedure.
|
|
41 // Increment of OFFLOAD_CALL_COUNT is inserted at entries of HOST routines with
|
|
42 // offload constructs.
|
|
43 static int offload_call_count = 0;
|
|
44
|
|
45 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
|
|
46 TARGET_TYPE target_type,
|
|
47 int target_number,
|
|
48 int is_optional,
|
|
49 _Offload_status* status,
|
|
50 const char* file,
|
|
51 uint64_t line
|
|
52 )
|
|
53 {
|
|
54 bool retval;
|
|
55 OFFLOAD ofld;
|
|
56
|
|
57 // initialize status
|
|
58 if (status != 0) {
|
|
59 status->result = OFFLOAD_UNAVAILABLE;
|
|
60 status->device_number = -1;
|
|
61 status->data_sent = 0;
|
|
62 status->data_received = 0;
|
|
63 }
|
|
64
|
|
65 // make sure libray is initialized
|
|
66 retval = __offload_init_library();
|
|
67
|
|
68 // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
|
|
69 OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
|
|
70
|
|
71 OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
|
|
72
|
|
73 OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
|
74
|
|
75 // initialize all devices is init_type is on_offload_all
|
|
76 if (retval && __offload_init_type == c_init_on_offload_all) {
|
|
77 for (int i = 0; i < mic_engines_total; i++) {
|
|
78 mic_engines[i].init();
|
|
79 }
|
|
80 }
|
|
81 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
|
|
82
|
|
83 OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
|
|
84
|
|
85 if (target_type == TARGET_HOST) {
|
|
86 // Host always available
|
|
87 retval = true;
|
|
88 }
|
|
89 else if (target_type == TARGET_MIC) {
|
|
90 if (target_number >= -1) {
|
|
91 if (retval) {
|
|
92 if (target_number >= 0) {
|
|
93 // User provided the device number
|
|
94 target_number = target_number % mic_engines_total;
|
|
95 }
|
|
96 else {
|
|
97 // use device 0
|
|
98 target_number = 0;
|
|
99 }
|
|
100
|
|
101 // reserve device in ORSL
|
|
102 if (is_optional) {
|
|
103 if (!ORSL::try_reserve(target_number)) {
|
|
104 target_number = -1;
|
|
105 }
|
|
106 }
|
|
107 else {
|
|
108 if (!ORSL::reserve(target_number)) {
|
|
109 target_number = -1;
|
|
110 }
|
|
111 }
|
|
112
|
|
113 // initialize device
|
|
114 if (target_number >= 0 &&
|
|
115 __offload_init_type == c_init_on_offload) {
|
|
116 OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
|
117 mic_engines[target_number].init();
|
|
118 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
|
|
119 }
|
|
120 }
|
|
121 else {
|
|
122 // fallback to CPU
|
|
123 target_number = -1;
|
|
124 }
|
|
125
|
|
126 if (target_number < 0 || !retval) {
|
|
127 if (!is_optional && status == 0) {
|
|
128 LIBOFFLOAD_ERROR(c_device_is_not_available);
|
|
129 exit(1);
|
|
130 }
|
|
131
|
|
132 retval = false;
|
|
133 }
|
|
134 }
|
|
135 else {
|
|
136 LIBOFFLOAD_ERROR(c_invalid_device_number);
|
|
137 exit(1);
|
|
138 }
|
|
139 }
|
|
140
|
|
141 if (retval) {
|
|
142 ofld = new OffloadDescriptor(target_number, status,
|
|
143 !is_optional, false, timer_data);
|
|
144 OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
|
|
145 Offload_Report_Prolog(timer_data);
|
|
146 OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
|
|
147 "Starting offload: target_type = %d, "
|
|
148 "number = %d, is_optional = %d\n",
|
|
149 target_type, target_number, is_optional);
|
|
150
|
|
151 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
|
|
152 }
|
|
153 else {
|
|
154 ofld = NULL;
|
|
155
|
|
156 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
|
|
157 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
|
|
158 offload_report_free_data(timer_data);
|
|
159 }
|
|
160
|
|
161 return ofld;
|
|
162 }
|
|
163
|
|
164 // This routine is called for OpenMP4.5 offload calls
|
|
165 // OpenMP 4.5 offload is always optional.
|
|
166 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
|
|
167 const int* device_num,
|
|
168 const char* file,
|
|
169 uint64_t line
|
|
170 )
|
|
171 {
|
|
172 int target_number;
|
|
173
|
|
174 // make sure libray is initialized and at least one device is available
|
|
175 if (!__offload_init_library()) {
|
|
176 OFFLOAD_DEBUG_TRACE(2, "No device available, fall back to host\n");
|
|
177 return NULL;
|
|
178 }
|
|
179
|
|
180 // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
|
|
181
|
|
182 OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
|
|
183
|
|
184 OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
|
|
185
|
|
186 OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
|
187
|
|
188 if (__offload_init_type == c_init_on_offload_all) {
|
|
189 for (int i = 0; i < mic_engines_total; i++) {
|
|
190 mic_engines[i].init();
|
|
191 }
|
|
192 }
|
|
193
|
|
194 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
|
|
195
|
|
196 OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
|
|
197
|
|
198 // use default device number if it is not provided
|
|
199 if (device_num != 0) {
|
|
200 target_number = *device_num;
|
|
201 }
|
|
202 else {
|
|
203 target_number = __omp_device_num;
|
|
204 }
|
|
205
|
|
206 // device number should be a non-negative integer value
|
|
207 if (target_number < 0) {
|
|
208 LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
|
|
209 exit(1);
|
|
210 }
|
|
211
|
|
212 // should we do this for OpenMP?
|
|
213 target_number %= mic_engines_total;
|
|
214
|
|
215 // reserve device in ORSL
|
|
216 if (!ORSL::reserve(target_number)) {
|
|
217 LIBOFFLOAD_ERROR(c_device_is_not_available);
|
|
218 exit(1);
|
|
219 }
|
|
220
|
|
221 // initialize device(s)
|
|
222 OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
|
223
|
|
224 if (__offload_init_type == c_init_on_offload) {
|
|
225 mic_engines[target_number].init();
|
|
226 }
|
|
227
|
|
228 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
|
|
229
|
|
230 OFFLOAD ofld =
|
|
231 new OffloadDescriptor(target_number, 0, true, true, timer_data);
|
|
232
|
|
233 OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
|
|
234
|
|
235 Offload_Report_Prolog(timer_data);
|
|
236
|
|
237 OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
|
|
238 "Starting OpenMP offload, device = %d\n",
|
|
239 target_number);
|
|
240
|
|
241 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
|
|
242
|
|
243 return ofld;
|
|
244 }
|
|
245
|
|
246 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE2(
|
|
247 TARGET_TYPE target_type,
|
|
248 int target_number,
|
|
249 int is_optional,
|
|
250 _Offload_status* status,
|
|
251 const char* file,
|
|
252 uint64_t line,
|
|
253 const void** stream
|
|
254 )
|
|
255 {
|
|
256 bool retval;
|
|
257 OFFLOAD ofld;
|
|
258
|
|
259 // initialize status
|
|
260 if (status != 0) {
|
|
261 status->result = OFFLOAD_UNAVAILABLE;
|
|
262 status->device_number = -1;
|
|
263 status->data_sent = 0;
|
|
264 status->data_received = 0;
|
|
265 }
|
|
266
|
|
267 // make sure libray is initialized
|
|
268 retval = __offload_init_library();
|
|
269 // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
|
|
270 OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
|
|
271
|
|
272 OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
|
|
273
|
|
274 OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
|
275
|
|
276 // initalize all devices if init_type is on_offload_all
|
|
277 if (retval && __offload_init_type == c_init_on_offload_all) {
|
|
278 for (int i = 0; i < mic_engines_total; i++) {
|
|
279 mic_engines[i].init();
|
|
280 }
|
|
281 }
|
|
282 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
|
|
283
|
|
284 OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
|
|
285
|
|
286 if (target_type == TARGET_HOST) {
|
|
287 // Host always available
|
|
288 retval = true;
|
|
289 }
|
|
290 else if (target_type == TARGET_MIC) {
|
|
291 _Offload_stream handle = *(reinterpret_cast<_Offload_stream*>(stream));
|
|
292 Stream * stream = handle ? Stream::find_stream(handle, false) : NULL;
|
|
293 if (target_number >= -1) {
|
|
294 if (retval) {
|
|
295 // device number is defined by stream
|
|
296 if (stream) {
|
|
297 target_number = stream->get_device();
|
|
298 target_number = target_number % mic_engines_total;
|
|
299 }
|
|
300
|
|
301 // reserve device in ORSL
|
|
302 if (target_number != -1) {
|
|
303 if (is_optional) {
|
|
304 if (!ORSL::try_reserve(target_number)) {
|
|
305 target_number = -1;
|
|
306 }
|
|
307 }
|
|
308 else {
|
|
309 if (!ORSL::reserve(target_number)) {
|
|
310 target_number = -1;
|
|
311 }
|
|
312 }
|
|
313 }
|
|
314
|
|
315 // initialize device
|
|
316 if (target_number >= 0 &&
|
|
317 __offload_init_type == c_init_on_offload) {
|
|
318 OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
|
|
319 mic_engines[target_number].init();
|
|
320 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
|
|
321 }
|
|
322 }
|
|
323 else {
|
|
324 // fallback to CPU
|
|
325 target_number = -1;
|
|
326 }
|
|
327 if (!(target_number == -1 && handle == 0)) {
|
|
328 if (target_number < 0 || !retval) {
|
|
329 if (!is_optional && status == 0) {
|
|
330 LIBOFFLOAD_ERROR(c_device_is_not_available);
|
|
331 exit(1);
|
|
332 }
|
|
333
|
|
334 retval = false;
|
|
335 }
|
|
336 }
|
|
337 }
|
|
338 else {
|
|
339 LIBOFFLOAD_ERROR(c_invalid_device_number);
|
|
340 exit(1);
|
|
341 }
|
|
342 }
|
|
343
|
|
344 if (retval) {
|
|
345 ofld = new OffloadDescriptor(target_number, status,
|
|
346 !is_optional, false, timer_data);
|
|
347 OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
|
|
348 Offload_Report_Prolog(timer_data);
|
|
349 OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
|
|
350 "Starting offload: target_type = %d, "
|
|
351 "number = %d, is_optional = %d\n",
|
|
352 target_type, target_number, is_optional);
|
|
353
|
|
354 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
|
|
355 }
|
|
356 else {
|
|
357 ofld = NULL;
|
|
358
|
|
359 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
|
|
360 OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
|
|
361 offload_report_free_data(timer_data);
|
|
362 }
|
|
363
|
|
364 return ofld;
|
|
365 }
|
|
366
|
|
367 static int offload_offload_wrap(
|
|
368 OFFLOAD ofld,
|
|
369 const char *name,
|
|
370 int is_empty,
|
|
371 int num_vars,
|
|
372 VarDesc *vars,
|
|
373 VarDesc2 *vars2,
|
|
374 int num_waits,
|
|
375 const void **waits,
|
|
376 const void **signal,
|
|
377 int entry_id,
|
|
378 const void *stack_addr,
|
|
379 OffloadFlags offload_flags
|
|
380 )
|
|
381 {
|
|
382 if (signal) {
|
|
383 ofld->set_signal(*signal);
|
|
384 }
|
|
385
|
|
386 bool ret = ofld->offload(name, is_empty, vars, vars2, num_vars,
|
|
387 waits, num_waits, signal, entry_id,
|
|
388 stack_addr, offload_flags);
|
|
389 if (!ret || (signal == 0 && ofld->get_stream() == 0 &&
|
|
390 !offload_flags.bits.omp_async)) {
|
|
391 delete ofld;
|
|
392 }
|
|
393 return ret;
|
|
394 }
|
|
395
|
|
396 extern "C" int OFFLOAD_OFFLOAD1(
|
|
397 OFFLOAD ofld,
|
|
398 const char *name,
|
|
399 int is_empty,
|
|
400 int num_vars,
|
|
401 VarDesc *vars,
|
|
402 VarDesc2 *vars2,
|
|
403 int num_waits,
|
|
404 const void **waits,
|
|
405 const void **signal
|
|
406 )
|
|
407 {
|
|
408 return offload_offload_wrap(ofld, name, is_empty,
|
|
409 num_vars, vars, vars2,
|
|
410 num_waits, waits,
|
|
411 signal, 0, NULL, {0});
|
|
412 }
|
|
413
|
|
414 extern "C" int OFFLOAD_OFFLOAD2(
|
|
415 OFFLOAD ofld,
|
|
416 const char *name,
|
|
417 int is_empty,
|
|
418 int num_vars,
|
|
419 VarDesc *vars,
|
|
420 VarDesc2 *vars2,
|
|
421 int num_waits,
|
|
422 const void** waits,
|
|
423 const void** signal,
|
|
424 int entry_id,
|
|
425 const void *stack_addr
|
|
426 )
|
|
427 {
|
|
428 return offload_offload_wrap(ofld, name, is_empty,
|
|
429 num_vars, vars, vars2,
|
|
430 num_waits, waits,
|
|
431 signal, entry_id, stack_addr, {0});
|
|
432 }
|
|
433
|
|
434 extern "C" int OFFLOAD_OFFLOAD3(
|
|
435 OFFLOAD ofld,
|
|
436 const char *name,
|
|
437 int is_empty,
|
|
438 int num_vars,
|
|
439 VarDesc *vars,
|
|
440 VarDesc2 *vars2,
|
|
441 int num_waits,
|
|
442 const void** waits,
|
|
443 const void** signal,
|
|
444 int entry_id,
|
|
445 const void *stack_addr,
|
|
446 OffloadFlags offload_flags,
|
|
447 const void** stream
|
|
448 )
|
|
449 {
|
|
450 // 1. if the source is compiled with -traceback then stream is 0
|
|
451 // 2. if offload has a stream clause then stream is address of stream value
|
|
452 if (stream) {
|
|
453 ofld->set_stream(*(reinterpret_cast<_Offload_stream *>(stream)));
|
|
454 }
|
|
455
|
|
456 return offload_offload_wrap(ofld, name, is_empty,
|
|
457 num_vars, vars, vars2,
|
|
458 num_waits, waits,
|
|
459 signal, entry_id, stack_addr, offload_flags);
|
|
460 }
|
|
461
|
|
462 extern "C" int OFFLOAD_OFFLOAD(
|
|
463 OFFLOAD ofld,
|
|
464 const char *name,
|
|
465 int is_empty,
|
|
466 int num_vars,
|
|
467 VarDesc *vars,
|
|
468 VarDesc2 *vars2,
|
|
469 int num_waits,
|
|
470 const void **waits,
|
|
471 const void *signal,
|
|
472 int entry_id,
|
|
473 const void *stack_addr
|
|
474 )
|
|
475 {
|
|
476 // signal is passed by reference now
|
|
477 const void **signal_new = (signal != 0) ? &signal : 0;
|
|
478 const void **waits_new = 0;
|
|
479 int num_waits_new = 0;
|
|
480
|
|
481 // remove NULL values from the list of signals to wait for
|
|
482 if (num_waits > 0) {
|
|
483 waits_new = (const void**) alloca(sizeof(void*) * num_waits);
|
|
484 for (int i = 0; i < num_waits; i++) {
|
|
485 if (waits[i] != 0) {
|
|
486 waits_new[num_waits_new++] = waits[i];
|
|
487 }
|
|
488 }
|
|
489 }
|
|
490
|
|
491 return OFFLOAD_OFFLOAD1(ofld, name, is_empty,
|
|
492 num_vars, vars, vars2,
|
|
493 num_waits_new, waits_new,
|
|
494 signal_new);
|
|
495 }
|
|
496
|
|
497 extern "C" int OFFLOAD_CALL_COUNT()
|
|
498 {
|
|
499 offload_call_count++;
|
|
500 return offload_call_count;
|
|
501 }
|