111
|
1 /* OpenACC Runtime initialization routines
|
|
2
|
145
|
3 Copyright (C) 2013-2020 Free Software Foundation, Inc.
|
111
|
4
|
|
5 Contributed by Mentor Embedded.
|
|
6
|
|
7 This file is part of the GNU Offloading and Multi Processing Library
|
|
8 (libgomp).
|
|
9
|
|
10 Libgomp is free software; you can redistribute it and/or modify it
|
|
11 under the terms of the GNU General Public License as published by
|
|
12 the Free Software Foundation; either version 3, or (at your option)
|
|
13 any later version.
|
|
14
|
|
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
18 more details.
|
|
19
|
|
20 Under Section 7 of GPL version 3, you are granted additional
|
|
21 permissions described in the GCC Runtime Library Exception, version
|
|
22 3.1, as published by the Free Software Foundation.
|
|
23
|
|
24 You should have received a copy of the GNU General Public License and
|
|
25 a copy of the GCC Runtime Library Exception along with this program;
|
|
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
27 <http://www.gnu.org/licenses/>. */
|
|
28
|
|
29 #include "libgomp.h"
|
|
30 #include "oacc-int.h"
|
|
31 #include "openacc.h"
|
|
32 #include <assert.h>
|
|
33 #include <stdlib.h>
|
|
34 #include <strings.h>
|
|
35 #include <stdbool.h>
|
|
36 #include <string.h>
|
|
37
|
|
38 /* This lock is used to protect access to cached_base_dev, dispatchers and
|
|
39 the (abstract) initialisation state of attached offloading devices. */
|
|
40
|
|
41 static gomp_mutex_t acc_device_lock;
|
|
42
|
|
43 /* A cached version of the dispatcher for the global "current" accelerator type,
|
|
44 e.g. used as the default when creating new host threads. This is the
|
|
45 device-type equivalent of goacc_device_num (which specifies which device to
|
|
46 use out of potentially several of the same type). If there are several
|
|
47 devices of a given type, this points at the first one. */
|
|
48
|
|
49 static struct gomp_device_descr *cached_base_dev = NULL;
|
|
50
|
|
51 #if defined HAVE_TLS || defined USE_EMUTLS
|
|
52 __thread struct goacc_thread *goacc_tls_data;
|
|
53 #else
|
|
54 pthread_key_t goacc_tls_key;
|
|
55 #endif
|
|
56 static pthread_key_t goacc_cleanup_key;
|
|
57
|
|
58 static struct goacc_thread *goacc_threads;
|
|
59 static gomp_mutex_t goacc_thread_lock;
|
|
60
|
|
61 /* An array of dispatchers for device types, indexed by the type. This array
|
|
62 only references "base" devices, and other instances of the same type are
|
|
63 found by simply indexing from each such device (which are stored linearly,
|
|
64 grouped by device in target.c:devices). */
|
|
65 static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
|
|
66
|
|
67 attribute_hidden void
|
|
68 goacc_register (struct gomp_device_descr *disp)
|
|
69 {
|
|
70 /* Only register the 0th device here. */
|
|
71 if (disp->target_id != 0)
|
|
72 return;
|
|
73
|
|
74 gomp_mutex_lock (&acc_device_lock);
|
|
75
|
|
76 assert (acc_device_type (disp->type) != acc_device_none
|
|
77 && acc_device_type (disp->type) != acc_device_default
|
|
78 && acc_device_type (disp->type) != acc_device_not_host);
|
|
79 assert (!dispatchers[disp->type]);
|
|
80 dispatchers[disp->type] = disp;
|
|
81
|
|
82 gomp_mutex_unlock (&acc_device_lock);
|
|
83 }
|
|
84
|
145
|
85 static bool
|
|
86 known_device_type_p (acc_device_t d)
|
|
87 {
|
|
88 return d >= 0 && d < _ACC_device_hwm;
|
|
89 }
|
|
90
|
|
91 static void
|
|
92 unknown_device_type_error (acc_device_t invalid_type)
|
|
93 {
|
|
94 gomp_fatal ("unknown device type %u", invalid_type);
|
|
95 }
|
|
96
|
111
|
/* Translate a device name into the name OpenACC uses for it: "nvptx" becomes
   "nvidia"; every other NAME is returned unchanged (the same pointer).  */

static const char *
get_openacc_name (const char *name)
{
  return strcmp (name, "nvptx") != 0 ? name : "nvidia";
}
|
|
107
|
|
108 static const char *
|
|
109 name_of_acc_device_t (enum acc_device_t type)
|
|
110 {
|
|
111 switch (type)
|
|
112 {
|
|
113 case acc_device_none: return "none";
|
|
114 case acc_device_default: return "default";
|
|
115 case acc_device_host: return "host";
|
|
116 case acc_device_not_host: return "not_host";
|
|
117 case acc_device_nvidia: return "nvidia";
|
145
|
118 case acc_device_radeon: return "radeon";
|
|
119 default: unknown_device_type_error (type);
|
111
|
120 }
|
145
|
121 __builtin_unreachable ();
|
111
|
122 }
|
|
123
|
|
124 /* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR
|
|
125 is true, this function raises an error if there are no devices of type D,
|
|
126 otherwise it returns NULL in that case. */
|
|
127
|
|
128 static struct gomp_device_descr *
|
|
129 resolve_device (acc_device_t d, bool fail_is_error)
|
|
130 {
|
|
131 acc_device_t d_arg = d;
|
|
132
|
|
133 switch (d)
|
|
134 {
|
|
135 case acc_device_default:
|
|
136 {
|
|
137 if (goacc_device_type)
|
|
138 {
|
|
139 /* Lookup the named device. */
|
145
|
140 while (known_device_type_p (++d))
|
111
|
141 if (dispatchers[d]
|
|
142 && !strcasecmp (goacc_device_type,
|
|
143 get_openacc_name (dispatchers[d]->name))
|
|
144 && dispatchers[d]->get_num_devices_func () > 0)
|
|
145 goto found;
|
|
146
|
|
147 if (fail_is_error)
|
|
148 {
|
|
149 gomp_mutex_unlock (&acc_device_lock);
|
|
150 gomp_fatal ("device type %s not supported", goacc_device_type);
|
|
151 }
|
|
152 else
|
|
153 return NULL;
|
|
154 }
|
|
155
|
|
156 /* No default device specified, so start scanning for any non-host
|
|
157 device that is available. */
|
|
158 d = acc_device_not_host;
|
|
159 }
|
|
160 /* FALLTHROUGH */
|
|
161
|
|
162 case acc_device_not_host:
|
|
163 /* Find the first available device after acc_device_not_host. */
|
145
|
164 while (known_device_type_p (++d))
|
111
|
165 if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
|
|
166 goto found;
|
|
167 if (d_arg == acc_device_default)
|
|
168 {
|
|
169 d = acc_device_host;
|
|
170 goto found;
|
|
171 }
|
|
172 if (fail_is_error)
|
|
173 {
|
|
174 gomp_mutex_unlock (&acc_device_lock);
|
|
175 gomp_fatal ("no device found");
|
|
176 }
|
|
177 else
|
|
178 return NULL;
|
|
179 break;
|
|
180
|
|
181 case acc_device_host:
|
|
182 break;
|
|
183
|
|
184 default:
|
145
|
185 if (!known_device_type_p (d))
|
111
|
186 {
|
|
187 if (fail_is_error)
|
|
188 goto unsupported_device;
|
|
189 else
|
|
190 return NULL;
|
|
191 }
|
|
192 break;
|
|
193 }
|
|
194 found:
|
|
195
|
|
196 assert (d != acc_device_none
|
|
197 && d != acc_device_default
|
|
198 && d != acc_device_not_host);
|
|
199
|
|
200 if (dispatchers[d] == NULL && fail_is_error)
|
|
201 {
|
|
202 unsupported_device:
|
|
203 gomp_mutex_unlock (&acc_device_lock);
|
|
204 gomp_fatal ("device type %s not supported", name_of_acc_device_t (d));
|
|
205 }
|
|
206
|
|
207 return dispatchers[d];
|
|
208 }
|
|
209
|
|
210 /* Emit a suitable error if no device of a particular type is available, or
|
|
211 the given device number is out-of-range. */
|
|
212 static void
|
|
213 acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
|
|
214 {
|
|
215 if (ndevs == 0)
|
|
216 gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d));
|
|
217 else
|
|
218 gomp_fatal ("device %u out of range", ord);
|
|
219 }
|
|
220
|
|
221 /* This is called when plugins have been initialized, and serves to call
|
|
222 (indirectly) the target's device_init hook. Calling multiple times without
|
|
223 an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be
|
|
224 held before calling this function. */
|
|
225
|
|
226 static struct gomp_device_descr *
|
145
|
227 acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
|
111
|
228 {
|
145
|
229 bool check_not_nested_p;
|
|
230 if (implicit)
|
|
231 {
|
|
232 /* In the implicit case, there should (TODO: must?) already be something
|
|
233 have been set up for an outer construct. */
|
|
234 check_not_nested_p = false;
|
|
235 }
|
|
236 else
|
|
237 {
|
|
238 check_not_nested_p = true;
|
|
239 /* TODO: should we set 'thr->prof_info' etc. in this case ('acc_init')?
|
|
240 The problem is, that we don't have 'thr' yet? (So,
|
|
241 'check_not_nested_p = true' also is pointless actually.) */
|
|
242 }
|
|
243 bool profiling_p = GOACC_PROFILING_DISPATCH_P (check_not_nested_p);
|
|
244
|
|
245 acc_prof_info prof_info;
|
|
246 if (profiling_p)
|
|
247 {
|
|
248 prof_info.event_type = acc_ev_device_init_start;
|
|
249 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
|
|
250 prof_info.version = _ACC_PROF_INFO_VERSION;
|
|
251 prof_info.device_type = d;
|
|
252 prof_info.device_number = goacc_device_num;
|
|
253 prof_info.thread_id = -1;
|
|
254 prof_info.async = acc_async_sync;
|
|
255 prof_info.async_queue = prof_info.async;
|
|
256 prof_info.src_file = NULL;
|
|
257 prof_info.func_name = NULL;
|
|
258 prof_info.line_no = -1;
|
|
259 prof_info.end_line_no = -1;
|
|
260 prof_info.func_line_no = -1;
|
|
261 prof_info.func_end_line_no = -1;
|
|
262 }
|
|
263 acc_event_info device_init_event_info;
|
|
264 if (profiling_p)
|
|
265 {
|
|
266 device_init_event_info.other_event.event_type = prof_info.event_type;
|
|
267 device_init_event_info.other_event.valid_bytes
|
|
268 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
|
|
269 device_init_event_info.other_event.parent_construct = parent_construct;
|
|
270 device_init_event_info.other_event.implicit = implicit;
|
|
271 device_init_event_info.other_event.tool_info = NULL;
|
|
272 }
|
|
273 acc_api_info api_info;
|
|
274 if (profiling_p)
|
|
275 {
|
|
276 api_info.device_api = acc_device_api_none;
|
|
277 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
|
|
278 api_info.device_type = prof_info.device_type;
|
|
279 api_info.vendor = -1;
|
|
280 api_info.device_handle = NULL;
|
|
281 api_info.context_handle = NULL;
|
|
282 api_info.async_handle = NULL;
|
|
283 }
|
|
284
|
|
285 if (profiling_p)
|
|
286 goacc_profiling_dispatch (&prof_info, &device_init_event_info, &api_info);
|
|
287
|
111
|
288 struct gomp_device_descr *base_dev, *acc_dev;
|
|
289 int ndevs;
|
|
290
|
|
291 base_dev = resolve_device (d, true);
|
|
292
|
|
293 ndevs = base_dev->get_num_devices_func ();
|
|
294
|
|
295 if (ndevs <= 0 || goacc_device_num >= ndevs)
|
|
296 acc_dev_num_out_of_range (d, goacc_device_num, ndevs);
|
|
297
|
|
298 acc_dev = &base_dev[goacc_device_num];
|
|
299
|
|
300 gomp_mutex_lock (&acc_dev->lock);
|
|
301 if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
|
|
302 {
|
|
303 gomp_mutex_unlock (&acc_dev->lock);
|
|
304 gomp_fatal ("device already active");
|
|
305 }
|
|
306
|
|
307 gomp_init_device (acc_dev);
|
|
308 gomp_mutex_unlock (&acc_dev->lock);
|
|
309
|
145
|
310 if (profiling_p)
|
|
311 {
|
|
312 prof_info.event_type = acc_ev_device_init_end;
|
|
313 device_init_event_info.other_event.event_type = prof_info.event_type;
|
|
314 goacc_profiling_dispatch (&prof_info, &device_init_event_info,
|
|
315 &api_info);
|
|
316 }
|
|
317
|
111
|
318 return base_dev;
|
|
319 }
|
|
320
|
|
321 /* ACC_DEVICE_LOCK must be held before calling this function. */
|
|
322
|
|
323 static void
|
|
324 acc_shutdown_1 (acc_device_t d)
|
|
325 {
|
|
326 struct gomp_device_descr *base_dev;
|
|
327 struct goacc_thread *walk;
|
|
328 int ndevs, i;
|
|
329 bool devices_active = false;
|
|
330
|
|
331 /* Get the base device for this device type. */
|
|
332 base_dev = resolve_device (d, true);
|
|
333
|
|
334 ndevs = base_dev->get_num_devices_func ();
|
|
335
|
|
336 /* Unload all the devices of this type that have been opened. */
|
|
337 for (i = 0; i < ndevs; i++)
|
|
338 {
|
|
339 struct gomp_device_descr *acc_dev = &base_dev[i];
|
|
340
|
|
341 gomp_mutex_lock (&acc_dev->lock);
|
|
342 gomp_unload_device (acc_dev);
|
|
343 gomp_mutex_unlock (&acc_dev->lock);
|
|
344 }
|
|
345
|
|
346 gomp_mutex_lock (&goacc_thread_lock);
|
|
347
|
|
348 /* Free target-specific TLS data and close all devices. */
|
|
349 for (walk = goacc_threads; walk != NULL; walk = walk->next)
|
|
350 {
|
|
351 if (walk->target_tls)
|
|
352 base_dev->openacc.destroy_thread_data_func (walk->target_tls);
|
|
353
|
|
354 walk->target_tls = NULL;
|
|
355
|
|
356 /* This would mean the user is shutting down OpenACC in the middle of an
|
|
357 "acc data" pragma. Likely not intentional. */
|
|
358 if (walk->mapped_data)
|
|
359 {
|
|
360 gomp_mutex_unlock (&goacc_thread_lock);
|
|
361 gomp_fatal ("shutdown in 'acc data' region");
|
|
362 }
|
|
363
|
|
364 /* Similarly, if this happens then user code has done something weird. */
|
|
365 if (walk->saved_bound_dev)
|
|
366 {
|
|
367 gomp_mutex_unlock (&goacc_thread_lock);
|
|
368 gomp_fatal ("shutdown during host fallback");
|
|
369 }
|
|
370
|
|
371 if (walk->dev)
|
|
372 {
|
|
373 gomp_mutex_lock (&walk->dev->lock);
|
145
|
374
|
|
375 while (walk->dev->mem_map.root)
|
|
376 {
|
|
377 splay_tree_key k = &walk->dev->mem_map.root->key;
|
|
378 if (k->aux)
|
|
379 k->aux->link_key = NULL;
|
|
380 gomp_remove_var (walk->dev, k);
|
|
381 }
|
|
382
|
111
|
383 gomp_mutex_unlock (&walk->dev->lock);
|
|
384
|
|
385 walk->dev = NULL;
|
|
386 walk->base_dev = NULL;
|
|
387 }
|
|
388 }
|
|
389
|
|
390 gomp_mutex_unlock (&goacc_thread_lock);
|
|
391
|
|
392 /* Close all the devices of this type that have been opened. */
|
|
393 bool ret = true;
|
|
394 for (i = 0; i < ndevs; i++)
|
|
395 {
|
|
396 struct gomp_device_descr *acc_dev = &base_dev[i];
|
|
397 gomp_mutex_lock (&acc_dev->lock);
|
|
398 if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
|
|
399 {
|
|
400 devices_active = true;
|
145
|
401 ret &= gomp_fini_device (acc_dev);
|
111
|
402 acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
|
|
403 }
|
|
404 gomp_mutex_unlock (&acc_dev->lock);
|
|
405 }
|
|
406
|
|
407 if (!ret)
|
|
408 gomp_fatal ("device finalization failed");
|
|
409
|
|
410 if (!devices_active)
|
|
411 gomp_fatal ("no device initialized");
|
|
412 }
|
|
413
|
|
414 static struct goacc_thread *
|
|
415 goacc_new_thread (void)
|
|
416 {
|
|
417 struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));
|
|
418
|
|
419 #if defined HAVE_TLS || defined USE_EMUTLS
|
|
420 goacc_tls_data = thr;
|
|
421 #else
|
|
422 pthread_setspecific (goacc_tls_key, thr);
|
|
423 #endif
|
|
424
|
|
425 pthread_setspecific (goacc_cleanup_key, thr);
|
|
426
|
|
427 gomp_mutex_lock (&goacc_thread_lock);
|
|
428 thr->next = goacc_threads;
|
|
429 goacc_threads = thr;
|
|
430 gomp_mutex_unlock (&goacc_thread_lock);
|
|
431
|
|
432 return thr;
|
|
433 }
|
|
434
|
|
435 static void
|
|
436 goacc_destroy_thread (void *data)
|
|
437 {
|
|
438 struct goacc_thread *thr = data, *walk, *prev;
|
|
439
|
|
440 gomp_mutex_lock (&goacc_thread_lock);
|
|
441
|
|
442 if (thr)
|
|
443 {
|
|
444 struct gomp_device_descr *acc_dev = thr->dev;
|
|
445
|
|
446 if (acc_dev && thr->target_tls)
|
|
447 {
|
|
448 acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
|
|
449 thr->target_tls = NULL;
|
|
450 }
|
|
451
|
|
452 assert (!thr->mapped_data);
|
|
453
|
|
454 /* Remove from thread list. */
|
|
455 for (prev = NULL, walk = goacc_threads; walk;
|
|
456 prev = walk, walk = walk->next)
|
|
457 if (walk == thr)
|
|
458 {
|
|
459 if (prev == NULL)
|
|
460 goacc_threads = walk->next;
|
|
461 else
|
|
462 prev->next = walk->next;
|
|
463
|
|
464 free (thr);
|
|
465
|
|
466 break;
|
|
467 }
|
|
468
|
|
469 assert (walk);
|
|
470 }
|
|
471
|
|
472 gomp_mutex_unlock (&goacc_thread_lock);
|
|
473 }
|
|
474
|
|
475 /* Use the ORD'th device instance for the current host thread (or -1 for the
|
|
476 current global default). The device (and the runtime) must be initialised
|
|
477 before calling this function. */
|
|
478
|
|
479 void
|
|
480 goacc_attach_host_thread_to_device (int ord)
|
|
481 {
|
|
482 struct goacc_thread *thr = goacc_thread ();
|
|
483 struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
|
|
484 int num_devices;
|
|
485
|
|
486 if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
|
|
487 return;
|
|
488
|
|
489 if (ord < 0)
|
|
490 ord = goacc_device_num;
|
|
491
|
|
492 /* Decide which type of device to use. If the current thread has a device
|
|
493 type already (e.g. set by acc_set_device_type), use that, else use the
|
|
494 global default. */
|
|
495 if (thr && thr->base_dev)
|
|
496 base_dev = thr->base_dev;
|
|
497 else
|
|
498 {
|
|
499 assert (cached_base_dev);
|
|
500 base_dev = cached_base_dev;
|
|
501 }
|
|
502
|
|
503 num_devices = base_dev->get_num_devices_func ();
|
|
504 if (num_devices <= 0 || ord >= num_devices)
|
|
505 acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord,
|
|
506 num_devices);
|
|
507
|
|
508 if (!thr)
|
|
509 thr = goacc_new_thread ();
|
|
510
|
|
511 thr->base_dev = base_dev;
|
|
512 thr->dev = acc_dev = &base_dev[ord];
|
|
513 thr->saved_bound_dev = NULL;
|
|
514 thr->mapped_data = NULL;
|
145
|
515 thr->prof_info = NULL;
|
|
516 thr->api_info = NULL;
|
|
517 /* Initially, all callbacks for all events are enabled. */
|
|
518 thr->prof_callbacks_enabled = true;
|
|
519
|
111
|
520 thr->target_tls
|
|
521 = acc_dev->openacc.create_thread_data_func (ord);
|
|
522 }
|
|
523
|
|
524 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
|
|
525 init/shutdown is per-process or per-thread. We choose per-process. */
|
|
526
|
|
527 void
|
|
528 acc_init (acc_device_t d)
|
|
529 {
|
145
|
530 if (!known_device_type_p (d))
|
|
531 unknown_device_type_error (d);
|
|
532
|
111
|
533 gomp_init_targets_once ();
|
|
534
|
|
535 gomp_mutex_lock (&acc_device_lock);
|
145
|
536 cached_base_dev = acc_init_1 (d, acc_construct_runtime_api, 0);
|
111
|
537 gomp_mutex_unlock (&acc_device_lock);
|
|
538
|
|
539 goacc_attach_host_thread_to_device (-1);
|
|
540 }
|
|
541
|
|
542 ialias (acc_init)
|
|
543
|
|
544 void
|
|
545 acc_shutdown (acc_device_t d)
|
|
546 {
|
145
|
547 if (!known_device_type_p (d))
|
|
548 unknown_device_type_error (d);
|
|
549
|
111
|
550 gomp_init_targets_once ();
|
|
551
|
|
552 gomp_mutex_lock (&acc_device_lock);
|
|
553
|
|
554 acc_shutdown_1 (d);
|
|
555
|
|
556 gomp_mutex_unlock (&acc_device_lock);
|
|
557 }
|
|
558
|
|
559 ialias (acc_shutdown)
|
|
560
|
|
561 int
|
|
562 acc_get_num_devices (acc_device_t d)
|
|
563 {
|
145
|
564 if (!known_device_type_p (d))
|
|
565 unknown_device_type_error (d);
|
|
566
|
111
|
567 int n = 0;
|
|
568 struct gomp_device_descr *acc_dev;
|
|
569
|
|
570 if (d == acc_device_none)
|
|
571 return 0;
|
|
572
|
|
573 gomp_init_targets_once ();
|
|
574
|
|
575 gomp_mutex_lock (&acc_device_lock);
|
|
576 acc_dev = resolve_device (d, false);
|
|
577 gomp_mutex_unlock (&acc_device_lock);
|
|
578
|
|
579 if (!acc_dev)
|
|
580 return 0;
|
|
581
|
|
582 n = acc_dev->get_num_devices_func ();
|
|
583 if (n < 0)
|
|
584 n = 0;
|
|
585
|
|
586 return n;
|
|
587 }
|
|
588
|
|
589 ialias (acc_get_num_devices)
|
|
590
|
|
591 /* Set the device type for the current thread only (using the current global
|
|
592 default device number), initialising that device if necessary. Also set the
|
|
593 default device type for new threads to D. */
|
|
594
|
|
595 void
|
|
596 acc_set_device_type (acc_device_t d)
|
|
597 {
|
145
|
598 if (!known_device_type_p (d))
|
|
599 unknown_device_type_error (d);
|
|
600
|
111
|
601 struct gomp_device_descr *base_dev, *acc_dev;
|
|
602 struct goacc_thread *thr = goacc_thread ();
|
|
603
|
145
|
604 acc_prof_info prof_info;
|
|
605 acc_api_info api_info;
|
|
606 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
607 if (profiling_p)
|
|
608 prof_info.device_type = d;
|
|
609
|
111
|
610 gomp_init_targets_once ();
|
|
611
|
|
612 gomp_mutex_lock (&acc_device_lock);
|
|
613
|
|
614 cached_base_dev = base_dev = resolve_device (d, true);
|
|
615 acc_dev = &base_dev[goacc_device_num];
|
|
616
|
|
617 gomp_mutex_lock (&acc_dev->lock);
|
|
618 if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
|
|
619 gomp_init_device (acc_dev);
|
|
620 gomp_mutex_unlock (&acc_dev->lock);
|
|
621
|
|
622 gomp_mutex_unlock (&acc_device_lock);
|
|
623
|
|
624 /* We're changing device type: invalidate the current thread's dev and
|
|
625 base_dev pointers. */
|
|
626 if (thr && thr->base_dev != base_dev)
|
|
627 {
|
|
628 thr->base_dev = thr->dev = NULL;
|
|
629 if (thr->mapped_data)
|
|
630 gomp_fatal ("acc_set_device_type in 'acc data' region");
|
|
631 }
|
|
632
|
|
633 goacc_attach_host_thread_to_device (-1);
|
145
|
634
|
|
635 if (profiling_p)
|
|
636 {
|
|
637 thr->prof_info = NULL;
|
|
638 thr->api_info = NULL;
|
|
639 }
|
111
|
640 }
|
|
641
|
|
642 ialias (acc_set_device_type)
|
|
643
|
|
644 acc_device_t
|
|
645 acc_get_device_type (void)
|
|
646 {
|
|
647 acc_device_t res = acc_device_none;
|
|
648 struct gomp_device_descr *dev;
|
|
649 struct goacc_thread *thr = goacc_thread ();
|
|
650
|
|
651 if (thr && thr->base_dev)
|
|
652 res = acc_device_type (thr->base_dev->type);
|
|
653 else
|
|
654 {
|
145
|
655 acc_prof_info prof_info;
|
|
656 acc_api_info api_info;
|
|
657 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
658
|
111
|
659 gomp_init_targets_once ();
|
|
660
|
|
661 gomp_mutex_lock (&acc_device_lock);
|
|
662 dev = resolve_device (acc_device_default, true);
|
|
663 gomp_mutex_unlock (&acc_device_lock);
|
|
664 res = acc_device_type (dev->type);
|
145
|
665
|
|
666 if (profiling_p)
|
|
667 {
|
|
668 thr->prof_info = NULL;
|
|
669 thr->api_info = NULL;
|
|
670 }
|
111
|
671 }
|
|
672
|
|
673 assert (res != acc_device_default
|
145
|
674 && res != acc_device_not_host
|
|
675 && res != acc_device_current);
|
111
|
676
|
|
677 return res;
|
|
678 }
|
|
679
|
|
680 ialias (acc_get_device_type)
|
|
681
|
|
682 int
|
|
683 acc_get_device_num (acc_device_t d)
|
|
684 {
|
145
|
685 if (!known_device_type_p (d))
|
|
686 unknown_device_type_error (d);
|
|
687
|
111
|
688 const struct gomp_device_descr *dev;
|
|
689 struct goacc_thread *thr = goacc_thread ();
|
|
690
|
145
|
691 acc_prof_info prof_info;
|
|
692 acc_api_info api_info;
|
|
693 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
694 if (profiling_p)
|
|
695 prof_info.device_type = d;
|
111
|
696
|
|
697 gomp_init_targets_once ();
|
|
698
|
|
699 gomp_mutex_lock (&acc_device_lock);
|
|
700 dev = resolve_device (d, true);
|
|
701 gomp_mutex_unlock (&acc_device_lock);
|
|
702
|
145
|
703 if (profiling_p)
|
|
704 {
|
|
705 thr->prof_info = NULL;
|
|
706 thr->api_info = NULL;
|
|
707 }
|
|
708
|
111
|
709 if (thr && thr->base_dev == dev && thr->dev)
|
|
710 return thr->dev->target_id;
|
|
711
|
|
712 return goacc_device_num;
|
|
713 }
|
|
714
|
|
715 ialias (acc_get_device_num)
|
|
716
|
|
717 void
|
|
718 acc_set_device_num (int ord, acc_device_t d)
|
|
719 {
|
145
|
720 if (!known_device_type_p (d))
|
|
721 unknown_device_type_error (d);
|
|
722
|
111
|
723 struct gomp_device_descr *base_dev, *acc_dev;
|
|
724 int num_devices;
|
|
725
|
|
726 gomp_init_targets_once ();
|
|
727
|
|
728 if (ord < 0)
|
|
729 ord = goacc_device_num;
|
|
730
|
|
731 if ((int) d == 0)
|
|
732 /* Set whatever device is being used by the current host thread to use
|
|
733 device instance ORD. It's unclear if this is supposed to affect other
|
|
734 host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). */
|
|
735 goacc_attach_host_thread_to_device (ord);
|
|
736 else
|
|
737 {
|
|
738 gomp_mutex_lock (&acc_device_lock);
|
|
739
|
|
740 cached_base_dev = base_dev = resolve_device (d, true);
|
|
741
|
|
742 num_devices = base_dev->get_num_devices_func ();
|
|
743
|
|
744 if (num_devices <= 0 || ord >= num_devices)
|
|
745 acc_dev_num_out_of_range (d, ord, num_devices);
|
|
746
|
|
747 acc_dev = &base_dev[ord];
|
|
748
|
|
749 gomp_mutex_lock (&acc_dev->lock);
|
|
750 if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
|
|
751 gomp_init_device (acc_dev);
|
|
752 gomp_mutex_unlock (&acc_dev->lock);
|
|
753
|
|
754 gomp_mutex_unlock (&acc_device_lock);
|
|
755
|
|
756 goacc_attach_host_thread_to_device (ord);
|
|
757 }
|
|
758
|
|
759 goacc_device_num = ord;
|
|
760 }
|
|
761
|
|
762 ialias (acc_set_device_num)
|
|
763
|
145
|
764 static union goacc_property_value
|
|
765 get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
|
|
766 {
|
|
767 goacc_lazy_initialize ();
|
|
768 struct goacc_thread *thr = goacc_thread ();
|
|
769
|
|
770 if (d == acc_device_current && thr && thr->dev)
|
|
771 return thr->dev->openacc.get_property_func (thr->dev->target_id, prop);
|
|
772
|
|
773 gomp_mutex_lock (&acc_device_lock);
|
|
774
|
|
775 struct gomp_device_descr *dev = resolve_device (d, true);
|
|
776
|
|
777 int num_devices = dev->get_num_devices_func ();
|
|
778
|
|
779 if (num_devices <= 0 || ord >= num_devices)
|
|
780 acc_dev_num_out_of_range (d, ord, num_devices);
|
|
781
|
|
782 dev += ord;
|
|
783
|
|
784 gomp_mutex_lock (&dev->lock);
|
|
785 if (dev->state == GOMP_DEVICE_UNINITIALIZED)
|
|
786 gomp_init_device (dev);
|
|
787 gomp_mutex_unlock (&dev->lock);
|
|
788
|
|
789 gomp_mutex_unlock (&acc_device_lock);
|
|
790
|
|
791 assert (dev);
|
|
792
|
|
793 return dev->openacc.get_property_func (dev->target_id, prop);
|
|
794 }
|
|
795
|
|
796 size_t
|
|
797 acc_get_property (int ord, acc_device_t d, acc_device_property_t prop)
|
|
798 {
|
|
799 if (!known_device_type_p (d))
|
|
800 unknown_device_type_error(d);
|
|
801
|
|
802 if (prop & GOACC_PROPERTY_STRING_MASK)
|
|
803 return 0;
|
|
804 else
|
|
805 return get_property_any (ord, d, prop).val;
|
|
806 }
|
|
807
|
|
808 ialias (acc_get_property)
|
|
809
|
|
810 const char *
|
|
811 acc_get_property_string (int ord, acc_device_t d, acc_device_property_t prop)
|
|
812 {
|
|
813 if (!known_device_type_p (d))
|
|
814 unknown_device_type_error(d);
|
|
815
|
|
816 if (prop & GOACC_PROPERTY_STRING_MASK)
|
|
817 return get_property_any (ord, d, prop).ptr;
|
|
818 else
|
|
819 return NULL;
|
|
820 }
|
|
821
|
|
822 ialias (acc_get_property_string)
|
|
823
|
111
|
824 /* For -O and higher, the compiler always attempts to expand acc_on_device, but
|
|
825 if the user disables the builtin, or calls it via a pointer, we'll need this
|
|
826 version.
|
|
827
|
|
828 Compile this with optimization, so that the compiler expands
|
145
|
829 this, rather than generating infinitely recursive code.
|
|
830
|
|
831 The function just forwards its argument to __builtin_acc_on_device. It does
|
|
832 not verify that the argument is a valid acc_device_t enumeration value. */
|
111
|
833
|
|
834 int __attribute__ ((__optimize__ ("O2")))
|
|
835 acc_on_device (acc_device_t dev)
|
|
836 {
|
|
837 return __builtin_acc_on_device (dev);
|
|
838 }
|
|
839
|
|
840 ialias (acc_on_device)
|
|
841
|
|
842 attribute_hidden void
|
|
843 goacc_runtime_initialize (void)
|
|
844 {
|
|
845 gomp_mutex_init (&acc_device_lock);
|
|
846
|
|
847 #if !(defined HAVE_TLS || defined USE_EMUTLS)
|
|
848 pthread_key_create (&goacc_tls_key, NULL);
|
|
849 #endif
|
|
850
|
|
851 pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
|
|
852
|
|
853 cached_base_dev = NULL;
|
|
854
|
|
855 goacc_threads = NULL;
|
|
856 gomp_mutex_init (&goacc_thread_lock);
|
|
857
|
|
858 /* Initialize and register the 'host' device type. */
|
|
859 goacc_host_init ();
|
|
860 }
|
|
861
|
145
|
862 static void __attribute__((destructor))
|
|
863 goacc_runtime_deinitialize (void)
|
|
864 {
|
|
865 #if !(defined HAVE_TLS || defined USE_EMUTLS)
|
|
866 pthread_key_delete (goacc_tls_key);
|
|
867 #endif
|
|
868 pthread_key_delete (goacc_cleanup_key);
|
|
869 }
|
|
870
|
111
|
871 /* Compiler helper functions */
|
|
872
|
|
873 attribute_hidden void
|
|
874 goacc_save_and_set_bind (acc_device_t d)
|
|
875 {
|
|
876 struct goacc_thread *thr = goacc_thread ();
|
|
877
|
|
878 assert (!thr->saved_bound_dev);
|
|
879
|
|
880 thr->saved_bound_dev = thr->dev;
|
|
881 thr->dev = dispatchers[d];
|
|
882 }
|
|
883
|
|
884 attribute_hidden void
|
|
885 goacc_restore_bind (void)
|
|
886 {
|
|
887 struct goacc_thread *thr = goacc_thread ();
|
|
888
|
|
889 thr->dev = thr->saved_bound_dev;
|
|
890 thr->saved_bound_dev = NULL;
|
|
891 }
|
|
892
|
|
893 /* This is called from any OpenACC support function that may need to implicitly
|
|
894 initialize the libgomp runtime, either globally or from a new host thread.
|
|
895 On exit "goacc_thread" will return a valid & populated thread block. */
|
|
896
|
|
897 attribute_hidden void
|
|
898 goacc_lazy_initialize (void)
|
|
899 {
|
|
900 struct goacc_thread *thr = goacc_thread ();
|
|
901
|
|
902 if (thr && thr->dev)
|
|
903 return;
|
|
904
|
145
|
905 gomp_init_targets_once ();
|
|
906
|
|
907 gomp_mutex_lock (&acc_device_lock);
|
111
|
908 if (!cached_base_dev)
|
145
|
909 cached_base_dev = acc_init_1 (acc_device_default,
|
|
910 acc_construct_parallel, 1);
|
|
911 gomp_mutex_unlock (&acc_device_lock);
|
|
912
|
|
913 goacc_attach_host_thread_to_device (-1);
|
111
|
914 }
|