111
|
1 /* OpenACC Runtime initialization routines
|
|
2
|
145
|
3 Copyright (C) 2013-2020 Free Software Foundation, Inc.
|
111
|
4
|
|
5 Contributed by Mentor Embedded.
|
|
6
|
|
7 This file is part of the GNU Offloading and Multi Processing Library
|
|
8 (libgomp).
|
|
9
|
|
10 Libgomp is free software; you can redistribute it and/or modify it
|
|
11 under the terms of the GNU General Public License as published by
|
|
12 the Free Software Foundation; either version 3, or (at your option)
|
|
13 any later version.
|
|
14
|
|
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
18 more details.
|
|
19
|
|
20 Under Section 7 of GPL version 3, you are granted additional
|
|
21 permissions described in the GCC Runtime Library Exception, version
|
|
22 3.1, as published by the Free Software Foundation.
|
|
23
|
|
24 You should have received a copy of the GNU General Public License and
|
|
25 a copy of the GCC Runtime Library Exception along with this program;
|
|
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
27 <http://www.gnu.org/licenses/>. */
|
|
28
|
|
29 #include "openacc.h"
|
|
30 #include "libgomp.h"
|
|
31 #include "gomp-constants.h"
|
|
32 #include "oacc-int.h"
|
|
33 #include <string.h>
|
|
34 #include <assert.h>
|
|
35
|
|
36 /* Return block containing [H->S), or NULL if not contained. The device lock
|
|
37 for DEV must be locked on entry, and remains locked on exit. */
|
|
38
|
|
39 static splay_tree_key
|
|
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
|
|
41 {
|
|
42 struct splay_tree_key_s node;
|
|
43 splay_tree_key key;
|
|
44
|
|
45 node.host_start = (uintptr_t) h;
|
|
46 node.host_end = (uintptr_t) h + s;
|
|
47
|
|
48 key = splay_tree_lookup (&dev->mem_map, &node);
|
|
49
|
|
50 return key;
|
|
51 }
|
|
52
|
145
|
53 /* Helper for lookup_dev. Iterate over splay tree. */
|
111
|
54
|
|
55 static splay_tree_key
|
145
|
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
|
111
|
57 {
|
145
|
58 splay_tree_key key = &node->key;
|
|
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
|
|
60 return key;
|
|
61
|
|
62 key = NULL;
|
|
63 if (node->left)
|
|
64 key = lookup_dev_1 (node->left, d, s);
|
|
65 if (!key && node->right)
|
|
66 key = lookup_dev_1 (node->right, d, s);
|
|
67
|
|
68 return key;
|
|
69 }
|
111
|
70
|
145
|
71 /* Return block containing [D->S), or NULL if not contained.
|
|
72
|
|
73 This iterates over the splay tree. This is not expected to be a common
|
|
74 operation.
|
|
75
|
|
76 The device lock associated with MEM_MAP must be locked on entry, and remains
|
|
77 locked on exit. */
|
|
78
|
|
79 static splay_tree_key
|
|
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
|
|
81 {
|
|
82 if (!mem_map || !mem_map->root)
|
111
|
83 return NULL;
|
|
84
|
145
|
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
|
|
86 }
|
111
|
87
|
|
88
|
|
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
|
|
90 NULL. */
|
|
91
|
|
92 void *
|
|
93 acc_malloc (size_t s)
|
|
94 {
|
|
95 if (!s)
|
|
96 return NULL;
|
|
97
|
|
98 goacc_lazy_initialize ();
|
|
99
|
|
100 struct goacc_thread *thr = goacc_thread ();
|
|
101
|
|
102 assert (thr->dev);
|
|
103
|
|
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
105 return malloc (s);
|
|
106
|
145
|
107 acc_prof_info prof_info;
|
|
108 acc_api_info api_info;
|
|
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
110
|
|
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
|
|
112
|
|
113 if (profiling_p)
|
|
114 {
|
|
115 thr->prof_info = NULL;
|
|
116 thr->api_info = NULL;
|
|
117 }
|
|
118
|
|
119 return res;
|
111
|
120 }
|
|
121
|
|
/* Free device memory D previously obtained from acc_malloc.  A NULL D
   is a no-op.  It is a fatal error to free device memory that still
   backs an active host<->device mapping.  */

void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared-memory devices allocate via plain malloc; free likewise.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
    {
      /* Translate D back to the corresponding host address purely for
	 the diagnostic below.  */
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
	 used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
|
|
172
|
145
|
173 static void
|
|
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
|
|
175 const char *libfnname)
|
111
|
176 {
|
|
177 /* No need to call lazy open here, as the device pointer must have
|
|
178 been obtained from a routine that did that. */
|
|
179 struct goacc_thread *thr = goacc_thread ();
|
|
180
|
|
181 assert (thr && thr->dev);
|
|
182
|
|
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
184 {
|
145
|
185 if (from)
|
|
186 memmove (h, d, s);
|
|
187 else
|
|
188 memmove (d, h, s);
|
111
|
189 return;
|
|
190 }
|
|
191
|
145
|
192 acc_prof_info prof_info;
|
|
193 acc_api_info api_info;
|
|
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
195 if (profiling_p)
|
|
196 {
|
|
197 prof_info.async = async;
|
|
198 prof_info.async_queue = prof_info.async;
|
|
199 }
|
|
200
|
|
201 goacc_aq aq = get_goacc_asyncqueue (async);
|
|
202 if (from)
|
|
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
|
|
204 else
|
|
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
|
|
206
|
|
207 if (profiling_p)
|
|
208 {
|
|
209 thr->prof_info = NULL;
|
|
210 thr->api_info = NULL;
|
|
211 }
|
|
212 }
|
|
213
|
|
/* Copy S bytes from host memory at H to device memory at D,
   synchronously.  */

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

/* As acc_memcpy_to_device, but enqueued on async queue ASYNC.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

/* Copy S bytes from device memory at D back to host memory at H,
   synchronously.  */

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

/* As acc_memcpy_from_device, but enqueued on async queue ASYNC.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
|
|
237
|
|
238 /* Return the device pointer that corresponds to host data H. Or NULL
|
|
239 if no mapping. */
|
|
240
|
|
241 void *
|
|
242 acc_deviceptr (void *h)
|
|
243 {
|
|
244 splay_tree_key n;
|
|
245 void *d;
|
|
246 void *offset;
|
|
247
|
|
248 goacc_lazy_initialize ();
|
|
249
|
|
250 struct goacc_thread *thr = goacc_thread ();
|
|
251 struct gomp_device_descr *dev = thr->dev;
|
|
252
|
|
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
254 return h;
|
|
255
|
145
|
256 /* In the following, no OpenACC Profiling Interface events can possibly be
|
|
257 generated. */
|
|
258
|
111
|
259 gomp_mutex_lock (&dev->lock);
|
|
260
|
|
261 n = lookup_host (dev, h, 1);
|
|
262
|
|
263 if (!n)
|
|
264 {
|
|
265 gomp_mutex_unlock (&dev->lock);
|
|
266 return NULL;
|
|
267 }
|
|
268
|
|
269 offset = h - n->host_start;
|
|
270
|
|
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
|
|
272
|
|
273 gomp_mutex_unlock (&dev->lock);
|
|
274
|
|
275 return d;
|
|
276 }
|
|
277
|
|
278 /* Return the host pointer that corresponds to device data D. Or NULL
|
|
279 if no mapping. */
|
|
280
|
|
281 void *
|
|
282 acc_hostptr (void *d)
|
|
283 {
|
|
284 splay_tree_key n;
|
|
285 void *h;
|
|
286 void *offset;
|
|
287
|
|
288 goacc_lazy_initialize ();
|
|
289
|
|
290 struct goacc_thread *thr = goacc_thread ();
|
|
291 struct gomp_device_descr *acc_dev = thr->dev;
|
|
292
|
|
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
294 return d;
|
|
295
|
145
|
296 /* In the following, no OpenACC Profiling Interface events can possibly be
|
|
297 generated. */
|
|
298
|
111
|
299 gomp_mutex_lock (&acc_dev->lock);
|
|
300
|
145
|
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
|
111
|
302
|
|
303 if (!n)
|
|
304 {
|
|
305 gomp_mutex_unlock (&acc_dev->lock);
|
|
306 return NULL;
|
|
307 }
|
|
308
|
|
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
|
|
310
|
|
311 h = n->host_start + offset;
|
|
312
|
|
313 gomp_mutex_unlock (&acc_dev->lock);
|
|
314
|
|
315 return h;
|
|
316 }
|
|
317
|
|
318 /* Return 1 if host data [H,+S] is present on the device. */
|
|
319
|
|
320 int
|
|
321 acc_is_present (void *h, size_t s)
|
|
322 {
|
|
323 splay_tree_key n;
|
|
324
|
|
325 if (!s || !h)
|
|
326 return 0;
|
|
327
|
|
328 goacc_lazy_initialize ();
|
|
329
|
|
330 struct goacc_thread *thr = goacc_thread ();
|
|
331 struct gomp_device_descr *acc_dev = thr->dev;
|
|
332
|
|
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
334 return h != NULL;
|
|
335
|
145
|
336 /* In the following, no OpenACC Profiling Interface events can possibly be
|
|
337 generated. */
|
|
338
|
111
|
339 gomp_mutex_lock (&acc_dev->lock);
|
|
340
|
|
341 n = lookup_host (acc_dev, h, s);
|
|
342
|
|
343 if (n && ((uintptr_t)h < n->host_start
|
|
344 || (uintptr_t)h + s > n->host_end
|
|
345 || s > n->host_end - n->host_start))
|
|
346 n = NULL;
|
|
347
|
|
348 gomp_mutex_unlock (&acc_dev->lock);
|
|
349
|
|
350 return n != NULL;
|
|
351 }
|
|
352
|
|
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
|
|
354
|
|
355 void
|
|
356 acc_map_data (void *h, void *d, size_t s)
|
|
357 {
|
|
358 struct target_mem_desc *tgt = NULL;
|
|
359 size_t mapnum = 1;
|
|
360 void *hostaddrs = h;
|
|
361 void *devaddrs = d;
|
|
362 size_t sizes = s;
|
|
363 unsigned short kinds = GOMP_MAP_ALLOC;
|
|
364
|
|
365 goacc_lazy_initialize ();
|
|
366
|
|
367 struct goacc_thread *thr = goacc_thread ();
|
|
368 struct gomp_device_descr *acc_dev = thr->dev;
|
|
369
|
|
370 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
371 {
|
|
372 if (d != h)
|
|
373 gomp_fatal ("cannot map data on shared-memory system");
|
|
374 }
|
|
375 else
|
|
376 {
|
|
377 struct goacc_thread *thr = goacc_thread ();
|
|
378
|
|
379 if (!d || !h || !s)
|
|
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
|
|
381 (void *)h, (int)s, (void *)d, (int)s);
|
|
382
|
145
|
383 acc_prof_info prof_info;
|
|
384 acc_api_info api_info;
|
|
385 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
386
|
111
|
387 gomp_mutex_lock (&acc_dev->lock);
|
|
388
|
|
389 if (lookup_host (acc_dev, h, s))
|
|
390 {
|
|
391 gomp_mutex_unlock (&acc_dev->lock);
|
|
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
|
|
393 (int)s);
|
|
394 }
|
|
395
|
145
|
396 if (lookup_dev (&thr->dev->mem_map, d, s))
|
111
|
397 {
|
|
398 gomp_mutex_unlock (&acc_dev->lock);
|
|
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
|
|
400 (int)s);
|
|
401 }
|
|
402
|
|
403 gomp_mutex_unlock (&acc_dev->lock);
|
|
404
|
|
405 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
|
145
|
406 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
|
|
407 assert (tgt);
|
|
408 splay_tree_key n = tgt->list[0].key;
|
|
409 assert (n->refcount == 1);
|
|
410 assert (n->virtual_refcount == 0);
|
|
411 /* Special reference counting behavior. */
|
|
412 n->refcount = REFCOUNT_INFINITY;
|
111
|
413
|
145
|
414 if (profiling_p)
|
|
415 {
|
|
416 thr->prof_info = NULL;
|
|
417 thr->api_info = NULL;
|
|
418 }
|
|
419 }
|
111
|
420 }
|
|
421
|
|
/* Remove the mapping for host address H that was established by
   acc_map_data.  It is a fatal error if H is not the exact start of a
   block that acc_map_data mapped (recognized by REFCOUNT_INFINITY).  */

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  size_t host_size = n->host_end - n->host_start;

  /* H must be the very start of the mapped block, not an interior
     address.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'virtual_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  splay_tree_remove (&acc_dev->mem_map, n);

  struct target_mem_desc *tgt = n->tgt;

  if (tgt->refcount == REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("cannot unmap target block");
    }
  else if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      /* Last reference: release the target memory descriptor itself.  */
      free (tgt->array);
      free (tgt);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
|
|
494
|
145
|
495
|
|
/* Enter dynamic mapping for a single datum.  Return the device pointer.  */

static void *
goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
{
  void *d;
  splay_tree_key n;

  if (!hostaddrs[0] || !sizes[0])
    gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* On shared-memory devices the host pointer is the device pointer.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return hostaddrs[0];

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
  if (n)
    {
      void *h = hostaddrs[0];
      size_t s = sizes[0];

      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);

      /* The existing mapping must cover the whole of [H,+S].  */
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      assert (n->refcount != REFCOUNT_LINK);
      /* Bump both the structural and the dynamic ("virtual") reference
	 counts, except for acc_map_data-style infinite mappings.  */
      if (n->refcount != REFCOUNT_INFINITY)
	{
	  n->refcount++;
	  n->virtual_refcount++;
	}

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else
    {
      const size_t mapnum = 1;

      /* Not present: drop the lock around the mapping operation, then
	 re-acquire it to read back the new entry.  */
      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
			   true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);

      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
      assert (n != NULL);
      assert (n->tgt_offset == 0);
      assert ((uintptr_t) hostaddrs[0] == n->host_start);
      d = (void *) n->tgt->tgt_start;
      gomp_mutex_unlock (&acc_dev->lock);
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}
|
|
578
|
|
/* Allocate (without copying in) device memory for host region [H,+S],
   or bump the reference count if already mapped; return the device
   pointer.  See goacc_enter_datum.  */

void *
acc_create (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}
|
|
585
|
|
/* As acc_create, but on async queue ASYNC, and without returning the
   device pointer.  */

void
acc_create_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  goacc_enter_datum (&h, &s, &kinds, async);
}
|
|
592
|
145
|
/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
/* Fallback for toolchains without attribute-alias support: plain
   forwarding wrappers.  */
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
|
|
611
|
111
|
/* Map host region [H,+S] onto the device, copying in the host contents,
   or bump the reference count if already mapped; return the device
   pointer.  See goacc_enter_datum.  */

void *
acc_copyin (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}
|
|
618
|
145
|
/* As acc_copyin, but on async queue ASYNC, and without returning the
   device pointer.  */

void
acc_copyin_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  goacc_enter_datum (&h, &s, &kinds, async);
}
|
|
625
|
|
/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
/* Fallback for toolchains without attribute-alias support: plain
   forwarding wrappers.  */
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
|
|
644
|
145
|
645
|
|
/* Exit a dynamic mapping for a single variable.  KIND selects release
   vs. delete vs. copy-back semantics; ASYNC names the async queue.  */

static void
goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
{
  /* No need to call lazy open, as the data must already have been
     mapped.  */

  /* Only the map kind proper is relevant; strip any modifier bits.  */
  kind &= 0xff;

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Nothing is ever dynamically mapped on shared-memory devices.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, s);
  if (!n)
    /* PR92726, RP92970, PR92984: no-op.  */
    goto out;

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  /* A finalizing exit drops all dynamic ("virtual") references at
     once.  */
  bool finalize = (kind == GOMP_MAP_DELETE
		   || kind == GOMP_MAP_FORCE_FROM);
  if (finalize)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount -= n->virtual_refcount;
      n->virtual_refcount = 0;
    }

  /* Otherwise drop one reference, preferring a dynamic one.  */
  if (n->virtual_refcount > 0)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount--;
      n->virtual_refcount--;
    }
  else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
    n->refcount--;

  /* Last reference gone: optionally copy the data back, then remove the
     mapping.  */
  if (n->refcount == 0)
    {
      goacc_aq aq = get_goacc_asyncqueue (async);

      bool copyout = (kind == GOMP_MAP_FROM
		      || kind == GOMP_MAP_FORCE_FROM);
      if (copyout)
	{
	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
			      + (uintptr_t) h - n->host_start);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}

      if (aq)
	/* TODO We can't do the 'is_tgt_unmapped' checking -- see the
	   'gomp_unref_tgt' comment in
	   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
	   PR92881.  */
	gomp_remove_var_async (acc_dev, n, aq);
      else
	{
	  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
	  assert (is_tgt_unmapped);
	}
    }

 out:
  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
|
|
739
|
|
/* Drop one dynamic reference to mapping [H,+S], synchronously; see
   goacc_exit_datum.  */

void
acc_delete (void *h , size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
}

/* As acc_delete, but on async queue ASYNC.  */

void
acc_delete_async (void *h , size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
}

/* Drop all dynamic references to mapping [H,+S], synchronously.  */

void
acc_delete_finalize (void *h , size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
}

/* As acc_delete_finalize, but on async queue ASYNC.  */

void
acc_delete_finalize_async (void *h , size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
}

/* Drop one dynamic reference to mapping [H,+S], copying the data back
   to the host when the last reference goes away; synchronous.  */

void
acc_copyout (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
}

/* As acc_copyout, but on async queue ASYNC.  */

void
acc_copyout_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
}

/* Drop all dynamic references to mapping [H,+S] and copy the data back
   to the host; synchronous.  */

void
acc_copyout_finalize (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
}

/* As acc_copyout_finalize, but on async queue ASYNC.  */

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
}
|
|
787
|
111
|
788 static void
|
145
|
789 update_dev_host (int is_dev, void *h, size_t s, int async)
|
111
|
790 {
|
|
791 splay_tree_key n;
|
|
792 void *d;
|
|
793
|
|
794 goacc_lazy_initialize ();
|
|
795
|
|
796 struct goacc_thread *thr = goacc_thread ();
|
|
797 struct gomp_device_descr *acc_dev = thr->dev;
|
|
798
|
|
799 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
800 return;
|
|
801
|
145
|
802 /* Fortran optional arguments that are non-present result in a
|
|
803 NULL host address here. This can safely be ignored as it is
|
|
804 not possible to 'update' a non-present optional argument. */
|
|
805 if (h == NULL)
|
|
806 return;
|
|
807
|
|
808 acc_prof_info prof_info;
|
|
809 acc_api_info api_info;
|
|
810 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
811 if (profiling_p)
|
|
812 {
|
|
813 prof_info.async = async;
|
|
814 prof_info.async_queue = prof_info.async;
|
|
815 }
|
|
816
|
111
|
817 gomp_mutex_lock (&acc_dev->lock);
|
|
818
|
|
819 n = lookup_host (acc_dev, h, s);
|
|
820
|
|
821 if (!n)
|
|
822 {
|
|
823 gomp_mutex_unlock (&acc_dev->lock);
|
|
824 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
|
|
825 }
|
|
826
|
|
827 d = (void *) (n->tgt->tgt_start + n->tgt_offset
|
|
828 + (uintptr_t) h - n->host_start);
|
|
829
|
145
|
830 goacc_aq aq = get_goacc_asyncqueue (async);
|
|
831
|
111
|
832 if (is_dev)
|
145
|
833 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
|
111
|
834 else
|
145
|
835 gomp_copy_dev2host (acc_dev, aq, h, d, s);
|
111
|
836
|
|
837 gomp_mutex_unlock (&acc_dev->lock);
|
145
|
838
|
|
839 if (profiling_p)
|
|
840 {
|
|
841 thr->prof_info = NULL;
|
|
842 thr->api_info = NULL;
|
|
843 }
|
111
|
844 }
|
|
845
|
|
/* Refresh the device copy of mapped region [H,+S] from the host,
   synchronously.  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

/* As acc_update_device, but on async queue ASYNC.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

/* Refresh the host copy of mapped region [H,+S] from the device,
   synchronously.  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

/* As acc_update_self, but on async queue ASYNC.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
|
|
869
|
|
870 void
|
145
|
871 acc_attach_async (void **hostaddr, int async)
|
111
|
872 {
|
|
873 struct goacc_thread *thr = goacc_thread ();
|
|
874 struct gomp_device_descr *acc_dev = thr->dev;
|
145
|
875 goacc_aq aq = get_goacc_asyncqueue (async);
|
131
|
876
|
145
|
877 struct splay_tree_key_s cur_node;
|
|
878 splay_tree_key n;
|
131
|
879
|
145
|
880 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
881 return;
|
131
|
882
|
111
|
883 gomp_mutex_lock (&acc_dev->lock);
|
145
|
884
|
|
885 cur_node.host_start = (uintptr_t) hostaddr;
|
|
886 cur_node.host_end = cur_node.host_start + sizeof (void *);
|
|
887 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
|
|
888
|
|
889 if (n == NULL)
|
|
890 gomp_fatal ("struct not mapped for acc_attach");
|
|
891
|
|
892 gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
|
|
893 0, NULL);
|
|
894
|
111
|
895 gomp_mutex_unlock (&acc_dev->lock);
|
|
896 }
|
|
897
|
|
/* Synchronous variant of acc_attach_async.  */

void
acc_attach (void **hostaddr)
{
  acc_attach_async (hostaddr, acc_async_sync);
}
|
|
903
|
|
904 static void
|
|
905 goacc_detach_internal (void **hostaddr, int async, bool finalize)
|
111
|
906 {
|
|
907 struct goacc_thread *thr = goacc_thread ();
|
|
908 struct gomp_device_descr *acc_dev = thr->dev;
|
145
|
909 struct splay_tree_key_s cur_node;
|
111
|
910 splay_tree_key n;
|
145
|
911 struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
|
111
|
912
|
145
|
913 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
131
|
914 return;
|
|
915
|
111
|
916 gomp_mutex_lock (&acc_dev->lock);
|
|
917
|
145
|
918 cur_node.host_start = (uintptr_t) hostaddr;
|
|
919 cur_node.host_end = cur_node.host_start + sizeof (void *);
|
|
920 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
|
|
921
|
|
922 if (n == NULL)
|
|
923 gomp_fatal ("struct not mapped for acc_detach");
|
|
924
|
|
925 gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
|
|
926
|
|
927 gomp_mutex_unlock (&acc_dev->lock);
|
|
928 }
|
|
929
|
|
/* Detach the pointer at HOSTADDR, synchronously.  */

void
acc_detach (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, false);
}

/* As acc_detach, but on async queue ASYNC.  */

void
acc_detach_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, false);
}

/* Forcibly detach the pointer at HOSTADDR, synchronously.  */

void
acc_detach_finalize (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, true);
}

/* As acc_detach_finalize, but on async queue ASYNC.  */

void
acc_detach_finalize_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, true);
}
|
|
953
|
|
954 /* Some types of (pointer) variables use several consecutive mappings, which
|
|
955 must be treated as a group for enter/exit data directives. This function
|
|
956 returns the last mapping in such a group (inclusive), or POS for singleton
|
|
957 mappings. */
|
111
|
958
|
145
|
959 static int
|
|
960 find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
|
|
961 {
|
|
962 unsigned char kind0 = kinds[pos] & 0xff;
|
|
963 int first_pos = pos;
|
|
964
|
|
965 switch (kind0)
|
111
|
966 {
|
145
|
967 case GOMP_MAP_TO_PSET:
|
|
968 while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
|
|
969 pos++;
|
|
970 /* We expect at least one GOMP_MAP_POINTER after a GOMP_MAP_TO_PSET. */
|
|
971 assert (pos > first_pos);
|
|
972 break;
|
|
973
|
|
974 case GOMP_MAP_STRUCT:
|
|
975 pos += sizes[pos];
|
|
976 break;
|
|
977
|
|
978 case GOMP_MAP_POINTER:
|
|
979 case GOMP_MAP_ALWAYS_POINTER:
|
|
980 /* These mappings are only expected after some other mapping. If we
|
|
981 see one by itself, something has gone wrong. */
|
|
982 gomp_fatal ("unexpected mapping");
|
|
983 break;
|
|
984
|
|
985 default:
|
|
986 /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
|
|
987 mapping. */
|
|
988 if (pos + 1 < mapnum)
|
|
989 {
|
|
990 unsigned char kind1 = kinds[pos + 1] & 0xff;
|
|
991 if (kind1 == GOMP_MAP_ALWAYS_POINTER)
|
|
992 return pos + 1;
|
|
993 }
|
|
994
|
|
995 /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
|
|
996 (etc.) mapping. */
|
|
997 while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
|
|
998 pos++;
|
111
|
999 }
|
|
1000
|
145
|
1001 return pos;
|
|
1002 }
|
|
1003
|
|
1004 /* Map variables for OpenACC "enter data". We can't just call
|
|
1005 gomp_map_vars_async once, because individual mapped variables might have
|
|
1006 "exit data" called for them at different times. */
|
111
|
1007
|
145
|
1008 static void
|
|
1009 goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
|
|
1010 void **hostaddrs, size_t *sizes,
|
|
1011 unsigned short *kinds, goacc_aq aq)
|
|
1012 {
|
|
1013 for (size_t i = 0; i < mapnum; i++)
|
|
1014 {
|
|
1015 int group_last = find_group_last (i, mapnum, sizes, kinds);
|
|
1016
|
|
1017 gomp_map_vars_async (acc_dev, aq,
|
|
1018 (group_last - i) + 1,
|
|
1019 &hostaddrs[i], NULL,
|
|
1020 &sizes[i], &kinds[i], true,
|
|
1021 GOMP_MAP_VARS_OPENACC_ENTER_DATA);
|
|
1022
|
|
1023 i = group_last;
|
|
1024 }
|
|
1025 }
|
|
1026
|
|
/* Unmap variables for OpenACC "exit data".  Processes all "detach"
   operations first, then performs copyback and reference-count-driven
   removal for the remaining mappings.  Holds the device lock for ACC_DEV
   for the whole operation.  */

static void
goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
			  void **hostaddrs, size_t *sizes,
			  unsigned short *kinds, goacc_aq aq)
{
  gomp_mutex_lock (&acc_dev->lock);

  /* Handle "detach" before copyback/deletion of mapped data.  */
  for (size_t i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;
      bool finalize = false;
      switch (kind)
	{
	case GOMP_MAP_FORCE_DETACH:
	  /* A "force" detach ignores the attachment counter.  */
	  finalize = true;
	  /* Fallthrough.  */

	case GOMP_MAP_DETACH:
	  {
	    /* Look up the mapping containing the pointer being detached;
	       the lookup covers just the pointer itself.  */
	    struct splay_tree_key_s cur_node;
	    uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
	    cur_node.host_start = hostaddr;
	    cur_node.host_end = cur_node.host_start + sizeof (void *);
	    splay_tree_key n
	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

	    if (n == NULL)
	      gomp_fatal ("struct not mapped for detach operation");

	    gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
	  }
	  break;
	default:
	  ;
	}
    }

  /* Second pass: copyback and removal of the mappings themselves.  */
  for (size_t i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;
      bool copyfrom = false;
      bool finalize = false;

      /* "Force"/"delete" variants drop all dynamic references at once
	 rather than decrementing by one.  */
      if (kind == GOMP_MAP_FORCE_FROM
	  || kind == GOMP_MAP_DELETE
	  || kind == GOMP_MAP_FORCE_DETACH)
	finalize = true;

      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_FORCE_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	  copyfrom = true;
	  /* Fallthrough.  */

	case GOMP_MAP_TO_PSET:
	case GOMP_MAP_POINTER:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_DETACH:
	case GOMP_MAP_FORCE_DETACH:
	  {
	    struct splay_tree_key_s cur_node;
	    size_t size;
	    /* Pointer-sized kinds carry no explicit size; use the size of a
	       host pointer for the lookup instead of sizes[i].  */
	    if (kind == GOMP_MAP_POINTER
		|| kind == GOMP_MAP_DETACH
		|| kind == GOMP_MAP_FORCE_DETACH)
	      size = sizeof (void *);
	    else
	      size = sizes[i];
	    cur_node.host_start = (uintptr_t) hostaddrs[i];
	    cur_node.host_end = cur_node.host_start + size;
	    splay_tree_key n
	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

	    /* Data not present on the device: nothing to do for this
	       mapping (OpenACC "exit data" tolerates absent data).  */
	    if (n == NULL)
	      continue;

	    if (finalize)
	      {
		/* Discard all virtual (dynamic) references in one go.  */
		if (n->refcount != REFCOUNT_INFINITY)
		  n->refcount -= n->virtual_refcount;
		n->virtual_refcount = 0;
	      }

	    if (n->virtual_refcount > 0)
	      {
		/* Drop one dynamic reference (and its mirror in the total
		   reference count).  */
		if (n->refcount != REFCOUNT_INFINITY)
		  n->refcount--;
		n->virtual_refcount--;
	      }
	    else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
	      n->refcount--;

	    /* Copy back to the host.  For plain GOMP_MAP_FROM this only
	       happens once the last reference is gone; "force"/"always"
	       variants copy back unconditionally.  */
	    if (copyfrom
		&& (kind != GOMP_MAP_FROM || n->refcount == 0))
	      gomp_copy_dev2host (acc_dev, aq, (void *) cur_node.host_start,
				  (void *) (n->tgt->tgt_start + n->tgt_offset
					    + cur_node.host_start
					    - n->host_start),
				  cur_node.host_end - cur_node.host_start);

	    if (n->refcount == 0)
	      gomp_remove_var_async (acc_dev, n, aq);
	  }
	  break;

	case GOMP_MAP_STRUCT:
	  {
	    /* sizes[i] holds the number of member mappings that follow the
	       GOMP_MAP_STRUCT entry; process each of them.  */
	    int elems = sizes[i];
	    for (int j = 1; j <= elems; j++)
	      {
		struct splay_tree_key_s k;
		k.host_start = (uintptr_t) hostaddrs[i + j];
		k.host_end = k.host_start + sizes[i + j];
		splay_tree_key str;
		str = splay_tree_lookup (&acc_dev->mem_map, &k);
		if (str)
		  {
		    /* Same reference-count dance as for the scalar case
		       above, applied to each struct member mapping.  */
		    if (finalize)
		      {
			if (str->refcount != REFCOUNT_INFINITY)
			  str->refcount -= str->virtual_refcount;
			str->virtual_refcount = 0;
		      }
		    if (str->virtual_refcount > 0)
		      {
			if (str->refcount != REFCOUNT_INFINITY)
			  str->refcount--;
			str->virtual_refcount--;
		      }
		    else if (str->refcount > 0
			     && str->refcount != REFCOUNT_INFINITY)
		      str->refcount--;
		    if (str->refcount == 0)
		      gomp_remove_var_async (acc_dev, str, aq);
		  }
	      }
	    /* Skip past the member mappings just handled.  */
	    i += elems;
	  }
	  break;

	default:
	  gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&acc_dev->lock);
}
|
111
|
1181
|
145
|
1182 void
|
|
1183 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
|
|
1184 size_t *sizes, unsigned short *kinds, int async,
|
|
1185 int num_waits, ...)
|
|
1186 {
|
|
1187 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
|
|
1188
|
|
1189 struct goacc_thread *thr;
|
|
1190 struct gomp_device_descr *acc_dev;
|
|
1191 bool data_enter = false;
|
|
1192 size_t i;
|
|
1193
|
|
1194 goacc_lazy_initialize ();
|
|
1195
|
|
1196 thr = goacc_thread ();
|
|
1197 acc_dev = thr->dev;
|
|
1198
|
|
1199 /* Determine if this is an "acc enter data". */
|
|
1200 for (i = 0; i < mapnum; ++i)
|
131
|
1201 {
|
145
|
1202 unsigned char kind = kinds[i] & 0xff;
|
|
1203
|
|
1204 if (kind == GOMP_MAP_POINTER
|
|
1205 || kind == GOMP_MAP_TO_PSET
|
|
1206 || kind == GOMP_MAP_STRUCT)
|
|
1207 continue;
|
|
1208
|
|
1209 if (kind == GOMP_MAP_FORCE_ALLOC
|
|
1210 || kind == GOMP_MAP_FORCE_PRESENT
|
|
1211 || kind == GOMP_MAP_ATTACH
|
|
1212 || kind == GOMP_MAP_FORCE_TO
|
|
1213 || kind == GOMP_MAP_TO
|
|
1214 || kind == GOMP_MAP_ALLOC)
|
131
|
1215 {
|
145
|
1216 data_enter = true;
|
|
1217 break;
|
131
|
1218 }
|
|
1219
|
145
|
1220 if (kind == GOMP_MAP_RELEASE
|
|
1221 || kind == GOMP_MAP_DELETE
|
|
1222 || kind == GOMP_MAP_DETACH
|
|
1223 || kind == GOMP_MAP_FORCE_DETACH
|
|
1224 || kind == GOMP_MAP_FROM
|
|
1225 || kind == GOMP_MAP_FORCE_FROM)
|
|
1226 break;
|
131
|
1227
|
145
|
1228 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
|
|
1229 kind);
|
131
|
1230 }
|
|
1231
|
145
|
1232 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
|
|
1233
|
|
1234 acc_prof_info prof_info;
|
|
1235 if (profiling_p)
|
|
1236 {
|
|
1237 thr->prof_info = &prof_info;
|
|
1238
|
|
1239 prof_info.event_type
|
|
1240 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
|
|
1241 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
|
|
1242 prof_info.version = _ACC_PROF_INFO_VERSION;
|
|
1243 prof_info.device_type = acc_device_type (acc_dev->type);
|
|
1244 prof_info.device_number = acc_dev->target_id;
|
|
1245 prof_info.thread_id = -1;
|
|
1246 prof_info.async = async;
|
|
1247 prof_info.async_queue = prof_info.async;
|
|
1248 prof_info.src_file = NULL;
|
|
1249 prof_info.func_name = NULL;
|
|
1250 prof_info.line_no = -1;
|
|
1251 prof_info.end_line_no = -1;
|
|
1252 prof_info.func_line_no = -1;
|
|
1253 prof_info.func_end_line_no = -1;
|
|
1254 }
|
|
1255 acc_event_info enter_exit_data_event_info;
|
|
1256 if (profiling_p)
|
|
1257 {
|
|
1258 enter_exit_data_event_info.other_event.event_type
|
|
1259 = prof_info.event_type;
|
|
1260 enter_exit_data_event_info.other_event.valid_bytes
|
|
1261 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
|
|
1262 enter_exit_data_event_info.other_event.parent_construct
|
|
1263 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
|
|
1264 enter_exit_data_event_info.other_event.implicit = 0;
|
|
1265 enter_exit_data_event_info.other_event.tool_info = NULL;
|
|
1266 }
|
|
1267 acc_api_info api_info;
|
|
1268 if (profiling_p)
|
|
1269 {
|
|
1270 thr->api_info = &api_info;
|
111
|
1271
|
145
|
1272 api_info.device_api = acc_device_api_none;
|
|
1273 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
|
|
1274 api_info.device_type = prof_info.device_type;
|
|
1275 api_info.vendor = -1;
|
|
1276 api_info.device_handle = NULL;
|
|
1277 api_info.context_handle = NULL;
|
|
1278 api_info.async_handle = NULL;
|
|
1279 }
|
|
1280
|
|
1281 if (profiling_p)
|
|
1282 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
|
|
1283 &api_info);
|
|
1284
|
|
1285 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
1286 || (flags & GOACC_FLAG_HOST_FALLBACK))
|
|
1287 {
|
|
1288 prof_info.device_type = acc_device_host;
|
|
1289 api_info.device_type = prof_info.device_type;
|
|
1290
|
|
1291 goto out_prof;
|
|
1292 }
|
|
1293
|
|
1294 if (num_waits)
|
|
1295 {
|
|
1296 va_list ap;
|
|
1297
|
|
1298 va_start (ap, num_waits);
|
|
1299 goacc_wait (async, num_waits, &ap);
|
|
1300 va_end (ap);
|
|
1301 }
|
|
1302
|
|
1303 goacc_aq aq = get_goacc_asyncqueue (async);
|
|
1304
|
|
1305 if (data_enter)
|
|
1306 goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
|
|
1307 else
|
|
1308 goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
|
|
1309
|
|
1310 out_prof:
|
|
1311 if (profiling_p)
|
|
1312 {
|
|
1313 prof_info.event_type
|
|
1314 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
|
|
1315 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
|
|
1316 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
|
|
1317 &api_info);
|
|
1318
|
|
1319 thr->prof_info = NULL;
|
|
1320 thr->api_info = NULL;
|
|
1321 }
|
111
|
1322 }
|