/* Copyright (C) 2013-2017 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

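/* Look up KEY in MEM_MAP.  For a zero-length key (host_start == host_end)
   a plain lookup can miss mappings that merely touch the address, so probe
   with the end extended by one byte, then with the start moved back by one
   byte, before falling back to the unmodified lookup.  */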
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

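/* Like gomp_map_lookup, but for a zero-length key probe only with the end
   extended by one byte, so a zero-length array section matches a mapping
   that starts at or covers its address, but not one that ends just before
   it.  */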
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

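/* Copy SIZE bytes between host and device through COPY_FUNC, one of
   DEVICEP's host2dev/dev2host hooks.  DST and SRC are the names of the
   destination and source sides ("host" or "dev"), used only in the error
   message.  On failure DEVICEP's lock is released before gomp_fatal.  */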
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    void *d, const void *h, size_t sz)
{
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

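/* Return the map kind at index IDX in KINDS.  With SHORT_MAPKIND the array
   holds 16-bit entries: the low 8 bits are the GOMP_MAP_* kind and the high
   8 bits the log2 of the variable's alignment.  Without it the entries are
   8-bit, with a 3-bit kind and the alignment above it (this matches the
   typemask/rshift values used by the callers).  As an illustration, in the
   16-bit encoding a 16-byte aligned GOMP_MAP_TO entry would be encoded as
   (4 << 8) | GOMP_MAP_TO.  */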
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}

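/* Store into the device copy at TGT->tgt_start + TARGET_OFFSET the device
   address corresponding to the host pointer HOST_PTR.  BIAS is first added
   to the host value to find the already mapped object (for array sections
   the pointer's size slot carries this bias), then subtracted again from
   the resulting device address.  A NULL host pointer is translated to a
   NULL device pointer.  */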
static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
                  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      gomp_copy_host2dev (devicep,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *));
}

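/* Map the I-th element of a GOMP_MAP_STRUCT group whose containing
   structure is already mapped as N.  FIRST is the index of the group's
   first field; an existing mapping N2 is accepted only if it lives in the
   same target block as N at the same relative offset.  Zero-sized elements
   are additionally probed one byte before and one byte past their address.
   Dies if the element wasn't mapped together with the rest of the
   structure.  */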
static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
                              &tgt->list[i], kind & typemask);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                      kind & typemask);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                  kind & typemask);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

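/* Return the device address for map entry I.  For a mapped entry it is the
   target block start plus offsets.  Otherwise tgt->list[i].offset is a
   sentinel: ~0 passes the host value through (NULL, firstprivate-int, or a
   use_device_ptr already rewritten to a device address), ~1 passes NULL
   (e.g. an unmapped zero-length array section), and ~2 derives the address
   from the following entry (GOMP_MAP_STRUCT).  Any other value is an offset
   into this TGT's own block.  */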
static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
           + tgt->list[i + 1].key->tgt_offset
           + tgt->list[i + 1].offset
           + (uintptr_t) hostaddrs[i]
           - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}

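/* Map the MAPNUM objects described by HOSTADDRS, SIZES and KINDS on DEVICEP,
   reusing existing mappings where possible, and return a descriptor of the
   newly allocated block.  The first pass below computes the size and
   alignment needed for the objects that are not mapped yet; the block is
   then allocated and a second pass copies data and inserts the new splay
   tree entries.  If DEVADDRS is non-NULL it supplies an existing device
   allocation (MAPNUM must then be 1).  PRAGMA_KIND distinguishes target,
   target data and enter data uses; for GOMP_MAP_VARS_TARGET the block
   starts with an array of MAPNUM device pointers that is passed to the
   target function.  */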
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
        {
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start;
          splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("use_device_ptr pointer wasn't mapped");
            }
          cur_node.host_start -= n->host_start;
          hostaddrs[i]
            = (void *) (n->tgt->tgt_start + n->tgt_offset
                        + cur_node.host_start);
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 2;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first]
                          - (uintptr_t) hostaddrs[i];
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                tgt->list[i].key = NULL;
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                      sizes, kinds);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 1;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = ~(uintptr_t) 1;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
                                kind & typemask);
      else
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate a TGT_SIZE block of memory aligned to TGT_ALIGN.  */
      /* FIXME: Perhaps change the interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_USE_DEVICE_PTR:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                            sizes, kinds);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *));
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
                                      kind & typemask);
            else
              {
                k->link_key = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != ~(size_t) 0)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = ~(size_t) 0;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i]);
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* FIXME: see above FIXME comment.  */
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
                                                         j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
                          gomp_map_pointer (tgt,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j]);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *));
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) n->tgt_offset,
                                            &tgt_addr, sizeof (void *));
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          /* FIXME: see above FIXME comment.  */
          gomp_copy_host2dev (devicep,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *));
        }
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        {
          splay_tree_remove (&devicep->mem_map, k);
          if (k->link_key)
            splay_tree_insert (&devicep->mem_map,
                               (splay_tree_node) k->link_key);
          if (k->tgt->refcount > 1)
            k->tgt->refcount--;
          else
            gomp_unmap_tgt (k->tgt);
        }
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}

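/* Implement "omp target update": for each of the MAPNUM objects described
   by HOSTADDRS and SIZES that is mapped on DEVICEP, copy to and/or from the
   device according to the direction encoded in KINDS.  An object extending
   beyond its mapped region is a fatal error; zero-sized entries and
   unmapped objects are skipped.  */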
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, devaddr, hostaddr, size);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

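/* For illustration only (this layout is implied by the indexing in
   gomp_load_image_to_device below rather than spelled out here):
   HOST_TABLE can be viewed as

     struct host_table
     {
       void **funcs_begin;   addresses of host functions
       void **funcs_end;
       void **vars_begin;    one { address, size } pair per variable
       void **vars_end;
     };

   so that num_funcs = funcs_end - funcs_begin and
   num_vars = (vars_end - vars_begin) / 2.  */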
static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find the mapping at the start of the node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          splay_tree_remove (&devicep->mem_map, n);
          if (n->link_key)
            {
              if (n->tgt->refcount > 1)
                n->tgt->refcount--;
              else
                {
                  is_tgt_unmapped = true;
                  gomp_unmap_tgt (n->tgt);
                }
            }
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while it is being
   loaded.  It takes the descriptor of the host func and var tables
   HOST_TABLE, the type of the target, and the TARGET_DATA needed by the
   target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while it is being
   unloaded.  It takes the descriptor of the host func and var tables
   HOST_TABLE, the type of the target, and the TARGET_DATA needed by the
   target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to the device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from the device all images currently registered.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}

/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
      }
}

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
                         void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
        {
          gomp_mutex_unlock (&devicep->lock);
          return NULL;
        }

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
        return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
   hw device), use host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI,
   but we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with the addresses of the host objects, the sizes of
   the host objects (resp., for pointer kinds, the pointer bias and an
   assumed size of sizeof (void *)) and the kinds.  */

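/* For illustration only (hand-written sketch, not actual compiler output):
   for a construct like

     int x = 42;
     #pragma omp target map(tofrom: x)
     x++;

   the compiler outlines the body and emits approximately

     static void fn0 (void *data) { ++*(int *) ((void **) data)[0]; }

     void *hostaddrs[] = { &x };
     size_t sizes[] = { sizeof (int) };
     unsigned char kinds[] = { (2 << 3) | GOMP_MAP_TOFROM };
     GOMP_target (GOMP_DEVICE_ICV, fn0, NULL, 1, hostaddrs, sizes, kinds);

   with the log2 alignment of x (2) encoded above the 3-bit kind, as
   described for get_kind above.  */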
|
|
1416 void
|
|
1417 GOMP_target (int device, void (*fn) (void *), const void *unused,
|
|
1418 size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
1419 unsigned char *kinds)
|
|
1420 {
|
|
1421 struct gomp_device_descr *devicep = resolve_device (device);
|
|
1422
|
|
1423 void *fn_addr;
|
|
1424 if (devicep == NULL
|
|
1425 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
1426 /* All shared memory devices should use the GOMP_target_ext function. */
|
|
1427 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|
|
1428 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
|
|
1429 return gomp_target_fallback (fn, hostaddrs);
|
|
1430
|
|
1431 struct target_mem_desc *tgt_vars
|
|
1432 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
|
|
1433 GOMP_MAP_VARS_TARGET);
|
|
1434 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
|
|
1435 NULL);
|
|
1436 gomp_unmap_vars (tgt_vars, true);
|
|
1437 }
|
|
1438
|
|
1439 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
|
|
1440 and several arguments have been added:
|
|
1441 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
|
|
1442 DEPEND is array of dependencies, see GOMP_task for details.
|
|
1443
|
|
1444 ARGS is a pointer to an array consisting of a variable number of both
|
|
1445 device-independent and device-specific arguments, which can take one two
|
|
1446 elements where the first specifies for which device it is intended, the type
|
|
1447 and optionally also the value. If the value is not present in the first
|
|
1448 one, the whole second element the actual value. The last element of the
|
|
1449 array is a single NULL. Among the device independent can be for example
|
|
1450 NUM_TEAMS and THREAD_LIMIT.
|
|
1451
|
|
1452 NUM_TEAMS is positive if GOMP_teams will be called in the body with
|
|
1453 that value, or 1 if teams construct is not present, or 0, if
|
|
1454 teams construct does not have num_teams clause and so the choice is
|
|
1455 implementation defined, and -1 if it can't be determined on the host
|
|
1456 what value will GOMP_teams have on the device.
|
|
1457 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
|
|
1458 body with that value, or 0, if teams construct does not have thread_limit
|
|
1459 clause or the teams construct is not present, or -1 if it can't be
|
|
1460 determined on the host what value will GOMP_teams have on the device. */
|
|
1461
|
|
1462 void
|
|
1463 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
|
|
1464 void **hostaddrs, size_t *sizes, unsigned short *kinds,
|
|
1465 unsigned int flags, void **depend, void **args)
|
|
1466 {
|
|
1467 struct gomp_device_descr *devicep = resolve_device (device);
|
|
1468 size_t tgt_align = 0, tgt_size = 0;
|
|
1469 bool fpc_done = false;
|
|
1470
|
|
1471 if (flags & GOMP_TARGET_FLAG_NOWAIT)
|
|
1472 {
|
|
1473 struct gomp_thread *thr = gomp_thread ();
|
|
1474 /* Create a team if we don't have any around, as nowait
|
|
1475 target tasks make sense to run asynchronously even when
|
|
1476 outside of any parallel. */
|
|
1477 if (__builtin_expect (thr->ts.team == NULL, 0))
|
|
1478 {
|
|
1479 struct gomp_team *team = gomp_new_team (1);
|
|
1480 struct gomp_task *task = thr->task;
|
|
1481 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
|
|
1482 team->prev_ts = thr->ts;
|
|
1483 thr->ts.team = team;
|
|
1484 thr->ts.team_id = 0;
|
|
1485 thr->ts.work_share = &team->work_shares[0];
|
|
1486 thr->ts.last_work_share = NULL;
|
|
1487 #ifdef HAVE_SYNC_BUILTINS
|
|
1488 thr->ts.single_count = 0;
|
|
1489 #endif
|
|
1490 thr->ts.static_trip = 0;
|
|
1491 thr->task = &team->implicit_task[0];
|
|
1492 gomp_init_task (thr->task, NULL, icv);
|
|
1493 if (task)
|
|
1494 {
|
|
1495 thr->task = task;
|
|
1496 gomp_end_task ();
|
|
1497 free (task);
|
|
1498 thr->task = &team->implicit_task[0];
|
|
1499 }
|
|
1500 else
|
|
1501 pthread_setspecific (gomp_thread_destructor, thr);
|
|
1502 }
|
|
1503 if (thr->ts.team
|
|
1504 && !thr->task->final_task)
|
|
1505 {
|
|
1506 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
|
|
1507 sizes, kinds, flags, depend, args,
|
|
1508 GOMP_TARGET_TASK_BEFORE_MAP);
|
|
1509 return;
|
|
1510 }
|
|
1511 }
|
|
1512
|
|
1513 /* If there are depend clauses, but nowait is not present
|
|
1514 (or we are in a final task), block the parent task until the
|
|
1515 dependencies are resolved and then just continue with the rest
|
|
1516 of the function as if it is a merged task. */
|
|
1517 if (depend != NULL)
|
|
1518 {
|
|
1519 struct gomp_thread *thr = gomp_thread ();
|
|
1520 if (thr->task && thr->task->depend_hash)
|
|
1521 {
|
|
1522 /* If we might need to wait, copy firstprivate now. */
|
|
1523 calculate_firstprivate_requirements (mapnum, sizes, kinds,
|
|
1524 &tgt_align, &tgt_size);
|
|
1525 if (tgt_align)
|
|
1526 {
|
|
1527 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
|
|
1528 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
|
|
1529 tgt_align, tgt_size);
|
|
1530 }
|
|
1531 fpc_done = true;
|
|
1532 gomp_task_maybe_wait_for_dependencies (depend);
|
|
1533 }
|
|
1534 }
|
|
1535
|
|
1536 void *fn_addr;
|
|
1537 if (devicep == NULL
|
|
1538 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
1539 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
|
|
1540 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
|
|
1541 {
|
|
1542 if (!fpc_done)
|
|
1543 {
|
|
1544 calculate_firstprivate_requirements (mapnum, sizes, kinds,
|
|
1545 &tgt_align, &tgt_size);
|
|
1546 if (tgt_align)
|
|
1547 {
|
|
1548 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
|
|
1549 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
|
|
1550 tgt_align, tgt_size);
|
|
1551 }
|
|
1552 }
|
|
1553 gomp_target_fallback (fn, hostaddrs);
|
|
1554 return;
|
|
1555 }
|
|
1556
|
|
1557 struct target_mem_desc *tgt_vars;
|
|
1558 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
1559 {
|
|
1560 if (!fpc_done)
|
|
1561 {
|
|
1562 calculate_firstprivate_requirements (mapnum, sizes, kinds,
|
|
1563 &tgt_align, &tgt_size);
|
|
1564 if (tgt_align)
|
|
1565 {
|
|
1566 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
|
|
1567 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
|
|
1568 tgt_align, tgt_size);
|
|
1569 }
|
|
1570 }
|
|
1571 tgt_vars = NULL;
|
|
1572 }
|
|
1573 else
|
|
1574 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
|
|
1575 true, GOMP_MAP_VARS_TARGET);
|
|
1576 devicep->run_func (devicep->target_id, fn_addr,
|
|
1577 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
|
|
1578 args);
|
|
1579 if (tgt_vars)
|
|
1580 gomp_unmap_vars (tgt_vars, true);
|
|
1581 }
|
|
1582
|
|
1583 /* Host fallback for GOMP_target_data{,_ext} routines. */
|
|
1584
|
|
1585 static void
|
|
1586 gomp_target_data_fallback (void)
|
|
1587 {
|
|
1588 struct gomp_task_icv *icv = gomp_icv (false);
|
|
1589 if (icv->target_data)
|
|
1590 {
|
|
1591 /* Even when doing a host fallback, if there are any active
|
|
1592 #pragma omp target data constructs, need to remember the
|
|
1593 new #pragma omp target data, otherwise GOMP_target_end_data
|
|
1594 would get out of sync. */
|
|
1595 struct target_mem_desc *tgt
|
|
1596 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
|
|
1597 GOMP_MAP_VARS_DATA);
|
|
1598 tgt->prev = icv->target_data;
|
|
1599 icv->target_data = tgt;
|
|
1600 }
|
|
1601 }
|
|

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
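/* Illustrative note (not from the original sources): a user-level

     #pragma omp target data map(to: a[0:n]) map(from: b[0:n])
     { ... }

   is lowered by GCC to a GOMP_target_data_ext call on entry and a
   matching GOMP_target_end_data call on exit; the TGT descriptors
   above form a stack (via tgt->prev), so nested data regions unwind
   in LIFO order.  */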

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds,
                        unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags | GOMP_TARGET_FLAG_UPDATE,
                                           depend, NULL, GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (team
                  && (gomp_team_barrier_cancelled (&team->barrier)
                      || (thr->task->taskgroup
                          && thr->task->taskgroup->cancelled)))
                return;

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
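/* Illustrative note (not from the original sources): the typical source
   form reaching GOMP_target_update_ext is

     #pragma omp target update from(result[0:n]) depend(in: result) nowait

   where the nowait-plus-depend combination takes the
   gomp_create_target_task path above, while a plain

     #pragma omp target update to(data[0:n])

   falls through to the blocking gomp_update call.  */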

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
        {
        case GOMP_MAP_FROM:
        case GOMP_MAP_ALWAYS_FROM:
        case GOMP_MAP_DELETE:
        case GOMP_MAP_RELEASE:
        case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
        case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start + sizes[i];
          splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                              || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
            ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
            : splay_tree_lookup (&devicep->mem_map, &cur_node);
          if (!k)
            continue;

          if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
            k->refcount--;
          if ((kind == GOMP_MAP_DELETE
               || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
              && k->refcount != REFCOUNT_INFINITY)
            k->refcount = 0;

          if ((kind == GOMP_MAP_FROM && k->refcount == 0)
              || kind == GOMP_MAP_ALWAYS_FROM)
            gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
                                (void *) (k->tgt->tgt_start + k->tgt_offset
                                          + cur_node.host_start
                                          - k->host_start),
                                cur_node.host_end - cur_node.host_start);
          if (k->refcount == 0)
            {
              splay_tree_remove (&devicep->mem_map, k);
              if (k->link_key)
                splay_tree_insert (&devicep->mem_map,
                                   (splay_tree_node) k->link_key);
              if (k->tgt->refcount > 1)
                k->tgt->refcount--;
              else
                gomp_unmap_tgt (k->tgt);
            }

          break;
        default:
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
                      kind);
        }
    }

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
                             size_t *sizes, unsigned short *kinds,
                             unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags, depend, NULL,
                                           GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (team
                  && (gomp_team_barrier_cancelled (&team->barrier)
                      || (thr->task->taskgroup
                          && thr->task->taskgroup->cancelled)))
                return;

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
                         &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
          i += sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
                       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
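/* Illustrative note (not from the original sources): this entry point
   serves both directions, distinguished by GOMP_TARGET_FLAG_EXIT_DATA:

     #pragma omp target enter data map(to: buf[0:n])    maps/copies in
     #pragma omp target exit data map(from: buf[0:n])   copies out/unmaps

   GOMP_MAP_STRUCT entries bundle a struct with its mapped members,
   which is why the enter-data loop above consumes sizes[i] following
   entries in a single gomp_map_vars call.  */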

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
          || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
          || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
        {
          ttask->state = GOMP_TARGET_TASK_FALLBACK;
          gomp_target_fallback (ttask->fn, ttask->hostaddrs);
          return false;
        }

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
        {
          if (ttask->tgt)
            gomp_unmap_vars (ttask->tgt, true);
          return false;
        }

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        {
          ttask->tgt = NULL;
          actual_arguments = ttask->hostaddrs;
        }
      else
        {
          ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
                                      NULL, ttask->sizes, ttask->kinds, true,
                                      GOMP_MAP_VARS_TARGET);
          actual_arguments = (void *) ttask->tgt->tgt_start;
        }
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
                               ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
           || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
           || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
                         NULL, &ttask->sizes[i], &ttask->kinds[i], true,
                         GOMP_MAP_VARS_ENTER_DATA);
          i += ttask->sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
                       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                    ttask->kinds);
  return false;
}
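/* Illustrative note (not from the original sources): the boolean result
   of gomp_target_task_fn tells the task scheduler whether the target
   task is still in flight.  A sketch of the async path, under the
   assumption that the plugin signals completion through
   GOMP_PLUGIN_target_task_completion as elsewhere in libgomp:

     state BEFORE_MAP -> READY_TO_RUN   (this function, returns true)
     plugin completion callback fires
     state FINISHED -> re-entered here to gomp_unmap_vars (returns false)

   Only the ordering shown is implied by the code above.  */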

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
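/* Illustrative note (not from the original sources): on the host,
   #pragma omp teams num_teams(4) thread_limit(8) is lowered to roughly
   GOMP_teams (4, 8).  Only the thread-limit ICV is recorded here;
   NUM_TEAMS is deliberately ignored by this implementation, which runs
   a single team on the host.  */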

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
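/* Illustrative usage sketch (not from the original sources), assuming a
   non-shared-memory device 0 exists:

     void *p = omp_target_alloc (1024, 0);
     if (p != NULL)
       {
         ...use p in target regions via is_device_ptr(p)...
         omp_target_free (p, 0);
       }

   Passing omp_get_initial_device () instead of 0 degenerates to plain
   malloc/free, per the host-fallback branches above.  */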

int
omp_target_is_present (void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
                   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                       (char *) dst + dst_offset,
                                       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
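/* Illustrative usage sketch (not from the original sources): filling a
   device buffer from the host, where host_num is the value of
   omp_get_initial_device ():

     int host_num = omp_get_initial_device ();
     double data[256] = { 0 };
     void *dev = omp_target_alloc (sizeof data, 0);
     if (dev != NULL
         && omp_target_memcpy (dev, data, sizeof data, 0, 0,
                               0, host_num) == 0)
       ...use the device copy, then omp_target_free (dev, 0)...

   The return convention is 0 on success and EINVAL on any failure,
   matching the branches above.  */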

static int
omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
                               int num_dims, const size_t *volume,
                               const size_t *dst_offsets,
                               const size_t *src_offsets,
                               const size_t *dst_dimensions,
                               const size_t *src_dimensions,
                               struct gomp_device_descr *dst_devicep,
                               struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
          || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
          || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
        return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
        {
          memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
          ret = 1;
        }
      else if (src_devicep == NULL)
        ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (char *) src + src_off, length);
      else if (dst_devicep == NULL)
        ret = src_devicep->dev2host_func (src_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (char *) src + src_off, length);
      else if (src_devicep == dst_devicep)
        ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                         (char *) dst + dst_off,
                                         (char *) src + src_off, length);
      else
        ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
        || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
                                           (char *) src + src_off,
                                           element_size, num_dims - 1,
                                           volume + 1, dst_offsets + 1,
                                           src_offsets + 1, dst_dimensions + 1,
                                           src_dimensions + 1, dst_devicep,
                                           src_devicep);
      if (ret)
        return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}

int
omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
                        int num_dims, const size_t *volume,
                        const size_t *dst_offsets,
                        const size_t *src_offsets,
                        const size_t *dst_dimensions,
                        const size_t *src_dimensions,
                        int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
                                           volume, dst_offsets, src_offsets,
                                           dst_dimensions, src_dimensions,
                                           dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
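/* Illustrative usage sketch (not from the original sources): copying a
   2x3 sub-block out of a 4x5 host matrix into a 2x3 device buffer.
   Calling with dst == NULL and src == NULL queries the supported
   dimensionality (INT_MAX here, i.e. effectively unlimited):

     size_t volume[2]   = { 2, 3 };   rows and columns to copy
     size_t dst_offs[2] = { 0, 0 };
     size_t src_offs[2] = { 1, 1 };   start at row 1, column 1
     size_t dst_dims[2] = { 2, 3 };
     size_t src_dims[2] = { 4, 5 };
     int err = omp_target_memcpy_rect (dev_buf, host_buf, sizeof (double),
                                       2, volume, dst_offs, src_offs,
                                       dst_dims, src_dims,
                                       0, omp_get_initial_device ());
   */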

int
omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
                          size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
          == (uintptr_t) device_ptr + device_offset
          && n->host_start <= cur_node.host_start
          && n->host_end >= cur_node.host_end)
        ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_disassociate_ptr (void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
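/* Illustrative usage sketch (not from the original sources): pairing a
   host array with device memory so that later map clauses reuse the
   association (the refcount is pinned to REFCOUNT_INFINITY above, so
   plain unmapping never removes it):

     double host_a[64];
     void *dev_a = omp_target_alloc (sizeof host_a, 0);
     if (omp_target_associate_ptr (host_a, dev_a, sizeof host_a, 0, 0) == 0)
       {
         ...target regions mapping host_a now use dev_a's storage...
         omp_target_disassociate_ptr (host_a, 0);
       }
     omp_target_free (dev_a, 0);
   */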

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the
   plugin is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f) \
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n) \
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
          || !DLSYM_OPT (openacc.register_async_cleanup,
                         openacc_register_async_cleanup)
          || !DLSYM_OPT (openacc.async_test, openacc_async_test)
          || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
          || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
          || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
          || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
          || !DLSYM_OPT (openacc.async_wait_all_async,
                         openacc_async_wait_all_async)
          || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
                         openacc_destroy_thread_data))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
          goto fail;
        }

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
                         openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
                         openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
        {
          /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
          goto fail;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return true;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return false;
}
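/* Illustrative note (not from the original sources): a plugin exports a
   flat C symbol per hook, named by prepending "GOMP_OFFLOAD_" as the
   DLSYM macros above show.  A minimal OpenMP-capable plugin would thus
   define, among others, symbols along these lines:

     const char *GOMP_OFFLOAD_get_name (void);
     unsigned int GOMP_OFFLOAD_get_caps (void);
     int GOMP_OFFLOAD_get_num_devices (void);
     void *GOMP_OFFLOAD_alloc (int device, size_t size);

   The authoritative signatures live in libgomp-plugin.h; the ones shown
   here are only indicative.  */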

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
        {
          ret = devicep->fini_device_func (devicep->target_id);
          devicep->state = GOMP_DEVICE_FINALIZED;
        }
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
        gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime needed for offloading.
   It parses the list of offload targets and tries to load the plugins for
   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_TARGETS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;

        next = strchr (cur, ',');

        plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
                                       + strlen (prefix) + strlen (suffix));
        if (!plugin_name)
          {
            num_devices = 0;
            break;
          }

        strcpy (plugin_name, prefix);
        strncat (plugin_name, cur, next ? next - cur : strlen (cur));
        strcat (plugin_name, suffix);

        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            new_num_devices = current_device.get_num_devices_func ();
            if (new_num_devices >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */

                devices = realloc (devices, (num_devices + new_num_devices)
                                   * sizeof (struct gomp_device_descr));
                if (!devices)
                  {
                    num_devices = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.state = GOMP_DEVICE_UNINITIALIZED;
                current_device.openacc.data_environ = NULL;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
                    devices[num_devices] = current_device;
                    gomp_mutex_init (&devices[num_devices].lock);
                    num_devices++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
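/* Illustrative note (not from the original sources): OFFLOAD_TARGETS is
   a comma-separated list baked in at configure time.  Each entry is
   turned into a DSO name by the prefix/suffix logic above; a build with
   PTX offloading might carry the entry "nvptx", which on a typical
   Linux system would yield "libgomp-plugin-nvptx.so.1", the exact
   suffix coming from SONAME_SUFFIX.  */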

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */