131
|
1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
|
111
|
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
|
|
3
|
|
4 This file is part of the GNU Offloading and Multi Processing Library
|
|
5 (libgomp).
|
|
6
|
|
7 Libgomp is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 3, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
15 more details.
|
|
16
|
|
17 Under Section 7 of GPL version 3, you are granted additional
|
|
18 permissions described in the GCC Runtime Library Exception, version
|
|
19 3.1, as published by the Free Software Foundation.
|
|
20
|
|
21 You should have received a copy of the GNU General Public License and
|
|
22 a copy of the GCC Runtime Library Exception along with this program;
|
|
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
24 <http://www.gnu.org/licenses/>. */
|
|
25
|
|
26 /* This file contains the support of offloading. */
|
|
27
|
|
28 #include "config.h"
|
|
29 #include "libgomp.h"
|
|
30 #include "oacc-plugin.h"
|
|
31 #include "oacc-int.h"
|
|
32 #include "gomp-constants.h"
|
|
33 #include <limits.h>
|
|
34 #include <stdbool.h>
|
|
35 #include <stdlib.h>
|
|
36 #ifdef HAVE_INTTYPES_H
|
|
37 # include <inttypes.h> /* For PRIu64. */
|
|
38 #endif
|
|
39 #include <string.h>
|
|
40 #include <assert.h>
|
|
41 #include <errno.h>
|
|
42
|
|
43 #ifdef PLUGIN_SUPPORT
|
|
44 #include <dlfcn.h>
|
|
45 #include "plugin-suffix.h"
|
|
46 #endif
|
|
47
|
|
48 static void gomp_target_init (void);
|
|
49
|
|
50 /* The whole initialization code for offloading plugins is only run one. */
|
|
51 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
|
|
52
|
|
53 /* Mutex for offload image registration. */
|
|
54 static gomp_mutex_t register_lock;
|
|
55
|
|
56 /* This structure describes an offload image.
|
|
57 It contains type of the target device, pointer to host table descriptor, and
|
|
58 pointer to target data. */
|
|
59 struct offload_image_descr {
|
|
60 unsigned version;
|
|
61 enum offload_target_type type;
|
|
62 const void *host_table;
|
|
63 const void *target_data;
|
|
64 };
|
|
65
|
|
66 /* Array of descriptors of offload images. */
|
|
67 static struct offload_image_descr *offload_images;
|
|
68
|
|
69 /* Total number of offload images. */
|
|
70 static int num_offload_images;
|
|
71
|
|
72 /* Array of descriptors for all available devices. */
|
|
73 static struct gomp_device_descr *devices;
|
|
74
|
|
75 /* Total number of available devices. */
|
|
76 static int num_devices;
|
|
77
|
|
78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
|
|
79 static int num_devices_openmp;
|
|
80
|
|
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
|
|
82
|
|
83 static void *
|
|
84 gomp_realloc_unlock (void *old, size_t size)
|
|
85 {
|
|
86 void *ret = realloc (old, size);
|
|
87 if (ret == NULL)
|
|
88 {
|
|
89 gomp_mutex_unlock (®ister_lock);
|
|
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
|
|
91 }
|
|
92 return ret;
|
|
93 }
|
|
94
|
|
95 attribute_hidden void
|
|
96 gomp_init_targets_once (void)
|
|
97 {
|
|
98 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
|
|
99 }
|
|
100
|
|
101 attribute_hidden int
|
|
102 gomp_get_num_devices (void)
|
|
103 {
|
|
104 gomp_init_targets_once ();
|
|
105 return num_devices_openmp;
|
|
106 }
|
|
107
|
|
108 static struct gomp_device_descr *
|
|
109 resolve_device (int device_id)
|
|
110 {
|
|
111 if (device_id == GOMP_DEVICE_ICV)
|
|
112 {
|
|
113 struct gomp_task_icv *icv = gomp_icv (false);
|
|
114 device_id = icv->default_device_var;
|
|
115 }
|
|
116
|
|
117 if (device_id < 0 || device_id >= gomp_get_num_devices ())
|
|
118 return NULL;
|
|
119
|
|
120 gomp_mutex_lock (&devices[device_id].lock);
|
|
121 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
|
|
122 gomp_init_device (&devices[device_id]);
|
|
123 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
|
|
124 {
|
|
125 gomp_mutex_unlock (&devices[device_id].lock);
|
|
126 return NULL;
|
|
127 }
|
|
128 gomp_mutex_unlock (&devices[device_id].lock);
|
|
129
|
|
130 return &devices[device_id];
|
|
131 }
|
|
132
|
|
133
|
|
134 static inline splay_tree_key
|
|
135 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
|
|
136 {
|
|
137 if (key->host_start != key->host_end)
|
|
138 return splay_tree_lookup (mem_map, key);
|
|
139
|
|
140 key->host_end++;
|
|
141 splay_tree_key n = splay_tree_lookup (mem_map, key);
|
|
142 key->host_end--;
|
|
143 if (n)
|
|
144 return n;
|
|
145 key->host_start--;
|
|
146 n = splay_tree_lookup (mem_map, key);
|
|
147 key->host_start++;
|
|
148 if (n)
|
|
149 return n;
|
|
150 return splay_tree_lookup (mem_map, key);
|
|
151 }
|
|
152
|
|
153 static inline splay_tree_key
|
|
154 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
|
|
155 {
|
|
156 if (key->host_start != key->host_end)
|
|
157 return splay_tree_lookup (mem_map, key);
|
|
158
|
|
159 key->host_end++;
|
|
160 splay_tree_key n = splay_tree_lookup (mem_map, key);
|
|
161 key->host_end--;
|
|
162 return n;
|
|
163 }
|
|
164
|
|
165 static inline void
|
|
166 gomp_device_copy (struct gomp_device_descr *devicep,
|
|
167 bool (*copy_func) (int, void *, const void *, size_t),
|
|
168 const char *dst, void *dstaddr,
|
|
169 const char *src, const void *srcaddr,
|
|
170 size_t size)
|
|
171 {
|
|
172 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
|
|
173 {
|
|
174 gomp_mutex_unlock (&devicep->lock);
|
|
175 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
|
|
176 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
|
|
177 }
|
|
178 }
|
|
179
|
131
|
180 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
|
|
181 host to device memory transfers. */
|
|
182
|
|
183 struct gomp_coalesce_buf
|
|
184 {
|
|
185 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
|
|
186 it will be copied to the device. */
|
|
187 void *buf;
|
|
188 struct target_mem_desc *tgt;
|
|
189 /* Array with offsets, chunks[2 * i] is the starting offset and
|
|
190 chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address
|
|
191 of chunks which are to be copied to buf and later copied to device. */
|
|
192 size_t *chunks;
|
|
193 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
|
|
194 be performed. */
|
|
195 long chunk_cnt;
|
|
196 /* During construction of chunks array, how many memory regions are within
|
|
197 the last chunk. If there is just one memory region for a chunk, we copy
|
|
198 it directly to device rather than going through buf. */
|
|
199 long use_cnt;
|
|
200 };
|
|
201
|
|
202 /* Maximum size of memory region considered for coalescing. Larger copies
|
|
203 are performed directly. */
|
|
204 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
|
|
205
|
|
206 /* Maximum size of a gap in between regions to consider them being copied
|
|
207 within the same chunk. All the device offsets considered are within
|
|
208 newly allocated device memory, so it isn't fatal if we copy some padding
|
|
209 in between from host to device. The gaps come either from alignment
|
|
210 padding or from memory regions which are not supposed to be copied from
|
|
211 host to device (e.g. map(alloc:), map(from:) etc.). */
|
|
212 #define MAX_COALESCE_BUF_GAP (4 * 1024)
|
|
213
|
|
214 /* Add region with device tgt_start relative offset and length to CBUF. */
|
|
215
|
|
216 static inline void
|
|
217 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
|
|
218 {
|
|
219 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
|
|
220 return;
|
|
221 if (cbuf->chunk_cnt)
|
|
222 {
|
|
223 if (cbuf->chunk_cnt < 0)
|
|
224 return;
|
|
225 if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
|
|
226 {
|
|
227 cbuf->chunk_cnt = -1;
|
|
228 return;
|
|
229 }
|
|
230 if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
|
|
231 {
|
|
232 cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
|
|
233 cbuf->use_cnt++;
|
|
234 return;
|
|
235 }
|
|
236 /* If the last chunk is only used by one mapping, discard it,
|
|
237 as it will be one host to device copy anyway and
|
|
238 memcpying it around will only waste cycles. */
|
|
239 if (cbuf->use_cnt == 1)
|
|
240 cbuf->chunk_cnt--;
|
|
241 }
|
|
242 cbuf->chunks[2 * cbuf->chunk_cnt] = start;
|
|
243 cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
|
|
244 cbuf->chunk_cnt++;
|
|
245 cbuf->use_cnt = 1;
|
|
246 }
|
|
247
|
|
248 /* Return true for mapping kinds which need to copy data from the
|
|
249 host to device for regions that weren't previously mapped. */
|
|
250
|
|
251 static inline bool
|
|
252 gomp_to_device_kind_p (int kind)
|
|
253 {
|
|
254 switch (kind)
|
|
255 {
|
|
256 case GOMP_MAP_ALLOC:
|
|
257 case GOMP_MAP_FROM:
|
|
258 case GOMP_MAP_FORCE_ALLOC:
|
|
259 case GOMP_MAP_ALWAYS_FROM:
|
|
260 return false;
|
|
261 default:
|
|
262 return true;
|
|
263 }
|
|
264 }
|
|
265
|
111
|
266 static void
|
|
267 gomp_copy_host2dev (struct gomp_device_descr *devicep,
|
131
|
268 void *d, const void *h, size_t sz,
|
|
269 struct gomp_coalesce_buf *cbuf)
|
111
|
270 {
|
131
|
271 if (cbuf)
|
|
272 {
|
|
273 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
|
|
274 if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
|
|
275 {
|
|
276 long first = 0;
|
|
277 long last = cbuf->chunk_cnt - 1;
|
|
278 while (first <= last)
|
|
279 {
|
|
280 long middle = (first + last) >> 1;
|
|
281 if (cbuf->chunks[2 * middle + 1] <= doff)
|
|
282 first = middle + 1;
|
|
283 else if (cbuf->chunks[2 * middle] <= doff)
|
|
284 {
|
|
285 if (doff + sz > cbuf->chunks[2 * middle + 1])
|
|
286 gomp_fatal ("internal libgomp cbuf error");
|
|
287 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
|
|
288 h, sz);
|
|
289 return;
|
|
290 }
|
|
291 else
|
|
292 last = middle - 1;
|
|
293 }
|
|
294 }
|
|
295 }
|
111
|
296 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
|
|
297 }
|
|
298
|
|
299 static void
|
|
300 gomp_copy_dev2host (struct gomp_device_descr *devicep,
|
|
301 void *h, const void *d, size_t sz)
|
|
302 {
|
|
303 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
|
|
304 }
|
|
305
|
|
306 static void
|
|
307 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
|
|
308 {
|
|
309 if (!devicep->free_func (devicep->target_id, devptr))
|
|
310 {
|
|
311 gomp_mutex_unlock (&devicep->lock);
|
|
312 gomp_fatal ("error in freeing device memory block at %p", devptr);
|
|
313 }
|
|
314 }
|
|
315
|
|
316 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
|
|
317 gomp_map_0len_lookup found oldn for newn.
|
|
318 Helper function of gomp_map_vars. */
|
|
319
|
|
320 static inline void
|
|
321 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
|
|
322 splay_tree_key newn, struct target_var_desc *tgt_var,
|
131
|
323 unsigned char kind, struct gomp_coalesce_buf *cbuf)
|
111
|
324 {
|
|
325 tgt_var->key = oldn;
|
|
326 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
|
|
327 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
|
|
328 tgt_var->offset = newn->host_start - oldn->host_start;
|
|
329 tgt_var->length = newn->host_end - newn->host_start;
|
|
330
|
|
331 if ((kind & GOMP_MAP_FLAG_FORCE)
|
|
332 || oldn->host_start > newn->host_start
|
|
333 || oldn->host_end < newn->host_end)
|
|
334 {
|
|
335 gomp_mutex_unlock (&devicep->lock);
|
|
336 gomp_fatal ("Trying to map into device [%p..%p) object when "
|
|
337 "[%p..%p) is already mapped",
|
|
338 (void *) newn->host_start, (void *) newn->host_end,
|
|
339 (void *) oldn->host_start, (void *) oldn->host_end);
|
|
340 }
|
|
341
|
|
342 if (GOMP_MAP_ALWAYS_TO_P (kind))
|
|
343 gomp_copy_host2dev (devicep,
|
|
344 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
|
|
345 + newn->host_start - oldn->host_start),
|
|
346 (void *) newn->host_start,
|
131
|
347 newn->host_end - newn->host_start, cbuf);
|
111
|
348
|
|
349 if (oldn->refcount != REFCOUNT_INFINITY)
|
|
350 oldn->refcount++;
|
|
351 }
|
|
352
|
|
353 static int
|
|
354 get_kind (bool short_mapkind, void *kinds, int idx)
|
|
355 {
|
|
356 return short_mapkind ? ((unsigned short *) kinds)[idx]
|
|
357 : ((unsigned char *) kinds)[idx];
|
|
358 }
|
|
359
|
|
360 static void
|
|
361 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
|
131
|
362 uintptr_t target_offset, uintptr_t bias,
|
|
363 struct gomp_coalesce_buf *cbuf)
|
111
|
364 {
|
|
365 struct gomp_device_descr *devicep = tgt->device_descr;
|
|
366 struct splay_tree_s *mem_map = &devicep->mem_map;
|
|
367 struct splay_tree_key_s cur_node;
|
|
368
|
|
369 cur_node.host_start = host_ptr;
|
|
370 if (cur_node.host_start == (uintptr_t) NULL)
|
|
371 {
|
|
372 cur_node.tgt_offset = (uintptr_t) NULL;
|
|
373 gomp_copy_host2dev (devicep,
|
|
374 (void *) (tgt->tgt_start + target_offset),
|
|
375 (void *) &cur_node.tgt_offset,
|
131
|
376 sizeof (void *), cbuf);
|
111
|
377 return;
|
|
378 }
|
|
379 /* Add bias to the pointer value. */
|
|
380 cur_node.host_start += bias;
|
|
381 cur_node.host_end = cur_node.host_start;
|
|
382 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
|
|
383 if (n == NULL)
|
|
384 {
|
|
385 gomp_mutex_unlock (&devicep->lock);
|
|
386 gomp_fatal ("Pointer target of array section wasn't mapped");
|
|
387 }
|
|
388 cur_node.host_start -= n->host_start;
|
|
389 cur_node.tgt_offset
|
|
390 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
|
|
391 /* At this point tgt_offset is target address of the
|
|
392 array section. Now subtract bias to get what we want
|
|
393 to initialize the pointer with. */
|
|
394 cur_node.tgt_offset -= bias;
|
|
395 gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
|
131
|
396 (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
|
111
|
397 }
|
|
398
|
|
399 static void
|
|
400 gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
|
|
401 size_t first, size_t i, void **hostaddrs,
|
131
|
402 size_t *sizes, void *kinds,
|
|
403 struct gomp_coalesce_buf *cbuf)
|
111
|
404 {
|
|
405 struct gomp_device_descr *devicep = tgt->device_descr;
|
|
406 struct splay_tree_s *mem_map = &devicep->mem_map;
|
|
407 struct splay_tree_key_s cur_node;
|
|
408 int kind;
|
|
409 const bool short_mapkind = true;
|
|
410 const int typemask = short_mapkind ? 0xff : 0x7;
|
|
411
|
|
412 cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
413 cur_node.host_end = cur_node.host_start + sizes[i];
|
|
414 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
|
|
415 kind = get_kind (short_mapkind, kinds, i);
|
|
416 if (n2
|
|
417 && n2->tgt == n->tgt
|
|
418 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
|
|
419 {
|
|
420 gomp_map_vars_existing (devicep, n2, &cur_node,
|
131
|
421 &tgt->list[i], kind & typemask, cbuf);
|
111
|
422 return;
|
|
423 }
|
|
424 if (sizes[i] == 0)
|
|
425 {
|
|
426 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
|
|
427 {
|
|
428 cur_node.host_start--;
|
|
429 n2 = splay_tree_lookup (mem_map, &cur_node);
|
|
430 cur_node.host_start++;
|
|
431 if (n2
|
|
432 && n2->tgt == n->tgt
|
|
433 && n2->host_start - n->host_start
|
|
434 == n2->tgt_offset - n->tgt_offset)
|
|
435 {
|
|
436 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
|
131
|
437 kind & typemask, cbuf);
|
111
|
438 return;
|
|
439 }
|
|
440 }
|
|
441 cur_node.host_end++;
|
|
442 n2 = splay_tree_lookup (mem_map, &cur_node);
|
|
443 cur_node.host_end--;
|
|
444 if (n2
|
|
445 && n2->tgt == n->tgt
|
|
446 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
|
|
447 {
|
|
448 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
|
131
|
449 kind & typemask, cbuf);
|
111
|
450 return;
|
|
451 }
|
|
452 }
|
|
453 gomp_mutex_unlock (&devicep->lock);
|
|
454 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
|
|
455 "other mapped elements from the same structure weren't mapped "
|
|
456 "together with it", (void *) cur_node.host_start,
|
|
457 (void *) cur_node.host_end);
|
|
458 }
|
|
459
|
|
460 static inline uintptr_t
|
|
461 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
|
|
462 {
|
|
463 if (tgt->list[i].key != NULL)
|
|
464 return tgt->list[i].key->tgt->tgt_start
|
|
465 + tgt->list[i].key->tgt_offset
|
|
466 + tgt->list[i].offset;
|
|
467 if (tgt->list[i].offset == ~(uintptr_t) 0)
|
|
468 return (uintptr_t) hostaddrs[i];
|
|
469 if (tgt->list[i].offset == ~(uintptr_t) 1)
|
|
470 return 0;
|
|
471 if (tgt->list[i].offset == ~(uintptr_t) 2)
|
|
472 return tgt->list[i + 1].key->tgt->tgt_start
|
|
473 + tgt->list[i + 1].key->tgt_offset
|
|
474 + tgt->list[i + 1].offset
|
|
475 + (uintptr_t) hostaddrs[i]
|
|
476 - (uintptr_t) hostaddrs[i + 1];
|
|
477 return tgt->tgt_start + tgt->list[i].offset;
|
|
478 }
|
|
479
|
|
480 attribute_hidden struct target_mem_desc *
|
|
481 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|
482 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
|
|
483 bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
|
|
484 {
|
|
485 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
|
|
486 bool has_firstprivate = false;
|
|
487 const int rshift = short_mapkind ? 8 : 3;
|
|
488 const int typemask = short_mapkind ? 0xff : 0x7;
|
|
489 struct splay_tree_s *mem_map = &devicep->mem_map;
|
|
490 struct splay_tree_key_s cur_node;
|
|
491 struct target_mem_desc *tgt
|
|
492 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
|
|
493 tgt->list_count = mapnum;
|
|
494 tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
|
|
495 tgt->device_descr = devicep;
|
131
|
496 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
|
111
|
497
|
|
498 if (mapnum == 0)
|
|
499 {
|
|
500 tgt->tgt_start = 0;
|
|
501 tgt->tgt_end = 0;
|
|
502 return tgt;
|
|
503 }
|
|
504
|
|
505 tgt_align = sizeof (void *);
|
|
506 tgt_size = 0;
|
131
|
507 cbuf.chunks = NULL;
|
|
508 cbuf.chunk_cnt = -1;
|
|
509 cbuf.use_cnt = 0;
|
|
510 cbuf.buf = NULL;
|
|
511 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
|
|
512 {
|
|
513 cbuf.chunks
|
|
514 = (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
|
|
515 cbuf.chunk_cnt = 0;
|
|
516 }
|
111
|
517 if (pragma_kind == GOMP_MAP_VARS_TARGET)
|
|
518 {
|
|
519 size_t align = 4 * sizeof (void *);
|
|
520 tgt_align = align;
|
|
521 tgt_size = mapnum * sizeof (void *);
|
131
|
522 cbuf.chunk_cnt = 1;
|
|
523 cbuf.use_cnt = 1 + (mapnum > 1);
|
|
524 cbuf.chunks[0] = 0;
|
|
525 cbuf.chunks[1] = tgt_size;
|
111
|
526 }
|
|
527
|
|
528 gomp_mutex_lock (&devicep->lock);
|
|
529 if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
530 {
|
|
531 gomp_mutex_unlock (&devicep->lock);
|
|
532 free (tgt);
|
|
533 return NULL;
|
|
534 }
|
|
535
|
|
536 for (i = 0; i < mapnum; i++)
|
|
537 {
|
|
538 int kind = get_kind (short_mapkind, kinds, i);
|
|
539 if (hostaddrs[i] == NULL
|
|
540 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
|
|
541 {
|
|
542 tgt->list[i].key = NULL;
|
|
543 tgt->list[i].offset = ~(uintptr_t) 0;
|
|
544 continue;
|
|
545 }
|
|
546 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
|
|
547 {
|
|
548 cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
549 cur_node.host_end = cur_node.host_start;
|
|
550 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
|
|
551 if (n == NULL)
|
|
552 {
|
|
553 gomp_mutex_unlock (&devicep->lock);
|
|
554 gomp_fatal ("use_device_ptr pointer wasn't mapped");
|
|
555 }
|
|
556 cur_node.host_start -= n->host_start;
|
|
557 hostaddrs[i]
|
|
558 = (void *) (n->tgt->tgt_start + n->tgt_offset
|
|
559 + cur_node.host_start);
|
|
560 tgt->list[i].key = NULL;
|
|
561 tgt->list[i].offset = ~(uintptr_t) 0;
|
|
562 continue;
|
|
563 }
|
|
564 else if ((kind & typemask) == GOMP_MAP_STRUCT)
|
|
565 {
|
|
566 size_t first = i + 1;
|
|
567 size_t last = i + sizes[i];
|
|
568 cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
569 cur_node.host_end = (uintptr_t) hostaddrs[last]
|
|
570 + sizes[last];
|
|
571 tgt->list[i].key = NULL;
|
|
572 tgt->list[i].offset = ~(uintptr_t) 2;
|
|
573 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
|
|
574 if (n == NULL)
|
|
575 {
|
|
576 size_t align = (size_t) 1 << (kind >> rshift);
|
|
577 if (tgt_align < align)
|
|
578 tgt_align = align;
|
131
|
579 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
|
111
|
580 tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
131
|
581 tgt_size += cur_node.host_end - cur_node.host_start;
|
111
|
582 not_found_cnt += last - i;
|
|
583 for (i = first; i <= last; i++)
|
131
|
584 {
|
|
585 tgt->list[i].key = NULL;
|
|
586 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
|
|
587 & typemask))
|
|
588 gomp_coalesce_buf_add (&cbuf,
|
|
589 tgt_size - cur_node.host_end
|
|
590 + (uintptr_t) hostaddrs[i],
|
|
591 sizes[i]);
|
|
592 }
|
111
|
593 i--;
|
|
594 continue;
|
|
595 }
|
|
596 for (i = first; i <= last; i++)
|
|
597 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
|
131
|
598 sizes, kinds, NULL);
|
111
|
599 i--;
|
|
600 continue;
|
|
601 }
|
|
602 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
|
|
603 {
|
|
604 tgt->list[i].key = NULL;
|
|
605 tgt->list[i].offset = ~(uintptr_t) 1;
|
|
606 has_firstprivate = true;
|
|
607 continue;
|
|
608 }
|
|
609 cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
610 if (!GOMP_MAP_POINTER_P (kind & typemask))
|
|
611 cur_node.host_end = cur_node.host_start + sizes[i];
|
|
612 else
|
|
613 cur_node.host_end = cur_node.host_start + sizeof (void *);
|
|
614 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
|
|
615 {
|
|
616 tgt->list[i].key = NULL;
|
|
617
|
|
618 size_t align = (size_t) 1 << (kind >> rshift);
|
|
619 if (tgt_align < align)
|
|
620 tgt_align = align;
|
|
621 tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
131
|
622 gomp_coalesce_buf_add (&cbuf, tgt_size,
|
|
623 cur_node.host_end - cur_node.host_start);
|
111
|
624 tgt_size += cur_node.host_end - cur_node.host_start;
|
|
625 has_firstprivate = true;
|
|
626 continue;
|
|
627 }
|
|
628 splay_tree_key n;
|
|
629 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
|
|
630 {
|
|
631 n = gomp_map_0len_lookup (mem_map, &cur_node);
|
|
632 if (!n)
|
|
633 {
|
|
634 tgt->list[i].key = NULL;
|
|
635 tgt->list[i].offset = ~(uintptr_t) 1;
|
|
636 continue;
|
|
637 }
|
|
638 }
|
|
639 else
|
|
640 n = splay_tree_lookup (mem_map, &cur_node);
|
|
641 if (n && n->refcount != REFCOUNT_LINK)
|
|
642 gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
|
131
|
643 kind & typemask, NULL);
|
111
|
644 else
|
|
645 {
|
|
646 tgt->list[i].key = NULL;
|
|
647
|
|
648 size_t align = (size_t) 1 << (kind >> rshift);
|
|
649 not_found_cnt++;
|
|
650 if (tgt_align < align)
|
|
651 tgt_align = align;
|
|
652 tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
131
|
653 if (gomp_to_device_kind_p (kind & typemask))
|
|
654 gomp_coalesce_buf_add (&cbuf, tgt_size,
|
|
655 cur_node.host_end - cur_node.host_start);
|
111
|
656 tgt_size += cur_node.host_end - cur_node.host_start;
|
|
657 if ((kind & typemask) == GOMP_MAP_TO_PSET)
|
|
658 {
|
|
659 size_t j;
|
|
660 for (j = i + 1; j < mapnum; j++)
|
|
661 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
|
|
662 & typemask))
|
|
663 break;
|
|
664 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
|
|
665 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
|
|
666 > cur_node.host_end))
|
|
667 break;
|
|
668 else
|
|
669 {
|
|
670 tgt->list[j].key = NULL;
|
|
671 i++;
|
|
672 }
|
|
673 }
|
|
674 }
|
|
675 }
|
|
676
|
|
677 if (devaddrs)
|
|
678 {
|
|
679 if (mapnum != 1)
|
|
680 {
|
|
681 gomp_mutex_unlock (&devicep->lock);
|
|
682 gomp_fatal ("unexpected aggregation");
|
|
683 }
|
|
684 tgt->to_free = devaddrs[0];
|
|
685 tgt->tgt_start = (uintptr_t) tgt->to_free;
|
|
686 tgt->tgt_end = tgt->tgt_start + sizes[0];
|
|
687 }
|
|
688 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
|
|
689 {
|
|
690 /* Allocate tgt_align aligned tgt_size block of memory. */
|
|
691 /* FIXME: Perhaps change interface to allocate properly aligned
|
|
692 memory. */
|
|
693 tgt->to_free = devicep->alloc_func (devicep->target_id,
|
|
694 tgt_size + tgt_align - 1);
|
|
695 if (!tgt->to_free)
|
|
696 {
|
|
697 gomp_mutex_unlock (&devicep->lock);
|
|
698 gomp_fatal ("device memory allocation fail");
|
|
699 }
|
|
700
|
|
701 tgt->tgt_start = (uintptr_t) tgt->to_free;
|
|
702 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
|
|
703 tgt->tgt_end = tgt->tgt_start + tgt_size;
|
131
|
704
|
|
705 if (cbuf.use_cnt == 1)
|
|
706 cbuf.chunk_cnt--;
|
|
707 if (cbuf.chunk_cnt > 0)
|
|
708 {
|
|
709 cbuf.buf
|
|
710 = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
|
|
711 if (cbuf.buf)
|
|
712 {
|
|
713 cbuf.tgt = tgt;
|
|
714 cbufp = &cbuf;
|
|
715 }
|
|
716 }
|
111
|
717 }
|
|
718 else
|
|
719 {
|
|
720 tgt->to_free = NULL;
|
|
721 tgt->tgt_start = 0;
|
|
722 tgt->tgt_end = 0;
|
|
723 }
|
|
724
|
|
725 tgt_size = 0;
|
|
726 if (pragma_kind == GOMP_MAP_VARS_TARGET)
|
|
727 tgt_size = mapnum * sizeof (void *);
|
|
728
|
|
729 tgt->array = NULL;
|
|
730 if (not_found_cnt || has_firstprivate)
|
|
731 {
|
|
732 if (not_found_cnt)
|
|
733 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
|
|
734 splay_tree_node array = tgt->array;
|
|
735 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
|
|
736 uintptr_t field_tgt_base = 0;
|
|
737
|
|
738 for (i = 0; i < mapnum; i++)
|
|
739 if (tgt->list[i].key == NULL)
|
|
740 {
|
|
741 int kind = get_kind (short_mapkind, kinds, i);
|
|
742 if (hostaddrs[i] == NULL)
|
|
743 continue;
|
|
744 switch (kind & typemask)
|
|
745 {
|
|
746 size_t align, len, first, last;
|
|
747 splay_tree_key n;
|
|
748 case GOMP_MAP_FIRSTPRIVATE:
|
|
749 align = (size_t) 1 << (kind >> rshift);
|
|
750 tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
751 tgt->list[i].offset = tgt_size;
|
|
752 len = sizes[i];
|
|
753 gomp_copy_host2dev (devicep,
|
|
754 (void *) (tgt->tgt_start + tgt_size),
|
131
|
755 (void *) hostaddrs[i], len, cbufp);
|
111
|
756 tgt_size += len;
|
|
757 continue;
|
|
758 case GOMP_MAP_FIRSTPRIVATE_INT:
|
|
759 case GOMP_MAP_USE_DEVICE_PTR:
|
|
760 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
|
|
761 continue;
|
|
762 case GOMP_MAP_STRUCT:
|
|
763 first = i + 1;
|
|
764 last = i + sizes[i];
|
|
765 cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
766 cur_node.host_end = (uintptr_t) hostaddrs[last]
|
|
767 + sizes[last];
|
|
768 if (tgt->list[first].key != NULL)
|
|
769 continue;
|
|
770 n = splay_tree_lookup (mem_map, &cur_node);
|
|
771 if (n == NULL)
|
|
772 {
|
|
773 size_t align = (size_t) 1 << (kind >> rshift);
|
|
774 tgt_size -= (uintptr_t) hostaddrs[first]
|
|
775 - (uintptr_t) hostaddrs[i];
|
|
776 tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
777 tgt_size += (uintptr_t) hostaddrs[first]
|
|
778 - (uintptr_t) hostaddrs[i];
|
|
779 field_tgt_base = (uintptr_t) hostaddrs[first];
|
|
780 field_tgt_offset = tgt_size;
|
|
781 field_tgt_clear = last;
|
|
782 tgt_size += cur_node.host_end
|
|
783 - (uintptr_t) hostaddrs[first];
|
|
784 continue;
|
|
785 }
|
|
786 for (i = first; i <= last; i++)
|
|
787 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
|
131
|
788 sizes, kinds, cbufp);
|
111
|
789 i--;
|
|
790 continue;
|
|
791 case GOMP_MAP_ALWAYS_POINTER:
|
|
792 cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
793 cur_node.host_end = cur_node.host_start + sizeof (void *);
|
|
794 n = splay_tree_lookup (mem_map, &cur_node);
|
|
795 if (n == NULL
|
|
796 || n->host_start > cur_node.host_start
|
|
797 || n->host_end < cur_node.host_end)
|
|
798 {
|
|
799 gomp_mutex_unlock (&devicep->lock);
|
|
800 gomp_fatal ("always pointer not mapped");
|
|
801 }
|
|
802 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
|
|
803 != GOMP_MAP_ALWAYS_POINTER)
|
|
804 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
|
|
805 if (cur_node.tgt_offset)
|
|
806 cur_node.tgt_offset -= sizes[i];
|
|
807 gomp_copy_host2dev (devicep,
|
|
808 (void *) (n->tgt->tgt_start
|
|
809 + n->tgt_offset
|
|
810 + cur_node.host_start
|
|
811 - n->host_start),
|
|
812 (void *) &cur_node.tgt_offset,
|
131
|
813 sizeof (void *), cbufp);
|
111
|
814 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
|
|
815 + cur_node.host_start - n->host_start;
|
|
816 continue;
|
|
817 default:
|
|
818 break;
|
|
819 }
|
|
820 splay_tree_key k = &array->key;
|
|
821 k->host_start = (uintptr_t) hostaddrs[i];
|
|
822 if (!GOMP_MAP_POINTER_P (kind & typemask))
|
|
823 k->host_end = k->host_start + sizes[i];
|
|
824 else
|
|
825 k->host_end = k->host_start + sizeof (void *);
|
|
826 splay_tree_key n = splay_tree_lookup (mem_map, k);
|
|
827 if (n && n->refcount != REFCOUNT_LINK)
|
|
828 gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
|
131
|
829 kind & typemask, cbufp);
|
111
|
830 else
|
|
831 {
|
|
832 k->link_key = NULL;
|
|
833 if (n && n->refcount == REFCOUNT_LINK)
|
|
834 {
|
|
835 /* Replace target address of the pointer with target address
|
|
836 of mapped object in the splay tree. */
|
|
837 splay_tree_remove (mem_map, n);
|
|
838 k->link_key = n;
|
|
839 }
|
|
840 size_t align = (size_t) 1 << (kind >> rshift);
|
|
841 tgt->list[i].key = k;
|
|
842 k->tgt = tgt;
|
|
843 if (field_tgt_clear != ~(size_t) 0)
|
|
844 {
|
|
845 k->tgt_offset = k->host_start - field_tgt_base
|
|
846 + field_tgt_offset;
|
|
847 if (i == field_tgt_clear)
|
|
848 field_tgt_clear = ~(size_t) 0;
|
|
849 }
|
|
850 else
|
|
851 {
|
|
852 tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
853 k->tgt_offset = tgt_size;
|
|
854 tgt_size += k->host_end - k->host_start;
|
|
855 }
|
|
856 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
|
|
857 tgt->list[i].always_copy_from
|
|
858 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
|
|
859 tgt->list[i].offset = 0;
|
|
860 tgt->list[i].length = k->host_end - k->host_start;
|
|
861 k->refcount = 1;
|
131
|
862 k->dynamic_refcount = 0;
|
111
|
863 tgt->refcount++;
|
|
864 array->left = NULL;
|
|
865 array->right = NULL;
|
|
866 splay_tree_insert (mem_map, array);
|
|
867 switch (kind & typemask)
|
|
868 {
|
|
869 case GOMP_MAP_ALLOC:
|
|
870 case GOMP_MAP_FROM:
|
|
871 case GOMP_MAP_FORCE_ALLOC:
|
|
872 case GOMP_MAP_FORCE_FROM:
|
|
873 case GOMP_MAP_ALWAYS_FROM:
|
|
874 break;
|
|
875 case GOMP_MAP_TO:
|
|
876 case GOMP_MAP_TOFROM:
|
|
877 case GOMP_MAP_FORCE_TO:
|
|
878 case GOMP_MAP_FORCE_TOFROM:
|
|
879 case GOMP_MAP_ALWAYS_TO:
|
|
880 case GOMP_MAP_ALWAYS_TOFROM:
|
|
881 gomp_copy_host2dev (devicep,
|
|
882 (void *) (tgt->tgt_start
|
|
883 + k->tgt_offset),
|
|
884 (void *) k->host_start,
|
131
|
885 k->host_end - k->host_start, cbufp);
|
111
|
886 break;
|
|
887 case GOMP_MAP_POINTER:
|
|
888 gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
|
131
|
889 k->tgt_offset, sizes[i], cbufp);
|
111
|
890 break;
|
|
891 case GOMP_MAP_TO_PSET:
|
|
892 gomp_copy_host2dev (devicep,
|
|
893 (void *) (tgt->tgt_start
|
|
894 + k->tgt_offset),
|
|
895 (void *) k->host_start,
|
131
|
896 k->host_end - k->host_start, cbufp);
|
111
|
897
|
|
898 for (j = i + 1; j < mapnum; j++)
|
|
899 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
|
|
900 j)
|
|
901 & typemask))
|
|
902 break;
|
|
903 else if ((uintptr_t) hostaddrs[j] < k->host_start
|
|
904 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
|
|
905 > k->host_end))
|
|
906 break;
|
|
907 else
|
|
908 {
|
|
909 tgt->list[j].key = k;
|
|
910 tgt->list[j].copy_from = false;
|
|
911 tgt->list[j].always_copy_from = false;
|
|
912 if (k->refcount != REFCOUNT_INFINITY)
|
|
913 k->refcount++;
|
|
914 gomp_map_pointer (tgt,
|
|
915 (uintptr_t) *(void **) hostaddrs[j],
|
|
916 k->tgt_offset
|
|
917 + ((uintptr_t) hostaddrs[j]
|
|
918 - k->host_start),
|
131
|
919 sizes[j], cbufp);
|
111
|
920 i++;
|
|
921 }
|
|
922 break;
|
|
923 case GOMP_MAP_FORCE_PRESENT:
|
|
924 {
|
|
925 /* We already looked up the memory region above and it
|
|
926 was missing. */
|
|
927 size_t size = k->host_end - k->host_start;
|
|
928 gomp_mutex_unlock (&devicep->lock);
|
|
929 #ifdef HAVE_INTTYPES_H
|
|
930 gomp_fatal ("present clause: !acc_is_present (%p, "
|
|
931 "%"PRIu64" (0x%"PRIx64"))",
|
|
932 (void *) k->host_start,
|
|
933 (uint64_t) size, (uint64_t) size);
|
|
934 #else
|
|
935 gomp_fatal ("present clause: !acc_is_present (%p, "
|
|
936 "%lu (0x%lx))", (void *) k->host_start,
|
|
937 (unsigned long) size, (unsigned long) size);
|
|
938 #endif
|
|
939 }
|
|
940 break;
|
|
941 case GOMP_MAP_FORCE_DEVICEPTR:
|
|
942 assert (k->host_end - k->host_start == sizeof (void *));
|
|
943 gomp_copy_host2dev (devicep,
|
|
944 (void *) (tgt->tgt_start
|
|
945 + k->tgt_offset),
|
|
946 (void *) k->host_start,
|
131
|
947 sizeof (void *), cbufp);
|
111
|
948 break;
|
|
949 default:
|
|
950 gomp_mutex_unlock (&devicep->lock);
|
|
951 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
|
|
952 kind);
|
|
953 }
|
|
954
|
|
955 if (k->link_key)
|
|
956 {
|
|
957 /* Set link pointer on target to the device address of the
|
|
958 mapped object. */
|
|
959 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
|
|
960 devicep->host2dev_func (devicep->target_id,
|
|
961 (void *) n->tgt_offset,
|
|
962 &tgt_addr, sizeof (void *));
|
|
963 }
|
|
964 array++;
|
|
965 }
|
|
966 }
|
|
967 }
|
|
968
|
|
969 if (pragma_kind == GOMP_MAP_VARS_TARGET)
|
|
970 {
|
|
971 for (i = 0; i < mapnum; i++)
|
|
972 {
|
|
973 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
|
|
974 gomp_copy_host2dev (devicep,
|
|
975 (void *) (tgt->tgt_start + i * sizeof (void *)),
|
131
|
976 (void *) &cur_node.tgt_offset, sizeof (void *),
|
|
977 cbufp);
|
111
|
978 }
|
|
979 }
|
|
980
|
131
|
981 if (cbufp)
|
|
982 {
|
|
983 long c = 0;
|
|
984 for (c = 0; c < cbuf.chunk_cnt; ++c)
|
|
985 gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
|
|
986 (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
|
|
987 cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
|
|
988 free (cbuf.buf);
|
|
989 }
|
|
990
|
111
|
991 /* If the variable from "omp target enter data" map-list was already mapped,
|
|
992 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
|
|
993 gomp_exit_data. */
|
|
994 if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
|
|
995 {
|
|
996 free (tgt);
|
|
997 tgt = NULL;
|
|
998 }
|
|
999
|
|
1000 gomp_mutex_unlock (&devicep->lock);
|
|
1001 return tgt;
|
|
1002 }
|
|
1003
|
|
1004 static void
|
|
1005 gomp_unmap_tgt (struct target_mem_desc *tgt)
|
|
1006 {
|
|
1007 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
|
|
1008 if (tgt->tgt_end)
|
|
1009 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
|
|
1010
|
|
1011 free (tgt->array);
|
|
1012 free (tgt);
|
|
1013 }
|
|
1014
|
131
|
1015 attribute_hidden bool
|
|
1016 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
|
|
1017 {
|
|
1018 bool is_tgt_unmapped = false;
|
|
1019 splay_tree_remove (&devicep->mem_map, k);
|
|
1020 if (k->link_key)
|
|
1021 splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
|
|
1022 if (k->tgt->refcount > 1)
|
|
1023 k->tgt->refcount--;
|
|
1024 else
|
|
1025 {
|
|
1026 is_tgt_unmapped = true;
|
|
1027 gomp_unmap_tgt (k->tgt);
|
|
1028 }
|
|
1029 return is_tgt_unmapped;
|
|
1030 }
|
|
1031
|
111
|
1032 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
|
|
1033 variables back from device to host: if it is false, it is assumed that this
|
|
1034 has been done already. */
|
|
1035
|
|
1036 attribute_hidden void
|
|
1037 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
|
|
1038 {
|
|
1039 struct gomp_device_descr *devicep = tgt->device_descr;
|
|
1040
|
|
1041 if (tgt->list_count == 0)
|
|
1042 {
|
|
1043 free (tgt);
|
|
1044 return;
|
|
1045 }
|
|
1046
|
|
1047 gomp_mutex_lock (&devicep->lock);
|
|
1048 if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
1049 {
|
|
1050 gomp_mutex_unlock (&devicep->lock);
|
|
1051 free (tgt->array);
|
|
1052 free (tgt);
|
|
1053 return;
|
|
1054 }
|
|
1055
|
|
1056 size_t i;
|
|
1057 for (i = 0; i < tgt->list_count; i++)
|
|
1058 {
|
|
1059 splay_tree_key k = tgt->list[i].key;
|
|
1060 if (k == NULL)
|
|
1061 continue;
|
|
1062
|
|
1063 bool do_unmap = false;
|
|
1064 if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
|
|
1065 k->refcount--;
|
|
1066 else if (k->refcount == 1)
|
|
1067 {
|
|
1068 k->refcount--;
|
|
1069 do_unmap = true;
|
|
1070 }
|
|
1071
|
|
1072 if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
|
|
1073 || tgt->list[i].always_copy_from)
|
|
1074 gomp_copy_dev2host (devicep,
|
|
1075 (void *) (k->host_start + tgt->list[i].offset),
|
|
1076 (void *) (k->tgt->tgt_start + k->tgt_offset
|
|
1077 + tgt->list[i].offset),
|
|
1078 tgt->list[i].length);
|
|
1079 if (do_unmap)
|
131
|
1080 gomp_remove_var (devicep, k);
|
111
|
1081 }
|
|
1082
|
|
1083 if (tgt->refcount > 1)
|
|
1084 tgt->refcount--;
|
|
1085 else
|
|
1086 gomp_unmap_tgt (tgt);
|
|
1087
|
|
1088 gomp_mutex_unlock (&devicep->lock);
|
|
1089 }
|
|
1090
|
|
1091 static void
|
|
1092 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
|
|
1093 size_t *sizes, void *kinds, bool short_mapkind)
|
|
1094 {
|
|
1095 size_t i;
|
|
1096 struct splay_tree_key_s cur_node;
|
|
1097 const int typemask = short_mapkind ? 0xff : 0x7;
|
|
1098
|
|
1099 if (!devicep)
|
|
1100 return;
|
|
1101
|
|
1102 if (mapnum == 0)
|
|
1103 return;
|
|
1104
|
|
1105 gomp_mutex_lock (&devicep->lock);
|
|
1106 if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
1107 {
|
|
1108 gomp_mutex_unlock (&devicep->lock);
|
|
1109 return;
|
|
1110 }
|
|
1111
|
|
1112 for (i = 0; i < mapnum; i++)
|
|
1113 if (sizes[i])
|
|
1114 {
|
|
1115 cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
1116 cur_node.host_end = cur_node.host_start + sizes[i];
|
|
1117 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
|
|
1118 if (n)
|
|
1119 {
|
|
1120 int kind = get_kind (short_mapkind, kinds, i);
|
|
1121 if (n->host_start > cur_node.host_start
|
|
1122 || n->host_end < cur_node.host_end)
|
|
1123 {
|
|
1124 gomp_mutex_unlock (&devicep->lock);
|
|
1125 gomp_fatal ("Trying to update [%p..%p) object when "
|
|
1126 "only [%p..%p) is mapped",
|
|
1127 (void *) cur_node.host_start,
|
|
1128 (void *) cur_node.host_end,
|
|
1129 (void *) n->host_start,
|
|
1130 (void *) n->host_end);
|
|
1131 }
|
|
1132
|
|
1133
|
|
1134 void *hostaddr = (void *) cur_node.host_start;
|
|
1135 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
|
|
1136 + cur_node.host_start - n->host_start);
|
|
1137 size_t size = cur_node.host_end - cur_node.host_start;
|
|
1138
|
|
1139 if (GOMP_MAP_COPY_TO_P (kind & typemask))
|
131
|
1140 gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
|
111
|
1141 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
|
|
1142 gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
|
|
1143 }
|
|
1144 }
|
|
1145 gomp_mutex_unlock (&devicep->lock);
|
|
1146 }
|
|
1147
|
|
1148 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
|
|
1149 And insert to splay tree the mapping between addresses from HOST_TABLE and
|
|
1150 from loaded target image. We rely in the host and device compiler
|
|
1151 emitting variable and functions in the same order. */
|
|
1152
|
|
1153 static void
|
|
1154 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
|
|
1155 const void *host_table, const void *target_data,
|
|
1156 bool is_register_lock)
|
|
1157 {
|
|
1158 void **host_func_table = ((void ***) host_table)[0];
|
|
1159 void **host_funcs_end = ((void ***) host_table)[1];
|
|
1160 void **host_var_table = ((void ***) host_table)[2];
|
|
1161 void **host_vars_end = ((void ***) host_table)[3];
|
|
1162
|
|
1163 /* The func table contains only addresses, the var table contains addresses
|
|
1164 and corresponding sizes. */
|
|
1165 int num_funcs = host_funcs_end - host_func_table;
|
|
1166 int num_vars = (host_vars_end - host_var_table) / 2;
|
|
1167
|
|
1168 /* Load image to device and get target addresses for the image. */
|
|
1169 struct addr_pair *target_table = NULL;
|
|
1170 int i, num_target_entries;
|
|
1171
|
|
1172 num_target_entries
|
|
1173 = devicep->load_image_func (devicep->target_id, version,
|
|
1174 target_data, &target_table);
|
|
1175
|
|
1176 if (num_target_entries != num_funcs + num_vars)
|
|
1177 {
|
|
1178 gomp_mutex_unlock (&devicep->lock);
|
|
1179 if (is_register_lock)
|
|
1180 gomp_mutex_unlock (®ister_lock);
|
|
1181 gomp_fatal ("Cannot map target functions or variables"
|
|
1182 " (expected %u, have %u)", num_funcs + num_vars,
|
|
1183 num_target_entries);
|
|
1184 }
|
|
1185
|
|
1186 /* Insert host-target address mapping into splay tree. */
|
|
1187 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
|
|
1188 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
|
|
1189 tgt->refcount = REFCOUNT_INFINITY;
|
|
1190 tgt->tgt_start = 0;
|
|
1191 tgt->tgt_end = 0;
|
|
1192 tgt->to_free = NULL;
|
|
1193 tgt->prev = NULL;
|
|
1194 tgt->list_count = 0;
|
|
1195 tgt->device_descr = devicep;
|
|
1196 splay_tree_node array = tgt->array;
|
|
1197
|
|
1198 for (i = 0; i < num_funcs; i++)
|
|
1199 {
|
|
1200 splay_tree_key k = &array->key;
|
|
1201 k->host_start = (uintptr_t) host_func_table[i];
|
|
1202 k->host_end = k->host_start + 1;
|
|
1203 k->tgt = tgt;
|
|
1204 k->tgt_offset = target_table[i].start;
|
|
1205 k->refcount = REFCOUNT_INFINITY;
|
|
1206 k->link_key = NULL;
|
|
1207 array->left = NULL;
|
|
1208 array->right = NULL;
|
|
1209 splay_tree_insert (&devicep->mem_map, array);
|
|
1210 array++;
|
|
1211 }
|
|
1212
|
|
1213 /* Most significant bit of the size in host and target tables marks
|
|
1214 "omp declare target link" variables. */
|
|
1215 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
|
|
1216 const uintptr_t size_mask = ~link_bit;
|
|
1217
|
|
1218 for (i = 0; i < num_vars; i++)
|
|
1219 {
|
|
1220 struct addr_pair *target_var = &target_table[num_funcs + i];
|
|
1221 uintptr_t target_size = target_var->end - target_var->start;
|
|
1222
|
|
1223 if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
|
|
1224 {
|
|
1225 gomp_mutex_unlock (&devicep->lock);
|
|
1226 if (is_register_lock)
|
|
1227 gomp_mutex_unlock (®ister_lock);
|
|
1228 gomp_fatal ("Cannot map target variables (size mismatch)");
|
|
1229 }
|
|
1230
|
|
1231 splay_tree_key k = &array->key;
|
|
1232 k->host_start = (uintptr_t) host_var_table[i * 2];
|
|
1233 k->host_end
|
|
1234 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
|
|
1235 k->tgt = tgt;
|
|
1236 k->tgt_offset = target_var->start;
|
|
1237 k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
|
|
1238 k->link_key = NULL;
|
|
1239 array->left = NULL;
|
|
1240 array->right = NULL;
|
|
1241 splay_tree_insert (&devicep->mem_map, array);
|
|
1242 array++;
|
|
1243 }
|
|
1244
|
|
1245 free (target_table);
|
|
1246 }
|
|
1247
|
|
1248 /* Unload the mappings described by target_data from device DEVICE_P.
|
|
1249 The device must be locked. */
|
|
1250
|
|
1251 static void
|
|
1252 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
|
|
1253 unsigned version,
|
|
1254 const void *host_table, const void *target_data)
|
|
1255 {
|
|
1256 void **host_func_table = ((void ***) host_table)[0];
|
|
1257 void **host_funcs_end = ((void ***) host_table)[1];
|
|
1258 void **host_var_table = ((void ***) host_table)[2];
|
|
1259 void **host_vars_end = ((void ***) host_table)[3];
|
|
1260
|
|
1261 /* The func table contains only addresses, the var table contains addresses
|
|
1262 and corresponding sizes. */
|
|
1263 int num_funcs = host_funcs_end - host_func_table;
|
|
1264 int num_vars = (host_vars_end - host_var_table) / 2;
|
|
1265
|
|
1266 struct splay_tree_key_s k;
|
|
1267 splay_tree_key node = NULL;
|
|
1268
|
|
1269 /* Find mapping at start of node array */
|
|
1270 if (num_funcs || num_vars)
|
|
1271 {
|
|
1272 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
|
|
1273 : (uintptr_t) host_var_table[0]);
|
|
1274 k.host_end = k.host_start + 1;
|
|
1275 node = splay_tree_lookup (&devicep->mem_map, &k);
|
|
1276 }
|
|
1277
|
|
1278 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
|
|
1279 {
|
|
1280 gomp_mutex_unlock (&devicep->lock);
|
|
1281 gomp_fatal ("image unload fail");
|
|
1282 }
|
|
1283
|
|
1284 /* Remove mappings from splay tree. */
|
|
1285 int i;
|
|
1286 for (i = 0; i < num_funcs; i++)
|
|
1287 {
|
|
1288 k.host_start = (uintptr_t) host_func_table[i];
|
|
1289 k.host_end = k.host_start + 1;
|
|
1290 splay_tree_remove (&devicep->mem_map, &k);
|
|
1291 }
|
|
1292
|
|
1293 /* Most significant bit of the size in host and target tables marks
|
|
1294 "omp declare target link" variables. */
|
|
1295 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
|
|
1296 const uintptr_t size_mask = ~link_bit;
|
|
1297 bool is_tgt_unmapped = false;
|
|
1298
|
|
1299 for (i = 0; i < num_vars; i++)
|
|
1300 {
|
|
1301 k.host_start = (uintptr_t) host_var_table[i * 2];
|
|
1302 k.host_end
|
|
1303 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
|
|
1304
|
|
1305 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
|
|
1306 splay_tree_remove (&devicep->mem_map, &k);
|
|
1307 else
|
|
1308 {
|
|
1309 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
|
131
|
1310 is_tgt_unmapped = gomp_remove_var (devicep, n);
|
111
|
1311 }
|
|
1312 }
|
|
1313
|
|
1314 if (node && !is_tgt_unmapped)
|
|
1315 {
|
|
1316 free (node->tgt);
|
|
1317 free (node);
|
|
1318 }
|
|
1319 }
|
|
1320
|
|
1321 /* This function should be called from every offload image while loading.
|
|
1322 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
|
|
1323 the target, and TARGET_DATA needed by target plugin. */
|
|
1324
|
|
1325 void
|
|
1326 GOMP_offload_register_ver (unsigned version, const void *host_table,
|
|
1327 int target_type, const void *target_data)
|
|
1328 {
|
|
1329 int i;
|
|
1330
|
|
1331 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
|
|
1332 gomp_fatal ("Library too old for offload (version %u < %u)",
|
|
1333 GOMP_VERSION, GOMP_VERSION_LIB (version));
|
|
1334
|
|
1335 gomp_mutex_lock (®ister_lock);
|
|
1336
|
|
1337 /* Load image to all initialized devices. */
|
|
1338 for (i = 0; i < num_devices; i++)
|
|
1339 {
|
|
1340 struct gomp_device_descr *devicep = &devices[i];
|
|
1341 gomp_mutex_lock (&devicep->lock);
|
|
1342 if (devicep->type == target_type
|
|
1343 && devicep->state == GOMP_DEVICE_INITIALIZED)
|
|
1344 gomp_load_image_to_device (devicep, version,
|
|
1345 host_table, target_data, true);
|
|
1346 gomp_mutex_unlock (&devicep->lock);
|
|
1347 }
|
|
1348
|
|
1349 /* Insert image to array of pending images. */
|
|
1350 offload_images
|
|
1351 = gomp_realloc_unlock (offload_images,
|
|
1352 (num_offload_images + 1)
|
|
1353 * sizeof (struct offload_image_descr));
|
|
1354 offload_images[num_offload_images].version = version;
|
|
1355 offload_images[num_offload_images].type = target_type;
|
|
1356 offload_images[num_offload_images].host_table = host_table;
|
|
1357 offload_images[num_offload_images].target_data = target_data;
|
|
1358
|
|
1359 num_offload_images++;
|
|
1360 gomp_mutex_unlock (®ister_lock);
|
|
1361 }
|
|
1362
|
|
1363 void
|
|
1364 GOMP_offload_register (const void *host_table, int target_type,
|
|
1365 const void *target_data)
|
|
1366 {
|
|
1367 GOMP_offload_register_ver (0, host_table, target_type, target_data);
|
|
1368 }
|
|
1369
|
|
1370 /* This function should be called from every offload image while unloading.
|
|
1371 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
|
|
1372 the target, and TARGET_DATA needed by target plugin. */
|
|
1373
|
|
1374 void
|
|
1375 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
|
|
1376 int target_type, const void *target_data)
|
|
1377 {
|
|
1378 int i;
|
|
1379
|
|
1380 gomp_mutex_lock (®ister_lock);
|
|
1381
|
|
1382 /* Unload image from all initialized devices. */
|
|
1383 for (i = 0; i < num_devices; i++)
|
|
1384 {
|
|
1385 struct gomp_device_descr *devicep = &devices[i];
|
|
1386 gomp_mutex_lock (&devicep->lock);
|
|
1387 if (devicep->type == target_type
|
|
1388 && devicep->state == GOMP_DEVICE_INITIALIZED)
|
|
1389 gomp_unload_image_from_device (devicep, version,
|
|
1390 host_table, target_data);
|
|
1391 gomp_mutex_unlock (&devicep->lock);
|
|
1392 }
|
|
1393
|
|
1394 /* Remove image from array of pending images. */
|
|
1395 for (i = 0; i < num_offload_images; i++)
|
|
1396 if (offload_images[i].target_data == target_data)
|
|
1397 {
|
|
1398 offload_images[i] = offload_images[--num_offload_images];
|
|
1399 break;
|
|
1400 }
|
|
1401
|
|
1402 gomp_mutex_unlock (®ister_lock);
|
|
1403 }
|
|
1404
|
|
1405 void
|
|
1406 GOMP_offload_unregister (const void *host_table, int target_type,
|
|
1407 const void *target_data)
|
|
1408 {
|
|
1409 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
|
|
1410 }
|
|
1411
|
|
1412 /* This function initializes the target device, specified by DEVICEP. DEVICEP
|
|
1413 must be locked on entry, and remains locked on return. */
|
|
1414
|
|
1415 attribute_hidden void
|
|
1416 gomp_init_device (struct gomp_device_descr *devicep)
|
|
1417 {
|
|
1418 int i;
|
|
1419 if (!devicep->init_device_func (devicep->target_id))
|
|
1420 {
|
|
1421 gomp_mutex_unlock (&devicep->lock);
|
|
1422 gomp_fatal ("device initialization failed");
|
|
1423 }
|
|
1424
|
|
1425 /* Load to device all images registered by the moment. */
|
|
1426 for (i = 0; i < num_offload_images; i++)
|
|
1427 {
|
|
1428 struct offload_image_descr *image = &offload_images[i];
|
|
1429 if (image->type == devicep->type)
|
|
1430 gomp_load_image_to_device (devicep, image->version,
|
|
1431 image->host_table, image->target_data,
|
|
1432 false);
|
|
1433 }
|
|
1434
|
|
1435 devicep->state = GOMP_DEVICE_INITIALIZED;
|
|
1436 }
|
|
1437
|
|
1438 attribute_hidden void
|
|
1439 gomp_unload_device (struct gomp_device_descr *devicep)
|
|
1440 {
|
|
1441 if (devicep->state == GOMP_DEVICE_INITIALIZED)
|
|
1442 {
|
|
1443 unsigned i;
|
|
1444
|
|
1445 /* Unload from device all images registered at the moment. */
|
|
1446 for (i = 0; i < num_offload_images; i++)
|
|
1447 {
|
|
1448 struct offload_image_descr *image = &offload_images[i];
|
|
1449 if (image->type == devicep->type)
|
|
1450 gomp_unload_image_from_device (devicep, image->version,
|
|
1451 image->host_table,
|
|
1452 image->target_data);
|
|
1453 }
|
|
1454 }
|
|
1455 }
|
|
1456
|
|
1457 /* Free address mapping tables. MM must be locked on entry, and remains locked
|
|
1458 on return. */
|
|
1459
|
|
1460 attribute_hidden void
|
|
1461 gomp_free_memmap (struct splay_tree_s *mem_map)
|
|
1462 {
|
|
1463 while (mem_map->root)
|
|
1464 {
|
|
1465 struct target_mem_desc *tgt = mem_map->root->key.tgt;
|
|
1466
|
|
1467 splay_tree_remove (mem_map, &mem_map->root->key);
|
|
1468 free (tgt->array);
|
|
1469 free (tgt);
|
|
1470 }
|
|
1471 }
|
|
1472
|
|
1473 /* Host fallback for GOMP_target{,_ext} routines. */
|
|
1474
|
|
1475 static void
|
|
1476 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
|
|
1477 {
|
|
1478 struct gomp_thread old_thr, *thr = gomp_thread ();
|
|
1479 old_thr = *thr;
|
|
1480 memset (thr, '\0', sizeof (*thr));
|
|
1481 if (gomp_places_list)
|
|
1482 {
|
|
1483 thr->place = old_thr.place;
|
|
1484 thr->ts.place_partition_len = gomp_places_list_len;
|
|
1485 }
|
|
1486 fn (hostaddrs);
|
|
1487 gomp_free_thread (thr);
|
|
1488 *thr = old_thr;
|
|
1489 }
|
|
1490
|
|
1491 /* Calculate alignment and size requirements of a private copy of data shared
|
|
1492 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
|
|
1493
|
|
1494 static inline void
|
|
1495 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
|
|
1496 unsigned short *kinds, size_t *tgt_align,
|
|
1497 size_t *tgt_size)
|
|
1498 {
|
|
1499 size_t i;
|
|
1500 for (i = 0; i < mapnum; i++)
|
|
1501 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
|
|
1502 {
|
|
1503 size_t align = (size_t) 1 << (kinds[i] >> 8);
|
|
1504 if (*tgt_align < align)
|
|
1505 *tgt_align = align;
|
|
1506 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
|
|
1507 *tgt_size += sizes[i];
|
|
1508 }
|
|
1509 }
|
|
1510
|
|
1511 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
|
|
1512
|
|
1513 static inline void
|
|
1514 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
|
|
1515 size_t *sizes, unsigned short *kinds, size_t tgt_align,
|
|
1516 size_t tgt_size)
|
|
1517 {
|
|
1518 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
|
|
1519 if (al)
|
|
1520 tgt += tgt_align - al;
|
|
1521 tgt_size = 0;
|
|
1522 size_t i;
|
|
1523 for (i = 0; i < mapnum; i++)
|
|
1524 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
|
|
1525 {
|
|
1526 size_t align = (size_t) 1 << (kinds[i] >> 8);
|
|
1527 tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
1528 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
|
|
1529 hostaddrs[i] = tgt + tgt_size;
|
|
1530 tgt_size = tgt_size + sizes[i];
|
|
1531 }
|
|
1532 }
|
|
1533
|
|
1534 /* Helper function of GOMP_target{,_ext} routines. */
|
|
1535
|
|
1536 static void *
|
|
1537 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
|
|
1538 void (*host_fn) (void *))
|
|
1539 {
|
|
1540 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
|
|
1541 return (void *) host_fn;
|
|
1542 else
|
|
1543 {
|
|
1544 gomp_mutex_lock (&devicep->lock);
|
|
1545 if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
1546 {
|
|
1547 gomp_mutex_unlock (&devicep->lock);
|
|
1548 return NULL;
|
|
1549 }
|
|
1550
|
|
1551 struct splay_tree_key_s k;
|
|
1552 k.host_start = (uintptr_t) host_fn;
|
|
1553 k.host_end = k.host_start + 1;
|
|
1554 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
|
|
1555 gomp_mutex_unlock (&devicep->lock);
|
|
1556 if (tgt_fn == NULL)
|
|
1557 return NULL;
|
|
1558
|
|
1559 return (void *) tgt_fn->tgt_offset;
|
|
1560 }
|
|
1561 }
|
|
1562
|
|
1563 /* Called when encountering a target directive. If DEVICE
|
|
1564 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
|
|
1565 GOMP_DEVICE_HOST_FALLBACK (or any value
|
|
1566 larger than last available hw device), use host fallback.
|
|
1567 FN is address of host code, UNUSED is part of the current ABI, but
|
|
1568 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
|
|
1569 with MAPNUM entries, with addresses of the host objects,
|
|
1570 sizes of the host objects (resp. for pointer kind pointer bias
|
|
1571 and assumed sizeof (void *) size) and kinds. */
|
|
1572
|
|
1573 void
|
|
1574 GOMP_target (int device, void (*fn) (void *), const void *unused,
|
|
1575 size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
1576 unsigned char *kinds)
|
|
1577 {
|
|
1578 struct gomp_device_descr *devicep = resolve_device (device);
|
|
1579
|
|
1580 void *fn_addr;
|
|
1581 if (devicep == NULL
|
|
1582 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
1583 /* All shared memory devices should use the GOMP_target_ext function. */
|
|
1584 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|
|
1585 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
|
|
1586 return gomp_target_fallback (fn, hostaddrs);
|
|
1587
|
|
1588 struct target_mem_desc *tgt_vars
|
|
1589 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
|
|
1590 GOMP_MAP_VARS_TARGET);
|
|
1591 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
|
|
1592 NULL);
|
|
1593 gomp_unmap_vars (tgt_vars, true);
|
|
1594 }

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, each of which can take
   one or two elements where the first specifies for which device it is
   intended, the type and optionally also the value.  If the value is not
   present in the first element, the whole second element carries the actual
   value.  The last element of the array is a single NULL.  Among the
   device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value GOMP_teams will have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          if (task)
            {
              thr->task = task;
              gomp_end_task ();
              free (task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                              true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
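
/* Illustrative sketch (not part of the library): for

     #pragma omp target nowait depend(in: a) map(to: a) map(from: b)
     b = work (a);

   a compiler using this entry point would outline the body into, say, fn.1
   and emit roughly

     GOMP_target_ext (GOMP_DEVICE_ICV, fn.1, 2, hostaddrs, sizes, kinds,
                      GOMP_TARGET_FLAG_NOWAIT, depend, args);

   with kinds[] holding 16-bit GOMP_MAP_* values, DEPEND describing the
   dependence on 'a', and ARGS a NULL-terminated array carrying e.g. the
   num_teams/thread_limit hints described above.  The name fn.1 and the
   exact argument encoding are hypothetical; they are produced by the
   compiler.  */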

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
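
/* Illustrative sketch (not part of the library): a structured

     #pragma omp target data map(tofrom: buf[0:n])
     { ... }

   is typically lowered to a bracketing pair, roughly

     GOMP_target_data_ext (GOMP_DEVICE_ICV, 1, hostaddrs, sizes, kinds);
     ... body ...
     GOMP_target_end_data ();

   which is why the mapped region is pushed onto icv->target_data above and
   popped again in GOMP_target_end_data.  The call shape is schematic.  */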

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds,
                        unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags | GOMP_TARGET_FLAG_UPDATE,
                                           depend, NULL, GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (team
                  && (gomp_team_barrier_cancelled (&team->barrier)
                      || (thr->task->taskgroup
                          && thr->task->taskgroup->cancelled)))
                return;

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
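
/* Illustrative sketch (not part of the library):

     #pragma omp target update to(x) from(y)

   would typically be lowered to a single call such as

     GOMP_target_update_ext (GOMP_DEVICE_ICV, 2, hostaddrs, sizes, kinds,
                             0, NULL);

   with one map entry per motion clause; kinds[] selects the direction of
   each transfer.  The call shape and entry count are schematic.  */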

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
        {
        case GOMP_MAP_FROM:
        case GOMP_MAP_ALWAYS_FROM:
        case GOMP_MAP_DELETE:
        case GOMP_MAP_RELEASE:
        case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
        case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start + sizes[i];
          splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                              || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
            ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
            : splay_tree_lookup (&devicep->mem_map, &cur_node);
          if (!k)
            continue;

          if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
            k->refcount--;
          if ((kind == GOMP_MAP_DELETE
               || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
              && k->refcount != REFCOUNT_INFINITY)
            k->refcount = 0;

          if ((kind == GOMP_MAP_FROM && k->refcount == 0)
              || kind == GOMP_MAP_ALWAYS_FROM)
            gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
                                (void *) (k->tgt->tgt_start + k->tgt_offset
                                          + cur_node.host_start
                                          - k->host_start),
                                cur_node.host_end - cur_node.host_start);
          if (k->refcount == 0)
            {
              splay_tree_remove (&devicep->mem_map, k);
              if (k->link_key)
                splay_tree_insert (&devicep->mem_map,
                                   (splay_tree_node) k->link_key);
              if (k->tgt->refcount > 1)
                k->tgt->refcount--;
              else
                gomp_unmap_tgt (k->tgt);
            }

          break;
        default:
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
                      kind);
        }
    }

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
                             size_t *sizes, unsigned short *kinds,
                             unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags, depend, NULL,
                                           GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (team
                  && (gomp_team_barrier_cancelled (&team->barrier)
                      || (thr->task->taskgroup
                          && thr->task->taskgroup->cancelled)))
                return;

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
                         &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
          i += sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
                       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
          || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
          || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
        {
          ttask->state = GOMP_TARGET_TASK_FALLBACK;
          gomp_target_fallback (ttask->fn, ttask->hostaddrs);
          return false;
        }

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
        {
          if (ttask->tgt)
            gomp_unmap_vars (ttask->tgt, true);
          return false;
        }

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        {
          ttask->tgt = NULL;
          actual_arguments = ttask->hostaddrs;
        }
      else
        {
          ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
                                      NULL, ttask->sizes, ttask->kinds, true,
                                      GOMP_MAP_VARS_TARGET);
          actual_arguments = (void *) ttask->tgt->tgt_start;
        }
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
                               ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
           || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
           || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
                         NULL, &ttask->sizes[i], &ttask->kinds[i], true,
                         GOMP_MAP_VARS_ENTER_DATA);
          i += ttask->sizes[i];
        }
      else
        gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
                       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                    ttask->kinds);
  return false;
}

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
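
/* Illustrative user-level usage (not part of the library); a minimal
   sketch assuming <omp.h> and at least one offload device:

     size_t n = 1024;
     int dev = omp_get_default_device ();
     double *dbuf = (double *) omp_target_alloc (n * sizeof (double), dev);
     if (dbuf != NULL)
       {
         #pragma omp target is_device_ptr(dbuf) device(dev)
         for (size_t i = 0; i < n; i++)
           dbuf[i] = 0.0;
         omp_target_free (dbuf, dev);
       }

   Passing GOMP_DEVICE_HOST_FALLBACK as the device number makes both calls
   degenerate to plain malloc/free, as implemented above.  */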

int
omp_target_is_present (void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
                   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                       (char *) dst + dst_offset,
                                       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
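
/* Illustrative user-level usage (not part of the library): copying n bytes
   of a host buffer hbuf into the device allocation dbuf from the previous
   sketch, with dev a valid device number:

     if (omp_target_memcpy (dbuf, hbuf, n, 0, 0, dev,
                            omp_get_initial_device ()) != 0)
       handle_error ();

   Note the destination device number precedes the source one, and a failed
   copy is reported as EINVAL.  hbuf, dbuf, n, dev and handle_error are
   placeholders.  */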

static int
omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
                               int num_dims, const size_t *volume,
                               const size_t *dst_offsets,
                               const size_t *src_offsets,
                               const size_t *dst_dimensions,
                               const size_t *src_dimensions,
                               struct gomp_device_descr *dst_devicep,
                               struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
          || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
          || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
        return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
        {
          memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
          ret = 1;
        }
      else if (src_devicep == NULL)
        ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (char *) src + src_off, length);
      else if (dst_devicep == NULL)
        ret = src_devicep->dev2host_func (src_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (char *) src + src_off, length);
      else if (src_devicep == dst_devicep)
        ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                         (char *) dst + dst_off,
                                         (char *) src + src_off, length);
      else
        ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
        || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
                                           (char *) src + src_off,
                                           element_size, num_dims - 1,
                                           volume + 1, dst_offsets + 1,
                                           src_offsets + 1, dst_dimensions + 1,
                                           src_dimensions + 1, dst_devicep,
                                           src_devicep);
      if (ret)
        return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}

int
omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
                        int num_dims, const size_t *volume,
                        const size_t *dst_offsets,
                        const size_t *src_offsets,
                        const size_t *dst_dimensions,
                        const size_t *src_dimensions,
                        int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
                                           volume, dst_offsets, src_offsets,
                                           dst_dimensions, src_dimensions,
                                           dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
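
/* Illustrative user-level usage (not part of the library): copying a 2x3
   sub-block out of a 4x5 host matrix into the top-left corner of a 4x5
   device matrix, starting at row 1, column 2 of the source:

     size_t volume[2] = { 2, 3 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_offsets[2] = { 1, 2 };
     size_t dst_dims[2] = { 4, 5 };
     size_t src_dims[2] = { 4, 5 };
     int err = omp_target_memcpy_rect (dmat, hmat, sizeof (double), 2,
                                       volume, dst_offsets, src_offsets,
                                       dst_dims, src_dims, dev,
                                       omp_get_initial_device ());

   Per the worker above, the outermost dimension is iterated row by row and
   each row degenerates to a contiguous 1-D copy of volume[1] elements.
   dmat, hmat and dev are placeholders.  Calling the routine with both
   pointers NULL returns the maximum number of dimensions supported, which
   is INT_MAX here, i.e. effectively unlimited.  */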

int
omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
                          size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
          == (uintptr_t) device_ptr + device_offset
          && n->host_start <= cur_node.host_start
          && n->host_end >= cur_node.host_end)
        ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_disassociate_ptr (void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
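
/* Illustrative user-level usage (not part of the library): pairing a host
   array with device memory allocated by hand, so that later target regions
   reuse the existing device copy instead of mapping it again:

     void *dptr = omp_target_alloc (n * sizeof (double), dev);
     if (omp_target_associate_ptr (hbuf, dptr, n * sizeof (double), 0, dev)
         == 0)
       {
         ... target regions referencing hbuf now use dptr as its device copy ...
         omp_target_disassociate_ptr (hbuf, dev);
       }
     omp_target_free (dptr, dev);

   The association is recorded with REFCOUNT_INFINITY above, so it is never
   unmapped implicitly; it must be removed with omp_target_disassociate_ptr.
   hbuf, n and dev are placeholders.  */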

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
          || !DLSYM_OPT (openacc.register_async_cleanup,
                         openacc_register_async_cleanup)
          || !DLSYM_OPT (openacc.async_test, openacc_async_test)
          || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
          || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
          || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
          || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
          || !DLSYM_OPT (openacc.async_wait_all_async,
                         openacc_async_wait_all_async)
          || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
                         openacc_destroy_thread_data))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
          goto fail;
        }

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
                         openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
                         openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
        {
          /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
          goto fail;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
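
/* Illustrative note (not part of the library): DLSYM (alloc) above expands
   to roughly

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   i.e. every plugin exports its hooks under the fixed "GOMP_OFFLOAD_"
   prefix, so a plugin such as libgomp-plugin-nvptx would be expected to
   define GOMP_OFFLOAD_alloc, GOMP_OFFLOAD_run, and so on.  */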

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
        {
          ret = devicep->fini_device_func (devicep->target_id);
          devicep->state = GOMP_DEVICE_FINALIZED;
        }
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
        gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime needed for offloading.
   It parses the list of offload targets and tries to load the plugins for
   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_TARGETS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;
        size_t prefix_len, suffix_len, cur_len;

        next = strchr (cur, ',');

        prefix_len = strlen (prefix);
        cur_len = next ? next - cur : strlen (cur);
        suffix_len = strlen (suffix);

        plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
        if (!plugin_name)
          {
            num_devices = 0;
            break;
          }

        memcpy (plugin_name, prefix, prefix_len);
        memcpy (plugin_name + prefix_len, cur, cur_len);
        memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            new_num_devices = current_device.get_num_devices_func ();
            if (new_num_devices >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */

                devices = realloc (devices, (num_devices + new_num_devices)
                                   * sizeof (struct gomp_device_descr));
                if (!devices)
                  {
                    num_devices = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.state = GOMP_DEVICE_UNINITIALIZED;
                current_device.openacc.data_environ = NULL;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
                    devices[num_devices] = current_device;
                    gomp_mutex_init (&devices[num_devices].lock);
                    num_devices++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */