diff libgomp/target.c @ 111:04ced10e8804
gcc 7
author | kono
date | Fri, 27 Oct 2017 22:46:09 +0900
parents |
children | 84e7813d76e9
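For context, a minimal host-side usage sketch (not part of the changeset; the file name, array size N, and printed value are illustrative only): built with -fopenmp and an offload target configured, GCC typically lowers the target construct below into a call to GOMP_target_ext, and the runtime code added in this file then builds the host/device address mapping via gomp_map_vars before launching the region; without a usable device it takes the host fallback path (gomp_target_fallback).

/* offload-example.c -- illustrative sketch only; assumes -fopenmp and a
   configured offload target, otherwise the host fallback in this file runs.  */
#include <stdio.h>

#define N 1024

int
main (void)
{
  int a[N], b[N];
  for (int i = 0; i < N; i++)
    b[i] = i;

  /* Typically lowered by the compiler into a GOMP_target_ext call; the map
     clauses become the hostaddrs/sizes/kinds arrays that gomp_map_vars
     consumes when building the device mapping.  */
#pragma omp target map(to: b) map(from: a)
  for (int i = 0; i < N; i++)
    a[i] = 2 * b[i];

  printf ("a[1] = %d\n", a[1]);   /* prints 2 */
  return 0;
}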
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libgomp/target.c Fri Oct 27 22:46:09 2017 +0900 @@ -0,0 +1,2594 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains the support of offloading. */ + +#include "config.h" +#include "libgomp.h" +#include "oacc-plugin.h" +#include "oacc-int.h" +#include "gomp-constants.h" +#include <limits.h> +#include <stdbool.h> +#include <stdlib.h> +#ifdef HAVE_INTTYPES_H +# include <inttypes.h> /* For PRIu64. */ +#endif +#include <string.h> +#include <assert.h> +#include <errno.h> + +#ifdef PLUGIN_SUPPORT +#include <dlfcn.h> +#include "plugin-suffix.h" +#endif + +static void gomp_target_init (void); + +/* The whole initialization code for offloading plugins is only run one. */ +static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; + +/* Mutex for offload image registration. */ +static gomp_mutex_t register_lock; + +/* This structure describes an offload image. + It contains type of the target device, pointer to host table descriptor, and + pointer to target data. */ +struct offload_image_descr { + unsigned version; + enum offload_target_type type; + const void *host_table; + const void *target_data; +}; + +/* Array of descriptors of offload images. */ +static struct offload_image_descr *offload_images; + +/* Total number of offload images. */ +static int num_offload_images; + +/* Array of descriptors for all available devices. */ +static struct gomp_device_descr *devices; + +/* Total number of available devices. */ +static int num_devices; + +/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ +static int num_devices_openmp; + +/* Similar to gomp_realloc, but release register_lock before gomp_fatal. 
*/ + +static void * +gomp_realloc_unlock (void *old, size_t size) +{ + void *ret = realloc (old, size); + if (ret == NULL) + { + gomp_mutex_unlock (®ister_lock); + gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); + } + return ret; +} + +attribute_hidden void +gomp_init_targets_once (void) +{ + (void) pthread_once (&gomp_is_initialized, gomp_target_init); +} + +attribute_hidden int +gomp_get_num_devices (void) +{ + gomp_init_targets_once (); + return num_devices_openmp; +} + +static struct gomp_device_descr * +resolve_device (int device_id) +{ + if (device_id == GOMP_DEVICE_ICV) + { + struct gomp_task_icv *icv = gomp_icv (false); + device_id = icv->default_device_var; + } + + if (device_id < 0 || device_id >= gomp_get_num_devices ()) + return NULL; + + gomp_mutex_lock (&devices[device_id].lock); + if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED) + gomp_init_device (&devices[device_id]); + else if (devices[device_id].state == GOMP_DEVICE_FINALIZED) + { + gomp_mutex_unlock (&devices[device_id].lock); + return NULL; + } + gomp_mutex_unlock (&devices[device_id].lock); + + return &devices[device_id]; +} + + +static inline splay_tree_key +gomp_map_lookup (splay_tree mem_map, splay_tree_key key) +{ + if (key->host_start != key->host_end) + return splay_tree_lookup (mem_map, key); + + key->host_end++; + splay_tree_key n = splay_tree_lookup (mem_map, key); + key->host_end--; + if (n) + return n; + key->host_start--; + n = splay_tree_lookup (mem_map, key); + key->host_start++; + if (n) + return n; + return splay_tree_lookup (mem_map, key); +} + +static inline splay_tree_key +gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) +{ + if (key->host_start != key->host_end) + return splay_tree_lookup (mem_map, key); + + key->host_end++; + splay_tree_key n = splay_tree_lookup (mem_map, key); + key->host_end--; + return n; +} + +static inline void +gomp_device_copy (struct gomp_device_descr *devicep, + bool (*copy_func) (int, void *, const void *, size_t), + const char *dst, void *dstaddr, + const char *src, const void *srcaddr, + size_t size) +{ + if (!copy_func (devicep->target_id, dstaddr, srcaddr, size)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", + src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); + } +} + +static void +gomp_copy_host2dev (struct gomp_device_descr *devicep, + void *d, const void *h, size_t sz) +{ + gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); +} + +static void +gomp_copy_dev2host (struct gomp_device_descr *devicep, + void *h, const void *d, size_t sz) +{ + gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); +} + +static void +gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) +{ + if (!devicep->free_func (devicep->target_id, devptr)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("error in freeing device memory block at %p", devptr); + } +} + +/* Handle the case where gomp_map_lookup, splay_tree_lookup or + gomp_map_0len_lookup found oldn for newn. + Helper function of gomp_map_vars. 
*/ + +static inline void +gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, + splay_tree_key newn, struct target_var_desc *tgt_var, + unsigned char kind) +{ + tgt_var->key = oldn; + tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); + tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); + tgt_var->offset = newn->host_start - oldn->host_start; + tgt_var->length = newn->host_end - newn->host_start; + + if ((kind & GOMP_MAP_FLAG_FORCE) + || oldn->host_start > newn->host_start + || oldn->host_end < newn->host_end) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Trying to map into device [%p..%p) object when " + "[%p..%p) is already mapped", + (void *) newn->host_start, (void *) newn->host_end, + (void *) oldn->host_start, (void *) oldn->host_end); + } + + if (GOMP_MAP_ALWAYS_TO_P (kind)) + gomp_copy_host2dev (devicep, + (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + + newn->host_start - oldn->host_start), + (void *) newn->host_start, + newn->host_end - newn->host_start); + + if (oldn->refcount != REFCOUNT_INFINITY) + oldn->refcount++; +} + +static int +get_kind (bool short_mapkind, void *kinds, int idx) +{ + return short_mapkind ? ((unsigned short *) kinds)[idx] + : ((unsigned char *) kinds)[idx]; +} + +static void +gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, + uintptr_t target_offset, uintptr_t bias) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + + cur_node.host_start = host_ptr; + if (cur_node.host_start == (uintptr_t) NULL) + { + cur_node.tgt_offset = (uintptr_t) NULL; + /* FIXME: see comment about coalescing host/dev transfers below. */ + gomp_copy_host2dev (devicep, + (void *) (tgt->tgt_start + target_offset), + (void *) &cur_node.tgt_offset, + sizeof (void *)); + return; + } + /* Add bias to the pointer value. */ + cur_node.host_start += bias; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Pointer target of array section wasn't mapped"); + } + cur_node.host_start -= n->host_start; + cur_node.tgt_offset + = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start; + /* At this point tgt_offset is target address of the + array section. Now subtract bias to get what we want + to initialize the pointer with. */ + cur_node.tgt_offset -= bias; + /* FIXME: see comment about coalescing host/dev transfers below. */ + gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset), + (void *) &cur_node.tgt_offset, sizeof (void *)); +} + +static void +gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, + size_t first, size_t i, void **hostaddrs, + size_t *sizes, void *kinds) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int kind; + const bool short_mapkind = true; + const int typemask = short_mapkind ? 
0xff : 0x7; + + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); + kind = get_kind (short_mapkind, kinds, i); + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, + &tgt->list[i], kind & typemask); + return; + } + if (sizes[i] == 0) + { + if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) + { + cur_node.host_start--; + n2 = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_start++; + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start + == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + kind & typemask); + return; + } + } + cur_node.host_end++; + n2 = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_end--; + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + kind & typemask); + return; + } + } + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Trying to map into device [%p..%p) structure element when " + "other mapped elements from the same structure weren't mapped " + "together with it", (void *) cur_node.host_start, + (void *) cur_node.host_end); +} + +static inline uintptr_t +gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) +{ + if (tgt->list[i].key != NULL) + return tgt->list[i].key->tgt->tgt_start + + tgt->list[i].key->tgt_offset + + tgt->list[i].offset; + if (tgt->list[i].offset == ~(uintptr_t) 0) + return (uintptr_t) hostaddrs[i]; + if (tgt->list[i].offset == ~(uintptr_t) 1) + return 0; + if (tgt->list[i].offset == ~(uintptr_t) 2) + return tgt->list[i + 1].key->tgt->tgt_start + + tgt->list[i + 1].key->tgt_offset + + tgt->list[i + 1].offset + + (uintptr_t) hostaddrs[i] + - (uintptr_t) hostaddrs[i + 1]; + return tgt->tgt_start + tgt->list[i].offset; +} + +attribute_hidden struct target_mem_desc * +gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, + void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, + bool short_mapkind, enum gomp_map_vars_kind pragma_kind) +{ + size_t i, tgt_align, tgt_size, not_found_cnt = 0; + bool has_firstprivate = false; + const int rshift = short_mapkind ? 8 : 3; + const int typemask = short_mapkind ? 0xff : 0x7; + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + struct target_mem_desc *tgt + = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); + tgt->list_count = mapnum; + tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 
0 : 1; + tgt->device_descr = devicep; + + if (mapnum == 0) + { + tgt->tgt_start = 0; + tgt->tgt_end = 0; + return tgt; + } + + tgt_align = sizeof (void *); + tgt_size = 0; + if (pragma_kind == GOMP_MAP_VARS_TARGET) + { + size_t align = 4 * sizeof (void *); + tgt_align = align; + tgt_size = mapnum * sizeof (void *); + } + + gomp_mutex_lock (&devicep->lock); + if (devicep->state == GOMP_DEVICE_FINALIZED) + { + gomp_mutex_unlock (&devicep->lock); + free (tgt); + return NULL; + } + + for (i = 0; i < mapnum; i++) + { + int kind = get_kind (short_mapkind, kinds, i); + if (hostaddrs[i] == NULL + || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) + { + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 0; + continue; + } + else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) + { + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("use_device_ptr pointer wasn't mapped"); + } + cur_node.host_start -= n->host_start; + hostaddrs[i] + = (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start); + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 0; + continue; + } + else if ((kind & typemask) == GOMP_MAP_STRUCT) + { + size_t first = i + 1; + size_t last = i + sizes[i]; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = (uintptr_t) hostaddrs[last] + + sizes[last]; + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 2; + splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + size_t align = (size_t) 1 << (kind >> rshift); + if (tgt_align < align) + tgt_align = align; + tgt_size -= (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i]; + not_found_cnt += last - i; + for (i = first; i <= last; i++) + tgt->list[i].key = NULL; + i--; + continue; + } + for (i = first; i <= last; i++) + gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + sizes, kinds); + i--; + continue; + } + else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) + { + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 1; + has_firstprivate = true; + continue; + } + cur_node.host_start = (uintptr_t) hostaddrs[i]; + if (!GOMP_MAP_POINTER_P (kind & typemask)) + cur_node.host_end = cur_node.host_start + sizes[i]; + else + cur_node.host_end = cur_node.host_start + sizeof (void *); + if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) + { + tgt->list[i].key = NULL; + + size_t align = (size_t) 1 << (kind >> rshift); + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - cur_node.host_start; + has_firstprivate = true; + continue; + } + splay_tree_key n; + if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) + { + n = gomp_map_0len_lookup (mem_map, &cur_node); + if (!n) + { + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 1; + continue; + } + } + else + n = splay_tree_lookup (mem_map, &cur_node); + if (n && n->refcount != REFCOUNT_LINK) + gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i], + kind & typemask); + else + { + tgt->list[i].key = NULL; + + size_t align = (size_t) 1 << (kind >> rshift); + not_found_cnt++; + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - 
cur_node.host_start; + if ((kind & typemask) == GOMP_MAP_TO_PSET) + { + size_t j; + for (j = i + 1; j < mapnum; j++) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j) + & typemask)) + break; + else if ((uintptr_t) hostaddrs[j] < cur_node.host_start + || ((uintptr_t) hostaddrs[j] + sizeof (void *) + > cur_node.host_end)) + break; + else + { + tgt->list[j].key = NULL; + i++; + } + } + } + } + + if (devaddrs) + { + if (mapnum != 1) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("unexpected aggregation"); + } + tgt->to_free = devaddrs[0]; + tgt->tgt_start = (uintptr_t) tgt->to_free; + tgt->tgt_end = tgt->tgt_start + sizes[0]; + } + else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) + { + /* Allocate tgt_align aligned tgt_size block of memory. */ + /* FIXME: Perhaps change interface to allocate properly aligned + memory. */ + tgt->to_free = devicep->alloc_func (devicep->target_id, + tgt_size + tgt_align - 1); + if (!tgt->to_free) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("device memory allocation fail"); + } + + tgt->tgt_start = (uintptr_t) tgt->to_free; + tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); + tgt->tgt_end = tgt->tgt_start + tgt_size; + } + else + { + tgt->to_free = NULL; + tgt->tgt_start = 0; + tgt->tgt_end = 0; + } + + tgt_size = 0; + if (pragma_kind == GOMP_MAP_VARS_TARGET) + tgt_size = mapnum * sizeof (void *); + + tgt->array = NULL; + if (not_found_cnt || has_firstprivate) + { + if (not_found_cnt) + tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); + splay_tree_node array = tgt->array; + size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0; + uintptr_t field_tgt_base = 0; + + for (i = 0; i < mapnum; i++) + if (tgt->list[i].key == NULL) + { + int kind = get_kind (short_mapkind, kinds, i); + if (hostaddrs[i] == NULL) + continue; + switch (kind & typemask) + { + size_t align, len, first, last; + splay_tree_key n; + case GOMP_MAP_FIRSTPRIVATE: + align = (size_t) 1 << (kind >> rshift); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt->list[i].offset = tgt_size; + len = sizes[i]; + gomp_copy_host2dev (devicep, + (void *) (tgt->tgt_start + tgt_size), + (void *) hostaddrs[i], len); + tgt_size += len; + continue; + case GOMP_MAP_FIRSTPRIVATE_INT: + case GOMP_MAP_USE_DEVICE_PTR: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + continue; + case GOMP_MAP_STRUCT: + first = i + 1; + last = i + sizes[i]; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = (uintptr_t) hostaddrs[last] + + sizes[last]; + if (tgt->list[first].key != NULL) + continue; + n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + size_t align = (size_t) 1 << (kind >> rshift); + tgt_size -= (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + field_tgt_base = (uintptr_t) hostaddrs[first]; + field_tgt_offset = tgt_size; + field_tgt_clear = last; + tgt_size += cur_node.host_end + - (uintptr_t) hostaddrs[first]; + continue; + } + for (i = first; i <= last; i++) + gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + sizes, kinds); + i--; + continue; + case GOMP_MAP_ALWAYS_POINTER: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizeof (void *); + n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL + || n->host_start > cur_node.host_start + || n->host_end < cur_node.host_end) + { + gomp_mutex_unlock 
(&devicep->lock); + gomp_fatal ("always pointer not mapped"); + } + if ((get_kind (short_mapkind, kinds, i - 1) & typemask) + != GOMP_MAP_ALWAYS_POINTER) + cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); + if (cur_node.tgt_offset) + cur_node.tgt_offset -= sizes[i]; + gomp_copy_host2dev (devicep, + (void *) (n->tgt->tgt_start + + n->tgt_offset + + cur_node.host_start + - n->host_start), + (void *) &cur_node.tgt_offset, + sizeof (void *)); + cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start - n->host_start; + continue; + default: + break; + } + splay_tree_key k = &array->key; + k->host_start = (uintptr_t) hostaddrs[i]; + if (!GOMP_MAP_POINTER_P (kind & typemask)) + k->host_end = k->host_start + sizes[i]; + else + k->host_end = k->host_start + sizeof (void *); + splay_tree_key n = splay_tree_lookup (mem_map, k); + if (n && n->refcount != REFCOUNT_LINK) + gomp_map_vars_existing (devicep, n, k, &tgt->list[i], + kind & typemask); + else + { + k->link_key = NULL; + if (n && n->refcount == REFCOUNT_LINK) + { + /* Replace target address of the pointer with target address + of mapped object in the splay tree. */ + splay_tree_remove (mem_map, n); + k->link_key = n; + } + size_t align = (size_t) 1 << (kind >> rshift); + tgt->list[i].key = k; + k->tgt = tgt; + if (field_tgt_clear != ~(size_t) 0) + { + k->tgt_offset = k->host_start - field_tgt_base + + field_tgt_offset; + if (i == field_tgt_clear) + field_tgt_clear = ~(size_t) 0; + } + else + { + tgt_size = (tgt_size + align - 1) & ~(align - 1); + k->tgt_offset = tgt_size; + tgt_size += k->host_end - k->host_start; + } + tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); + tgt->list[i].always_copy_from + = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); + tgt->list[i].offset = 0; + tgt->list[i].length = k->host_end - k->host_start; + k->refcount = 1; + tgt->refcount++; + array->left = NULL; + array->right = NULL; + splay_tree_insert (mem_map, array); + switch (kind & typemask) + { + case GOMP_MAP_ALLOC: + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_ALLOC: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_ALWAYS_FROM: + break; + case GOMP_MAP_TO: + case GOMP_MAP_TOFROM: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_ALWAYS_TO: + case GOMP_MAP_ALWAYS_TOFROM: + /* FIXME: Perhaps add some smarts, like if copying + several adjacent fields from host to target, use some + host buffer to avoid sending each var individually. */ + gomp_copy_host2dev (devicep, + (void *) (tgt->tgt_start + + k->tgt_offset), + (void *) k->host_start, + k->host_end - k->host_start); + break; + case GOMP_MAP_POINTER: + gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start, + k->tgt_offset, sizes[i]); + break; + case GOMP_MAP_TO_PSET: + /* FIXME: see above FIXME comment. 
*/ + gomp_copy_host2dev (devicep, + (void *) (tgt->tgt_start + + k->tgt_offset), + (void *) k->host_start, + k->host_end - k->host_start); + + for (j = i + 1; j < mapnum; j++) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, + j) + & typemask)) + break; + else if ((uintptr_t) hostaddrs[j] < k->host_start + || ((uintptr_t) hostaddrs[j] + sizeof (void *) + > k->host_end)) + break; + else + { + tgt->list[j].key = k; + tgt->list[j].copy_from = false; + tgt->list[j].always_copy_from = false; + if (k->refcount != REFCOUNT_INFINITY) + k->refcount++; + gomp_map_pointer (tgt, + (uintptr_t) *(void **) hostaddrs[j], + k->tgt_offset + + ((uintptr_t) hostaddrs[j] + - k->host_start), + sizes[j]); + i++; + } + break; + case GOMP_MAP_FORCE_PRESENT: + { + /* We already looked up the memory region above and it + was missing. */ + size_t size = k->host_end - k->host_start; + gomp_mutex_unlock (&devicep->lock); +#ifdef HAVE_INTTYPES_H + gomp_fatal ("present clause: !acc_is_present (%p, " + "%"PRIu64" (0x%"PRIx64"))", + (void *) k->host_start, + (uint64_t) size, (uint64_t) size); +#else + gomp_fatal ("present clause: !acc_is_present (%p, " + "%lu (0x%lx))", (void *) k->host_start, + (unsigned long) size, (unsigned long) size); +#endif + } + break; + case GOMP_MAP_FORCE_DEVICEPTR: + assert (k->host_end - k->host_start == sizeof (void *)); + gomp_copy_host2dev (devicep, + (void *) (tgt->tgt_start + + k->tgt_offset), + (void *) k->host_start, + sizeof (void *)); + break; + default: + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, + kind); + } + + if (k->link_key) + { + /* Set link pointer on target to the device address of the + mapped object. */ + void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); + devicep->host2dev_func (devicep->target_id, + (void *) n->tgt_offset, + &tgt_addr, sizeof (void *)); + } + array++; + } + } + } + + if (pragma_kind == GOMP_MAP_VARS_TARGET) + { + for (i = 0; i < mapnum; i++) + { + cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); + /* FIXME: see above FIXME comment. */ + gomp_copy_host2dev (devicep, + (void *) (tgt->tgt_start + i * sizeof (void *)), + (void *) &cur_node.tgt_offset, sizeof (void *)); + } + } + + /* If the variable from "omp target enter data" map-list was already mapped, + tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or + gomp_exit_data. */ + if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + { + free (tgt); + tgt = NULL; + } + + gomp_mutex_unlock (&devicep->lock); + return tgt; +} + +static void +gomp_unmap_tgt (struct target_mem_desc *tgt) +{ + /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ + if (tgt->tgt_end) + gomp_free_device_memory (tgt->device_descr, tgt->to_free); + + free (tgt->array); + free (tgt); +} + +/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant + variables back from device to host: if it is false, it is assumed that this + has been done already. 
*/ + +attribute_hidden void +gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + + if (tgt->list_count == 0) + { + free (tgt); + return; + } + + gomp_mutex_lock (&devicep->lock); + if (devicep->state == GOMP_DEVICE_FINALIZED) + { + gomp_mutex_unlock (&devicep->lock); + free (tgt->array); + free (tgt); + return; + } + + size_t i; + for (i = 0; i < tgt->list_count; i++) + { + splay_tree_key k = tgt->list[i].key; + if (k == NULL) + continue; + + bool do_unmap = false; + if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + else if (k->refcount == 1) + { + k->refcount--; + do_unmap = true; + } + + if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) + || tgt->list[i].always_copy_from) + gomp_copy_dev2host (devicep, + (void *) (k->host_start + tgt->list[i].offset), + (void *) (k->tgt->tgt_start + k->tgt_offset + + tgt->list[i].offset), + tgt->list[i].length); + if (do_unmap) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->link_key) + splay_tree_insert (&devicep->mem_map, + (splay_tree_node) k->link_key); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + } + + if (tgt->refcount > 1) + tgt->refcount--; + else + gomp_unmap_tgt (tgt); + + gomp_mutex_unlock (&devicep->lock); +} + +static void +gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, + size_t *sizes, void *kinds, bool short_mapkind) +{ + size_t i; + struct splay_tree_key_s cur_node; + const int typemask = short_mapkind ? 0xff : 0x7; + + if (!devicep) + return; + + if (mapnum == 0) + return; + + gomp_mutex_lock (&devicep->lock); + if (devicep->state == GOMP_DEVICE_FINALIZED) + { + gomp_mutex_unlock (&devicep->lock); + return; + } + + for (i = 0; i < mapnum; i++) + if (sizes[i]) + { + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); + if (n) + { + int kind = get_kind (short_mapkind, kinds, i); + if (n->host_start > cur_node.host_start + || n->host_end < cur_node.host_end) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Trying to update [%p..%p) object when " + "only [%p..%p) is mapped", + (void *) cur_node.host_start, + (void *) cur_node.host_end, + (void *) n->host_start, + (void *) n->host_end); + } + + + void *hostaddr = (void *) cur_node.host_start; + void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start - n->host_start); + size_t size = cur_node.host_end - cur_node.host_start; + + if (GOMP_MAP_COPY_TO_P (kind & typemask)) + gomp_copy_host2dev (devicep, devaddr, hostaddr, size); + if (GOMP_MAP_COPY_FROM_P (kind & typemask)) + gomp_copy_dev2host (devicep, hostaddr, devaddr, size); + } + } + gomp_mutex_unlock (&devicep->lock); +} + +/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP. + And insert to splay tree the mapping between addresses from HOST_TABLE and + from loaded target image. We rely in the host and device compiler + emitting variable and functions in the same order. 
*/ + +static void +gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, + const void *host_table, const void *target_data, + bool is_register_lock) +{ + void **host_func_table = ((void ***) host_table)[0]; + void **host_funcs_end = ((void ***) host_table)[1]; + void **host_var_table = ((void ***) host_table)[2]; + void **host_vars_end = ((void ***) host_table)[3]; + + /* The func table contains only addresses, the var table contains addresses + and corresponding sizes. */ + int num_funcs = host_funcs_end - host_func_table; + int num_vars = (host_vars_end - host_var_table) / 2; + + /* Load image to device and get target addresses for the image. */ + struct addr_pair *target_table = NULL; + int i, num_target_entries; + + num_target_entries + = devicep->load_image_func (devicep->target_id, version, + target_data, &target_table); + + if (num_target_entries != num_funcs + num_vars) + { + gomp_mutex_unlock (&devicep->lock); + if (is_register_lock) + gomp_mutex_unlock (®ister_lock); + gomp_fatal ("Cannot map target functions or variables" + " (expected %u, have %u)", num_funcs + num_vars, + num_target_entries); + } + + /* Insert host-target address mapping into splay tree. */ + struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); + tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); + tgt->refcount = REFCOUNT_INFINITY; + tgt->tgt_start = 0; + tgt->tgt_end = 0; + tgt->to_free = NULL; + tgt->prev = NULL; + tgt->list_count = 0; + tgt->device_descr = devicep; + splay_tree_node array = tgt->array; + + for (i = 0; i < num_funcs; i++) + { + splay_tree_key k = &array->key; + k->host_start = (uintptr_t) host_func_table[i]; + k->host_end = k->host_start + 1; + k->tgt = tgt; + k->tgt_offset = target_table[i].start; + k->refcount = REFCOUNT_INFINITY; + k->link_key = NULL; + array->left = NULL; + array->right = NULL; + splay_tree_insert (&devicep->mem_map, array); + array++; + } + + /* Most significant bit of the size in host and target tables marks + "omp declare target link" variables. */ + const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); + const uintptr_t size_mask = ~link_bit; + + for (i = 0; i < num_vars; i++) + { + struct addr_pair *target_var = &target_table[num_funcs + i]; + uintptr_t target_size = target_var->end - target_var->start; + + if ((uintptr_t) host_var_table[i * 2 + 1] != target_size) + { + gomp_mutex_unlock (&devicep->lock); + if (is_register_lock) + gomp_mutex_unlock (®ister_lock); + gomp_fatal ("Cannot map target variables (size mismatch)"); + } + + splay_tree_key k = &array->key; + k->host_start = (uintptr_t) host_var_table[i * 2]; + k->host_end + = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); + k->tgt = tgt; + k->tgt_offset = target_var->start; + k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY; + k->link_key = NULL; + array->left = NULL; + array->right = NULL; + splay_tree_insert (&devicep->mem_map, array); + array++; + } + + free (target_table); +} + +/* Unload the mappings described by target_data from device DEVICE_P. + The device must be locked. 
*/ + +static void +gomp_unload_image_from_device (struct gomp_device_descr *devicep, + unsigned version, + const void *host_table, const void *target_data) +{ + void **host_func_table = ((void ***) host_table)[0]; + void **host_funcs_end = ((void ***) host_table)[1]; + void **host_var_table = ((void ***) host_table)[2]; + void **host_vars_end = ((void ***) host_table)[3]; + + /* The func table contains only addresses, the var table contains addresses + and corresponding sizes. */ + int num_funcs = host_funcs_end - host_func_table; + int num_vars = (host_vars_end - host_var_table) / 2; + + struct splay_tree_key_s k; + splay_tree_key node = NULL; + + /* Find mapping at start of node array */ + if (num_funcs || num_vars) + { + k.host_start = (num_funcs ? (uintptr_t) host_func_table[0] + : (uintptr_t) host_var_table[0]); + k.host_end = k.host_start + 1; + node = splay_tree_lookup (&devicep->mem_map, &k); + } + + if (!devicep->unload_image_func (devicep->target_id, version, target_data)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("image unload fail"); + } + + /* Remove mappings from splay tree. */ + int i; + for (i = 0; i < num_funcs; i++) + { + k.host_start = (uintptr_t) host_func_table[i]; + k.host_end = k.host_start + 1; + splay_tree_remove (&devicep->mem_map, &k); + } + + /* Most significant bit of the size in host and target tables marks + "omp declare target link" variables. */ + const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); + const uintptr_t size_mask = ~link_bit; + bool is_tgt_unmapped = false; + + for (i = 0; i < num_vars; i++) + { + k.host_start = (uintptr_t) host_var_table[i * 2]; + k.host_end + = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); + + if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1])) + splay_tree_remove (&devicep->mem_map, &k); + else + { + splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k); + splay_tree_remove (&devicep->mem_map, n); + if (n->link_key) + { + if (n->tgt->refcount > 1) + n->tgt->refcount--; + else + { + is_tgt_unmapped = true; + gomp_unmap_tgt (n->tgt); + } + } + } + } + + if (node && !is_tgt_unmapped) + { + free (node->tgt); + free (node); + } +} + +/* This function should be called from every offload image while loading. + It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of + the target, and TARGET_DATA needed by target plugin. */ + +void +GOMP_offload_register_ver (unsigned version, const void *host_table, + int target_type, const void *target_data) +{ + int i; + + if (GOMP_VERSION_LIB (version) > GOMP_VERSION) + gomp_fatal ("Library too old for offload (version %u < %u)", + GOMP_VERSION, GOMP_VERSION_LIB (version)); + + gomp_mutex_lock (®ister_lock); + + /* Load image to all initialized devices. */ + for (i = 0; i < num_devices; i++) + { + struct gomp_device_descr *devicep = &devices[i]; + gomp_mutex_lock (&devicep->lock); + if (devicep->type == target_type + && devicep->state == GOMP_DEVICE_INITIALIZED) + gomp_load_image_to_device (devicep, version, + host_table, target_data, true); + gomp_mutex_unlock (&devicep->lock); + } + + /* Insert image to array of pending images. 
*/ + offload_images + = gomp_realloc_unlock (offload_images, + (num_offload_images + 1) + * sizeof (struct offload_image_descr)); + offload_images[num_offload_images].version = version; + offload_images[num_offload_images].type = target_type; + offload_images[num_offload_images].host_table = host_table; + offload_images[num_offload_images].target_data = target_data; + + num_offload_images++; + gomp_mutex_unlock (®ister_lock); +} + +void +GOMP_offload_register (const void *host_table, int target_type, + const void *target_data) +{ + GOMP_offload_register_ver (0, host_table, target_type, target_data); +} + +/* This function should be called from every offload image while unloading. + It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of + the target, and TARGET_DATA needed by target plugin. */ + +void +GOMP_offload_unregister_ver (unsigned version, const void *host_table, + int target_type, const void *target_data) +{ + int i; + + gomp_mutex_lock (®ister_lock); + + /* Unload image from all initialized devices. */ + for (i = 0; i < num_devices; i++) + { + struct gomp_device_descr *devicep = &devices[i]; + gomp_mutex_lock (&devicep->lock); + if (devicep->type == target_type + && devicep->state == GOMP_DEVICE_INITIALIZED) + gomp_unload_image_from_device (devicep, version, + host_table, target_data); + gomp_mutex_unlock (&devicep->lock); + } + + /* Remove image from array of pending images. */ + for (i = 0; i < num_offload_images; i++) + if (offload_images[i].target_data == target_data) + { + offload_images[i] = offload_images[--num_offload_images]; + break; + } + + gomp_mutex_unlock (®ister_lock); +} + +void +GOMP_offload_unregister (const void *host_table, int target_type, + const void *target_data) +{ + GOMP_offload_unregister_ver (0, host_table, target_type, target_data); +} + +/* This function initializes the target device, specified by DEVICEP. DEVICEP + must be locked on entry, and remains locked on return. */ + +attribute_hidden void +gomp_init_device (struct gomp_device_descr *devicep) +{ + int i; + if (!devicep->init_device_func (devicep->target_id)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("device initialization failed"); + } + + /* Load to device all images registered by the moment. */ + for (i = 0; i < num_offload_images; i++) + { + struct offload_image_descr *image = &offload_images[i]; + if (image->type == devicep->type) + gomp_load_image_to_device (devicep, image->version, + image->host_table, image->target_data, + false); + } + + devicep->state = GOMP_DEVICE_INITIALIZED; +} + +attribute_hidden void +gomp_unload_device (struct gomp_device_descr *devicep) +{ + if (devicep->state == GOMP_DEVICE_INITIALIZED) + { + unsigned i; + + /* Unload from device all images registered at the moment. */ + for (i = 0; i < num_offload_images; i++) + { + struct offload_image_descr *image = &offload_images[i]; + if (image->type == devicep->type) + gomp_unload_image_from_device (devicep, image->version, + image->host_table, + image->target_data); + } + } +} + +/* Free address mapping tables. MM must be locked on entry, and remains locked + on return. */ + +attribute_hidden void +gomp_free_memmap (struct splay_tree_s *mem_map) +{ + while (mem_map->root) + { + struct target_mem_desc *tgt = mem_map->root->key.tgt; + + splay_tree_remove (mem_map, &mem_map->root->key); + free (tgt->array); + free (tgt); + } +} + +/* Host fallback for GOMP_target{,_ext} routines. 
*/ + +static void +gomp_target_fallback (void (*fn) (void *), void **hostaddrs) +{ + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); + if (gomp_places_list) + { + thr->place = old_thr.place; + thr->ts.place_partition_len = gomp_places_list_len; + } + fn (hostaddrs); + gomp_free_thread (thr); + *thr = old_thr; +} + +/* Calculate alignment and size requirements of a private copy of data shared + as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ + +static inline void +calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, + unsigned short *kinds, size_t *tgt_align, + size_t *tgt_size) +{ + size_t i; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + if (*tgt_align < align) + *tgt_align = align; + *tgt_size = (*tgt_size + align - 1) & ~(align - 1); + *tgt_size += sizes[i]; + } +} + +/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */ + +static inline void +copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, size_t tgt_align, + size_t tgt_size) +{ + uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); + if (al) + tgt += tgt_align - al; + tgt_size = 0; + size_t i; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); + hostaddrs[i] = tgt + tgt_size; + tgt_size = tgt_size + sizes[i]; + } +} + +/* Helper function of GOMP_target{,_ext} routines. */ + +static void * +gomp_get_target_fn_addr (struct gomp_device_descr *devicep, + void (*host_fn) (void *)) +{ + if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) + return (void *) host_fn; + else + { + gomp_mutex_lock (&devicep->lock); + if (devicep->state == GOMP_DEVICE_FINALIZED) + { + gomp_mutex_unlock (&devicep->lock); + return NULL; + } + + struct splay_tree_key_s k; + k.host_start = (uintptr_t) host_fn; + k.host_end = k.host_start + 1; + splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); + gomp_mutex_unlock (&devicep->lock); + if (tgt_fn == NULL) + return NULL; + + return (void *) tgt_fn->tgt_offset; + } +} + +/* Called when encountering a target directive. If DEVICE + is GOMP_DEVICE_ICV, it means use device-var ICV. If it is + GOMP_DEVICE_HOST_FALLBACK (or any value + larger than last available hw device), use host fallback. + FN is address of host code, UNUSED is part of the current ABI, but + we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays + with MAPNUM entries, with addresses of the host objects, + sizes of the host objects (resp. for pointer kind pointer bias + and assumed sizeof (void *) size) and kinds. */ + +void +GOMP_target (int device, void (*fn) (void *), const void *unused, + size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned char *kinds) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + void *fn_addr; + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + /* All shared memory devices should use the GOMP_target_ext function. 
*/ + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM + || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) + return gomp_target_fallback (fn, hostaddrs); + + struct target_mem_desc *tgt_vars + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + GOMP_MAP_VARS_TARGET); + devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, + NULL); + gomp_unmap_vars (tgt_vars, true); +} + +/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, + and several arguments have been added: + FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. + DEPEND is array of dependencies, see GOMP_task for details. + + ARGS is a pointer to an array consisting of a variable number of both + device-independent and device-specific arguments, which can take one two + elements where the first specifies for which device it is intended, the type + and optionally also the value. If the value is not present in the first + one, the whole second element the actual value. The last element of the + array is a single NULL. Among the device independent can be for example + NUM_TEAMS and THREAD_LIMIT. + + NUM_TEAMS is positive if GOMP_teams will be called in the body with + that value, or 1 if teams construct is not present, or 0, if + teams construct does not have num_teams clause and so the choice is + implementation defined, and -1 if it can't be determined on the host + what value will GOMP_teams have on the device. + THREAD_LIMIT similarly is positive if GOMP_teams will be called in the + body with that value, or 0, if teams construct does not have thread_limit + clause or the teams construct is not present, or -1 if it can't be + determined on the host what value will GOMP_teams have on the device. */ + +void +GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend, void **args) +{ + struct gomp_device_descr *devicep = resolve_device (device); + size_t tgt_align = 0, tgt_size = 0; + bool fpc_done = false; + + if (flags & GOMP_TARGET_FLAG_NOWAIT) + { + struct gomp_thread *thr = gomp_thread (); + /* Create a team if we don't have any around, as nowait + target tasks make sense to run asynchronously even when + outside of any parallel. */ + if (__builtin_expect (thr->ts.team == NULL, 0)) + { + struct gomp_team *team = gomp_new_team (1); + struct gomp_task *task = thr->task; + struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv; + team->prev_ts = thr->ts; + thr->ts.team = team; + thr->ts.team_id = 0; + thr->ts.work_share = &team->work_shares[0]; + thr->ts.last_work_share = NULL; +#ifdef HAVE_SYNC_BUILTINS + thr->ts.single_count = 0; +#endif + thr->ts.static_trip = 0; + thr->task = &team->implicit_task[0]; + gomp_init_task (thr->task, NULL, icv); + if (task) + { + thr->task = task; + gomp_end_task (); + free (task); + thr->task = &team->implicit_task[0]; + } + else + pthread_setspecific (gomp_thread_destructor, thr); + } + if (thr->ts.team + && !thr->task->final_task) + { + gomp_create_target_task (devicep, fn, mapnum, hostaddrs, + sizes, kinds, flags, depend, args, + GOMP_TARGET_TASK_BEFORE_MAP); + return; + } + } + + /* If there are depend clauses, but nowait is not present + (or we are in a final task), block the parent task until the + dependencies are resolved and then just continue with the rest + of the function as if it is a merged task. 
*/ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + /* If we might need to wait, copy firstprivate now. */ + calculate_firstprivate_requirements (mapnum, sizes, kinds, + &tgt_align, &tgt_size); + if (tgt_align) + { + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, + tgt_align, tgt_size); + } + fpc_done = true; + gomp_task_maybe_wait_for_dependencies (depend); + } + } + + void *fn_addr; + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)) + || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) + { + if (!fpc_done) + { + calculate_firstprivate_requirements (mapnum, sizes, kinds, + &tgt_align, &tgt_size); + if (tgt_align) + { + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, + tgt_align, tgt_size); + } + } + gomp_target_fallback (fn, hostaddrs); + return; + } + + struct target_mem_desc *tgt_vars; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + if (!fpc_done) + { + calculate_firstprivate_requirements (mapnum, sizes, kinds, + &tgt_align, &tgt_size); + if (tgt_align) + { + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, + tgt_align, tgt_size); + } + } + tgt_vars = NULL; + } + else + tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, + true, GOMP_MAP_VARS_TARGET); + devicep->run_func (devicep->target_id, fn_addr, + tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, + args); + if (tgt_vars) + gomp_unmap_vars (tgt_vars, true); +} + +/* Host fallback for GOMP_target_data{,_ext} routines. */ + +static void +gomp_target_data_fallback (void) +{ + struct gomp_task_icv *icv = gomp_icv (false); + if (icv->target_data) + { + /* Even when doing a host fallback, if there are any active + #pragma omp target data constructs, need to remember the + new #pragma omp target data, otherwise GOMP_target_end_data + would get out of sync. 
*/ + struct target_mem_desc *tgt + = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, + GOMP_MAP_VARS_DATA); + tgt->prev = icv->target_data; + icv->target_data = tgt; + } +} + +void +GOMP_target_data (int device, const void *unused, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) + return gomp_target_data_fallback (); + + struct target_mem_desc *tgt + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + GOMP_MAP_VARS_DATA); + struct gomp_task_icv *icv = gomp_icv (true); + tgt->prev = icv->target_data; + icv->target_data = tgt; +} + +void +GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return gomp_target_data_fallback (); + + struct target_mem_desc *tgt + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_DATA); + struct gomp_task_icv *icv = gomp_icv (true); + tgt->prev = icv->target_data; + icv->target_data = tgt; +} + +void +GOMP_target_end_data (void) +{ + struct gomp_task_icv *icv = gomp_icv (false); + if (icv->target_data) + { + struct target_mem_desc *tgt = icv->target_data; + icv->target_data = tgt->prev; + gomp_unmap_vars (tgt, true); + } +} + +void +GOMP_target_update (int device, const void *unused, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return; + + gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); +} + +void +GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. Until we are able to schedule task during + variable mapping or unmapping, ignore nowait if depend clauses + are not present. */ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + if ((flags & GOMP_TARGET_FLAG_NOWAIT) + && thr->ts.team + && !thr->task->final_task) + { + if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, + mapnum, hostaddrs, sizes, kinds, + flags | GOMP_TARGET_FLAG_UPDATE, + depend, NULL, GOMP_TARGET_TASK_DATA)) + return; + } + else + { + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new + tasks. 
*/ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup + && thr->task->taskgroup->cancelled))) + return; + + gomp_task_maybe_wait_for_dependencies (depend); + } + } + } + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); +} + +static void +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + const int typemask = 0xff; + size_t i; + gomp_mutex_lock (&devicep->lock); + if (devicep->state == GOMP_DEVICE_FINALIZED) + { + gomp_mutex_unlock (&devicep->lock); + return; + } + + for (i = 0; i < mapnum; i++) + { + struct splay_tree_key_s cur_node; + unsigned char kind = kinds[i] & typemask; + switch (kind) + { + case GOMP_MAP_FROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION + || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) + ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node) + : splay_tree_lookup (&devicep->mem_map, &cur_node); + if (!k) + continue; + + if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + if ((kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) + && k->refcount != REFCOUNT_INFINITY) + k->refcount = 0; + + if ((kind == GOMP_MAP_FROM && k->refcount == 0) + || kind == GOMP_MAP_ALWAYS_FROM) + gomp_copy_dev2host (devicep, (void *) cur_node.host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset + + cur_node.host_start + - k->host_start), + cur_node.host_end - cur_node.host_start); + if (k->refcount == 0) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->link_key) + splay_tree_insert (&devicep->mem_map, + (splay_tree_node) k->link_key); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + + break; + default: + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", + kind); + } + } + + gomp_mutex_unlock (&devicep->lock); +} + +void +GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. Until we are able to schedule task during + variable mapping or unmapping, ignore nowait if depend clauses + are not present. 
*/ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + if ((flags & GOMP_TARGET_FLAG_NOWAIT) + && thr->ts.team + && !thr->task->final_task) + { + if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, + mapnum, hostaddrs, sizes, kinds, + flags, depend, NULL, + GOMP_TARGET_TASK_DATA)) + return; + } + else + { + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup + && thr->task->taskgroup->cancelled))) + return; + + gomp_task_maybe_wait_for_dependencies (depend); + } + } + } + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + size_t i; + if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], + &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + i += sizes[i]; + } + else + gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); +} + +bool +gomp_target_task_fn (void *data) +{ + struct gomp_target_task *ttask = (struct gomp_target_task *) data; + struct gomp_device_descr *devicep = ttask->devicep; + + if (ttask->fn != NULL) + { + void *fn_addr; + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)) + || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) + { + ttask->state = GOMP_TARGET_TASK_FALLBACK; + gomp_target_fallback (ttask->fn, ttask->hostaddrs); + return false; + } + + if (ttask->state == GOMP_TARGET_TASK_FINISHED) + { + if (ttask->tgt) + gomp_unmap_vars (ttask->tgt, true); + return false; + } + + void *actual_arguments; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + ttask->tgt = NULL; + actual_arguments = ttask->hostaddrs; + } + else + { + ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, + NULL, ttask->sizes, ttask->kinds, true, + GOMP_MAP_VARS_TARGET); + actual_arguments = (void *) ttask->tgt->tgt_start; + } + ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; + + devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, + ttask->args, (void *) ttask); + return true; + } + else if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return false; + + size_t i; + if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) + gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, + ttask->kinds, true); + else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < ttask->mapnum; i++) + if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], + NULL, &ttask->sizes[i], &ttask->kinds[i], true, + GOMP_MAP_VARS_ENTER_DATA); + i += ttask->sizes[i]; + } + else + gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, 
&ttask->sizes[i], + &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, + ttask->kinds); + return false; +} + +void +GOMP_teams (unsigned int num_teams, unsigned int thread_limit) +{ + if (thread_limit) + { + struct gomp_task_icv *icv = gomp_icv (true); + icv->thread_limit_var + = thread_limit > INT_MAX ? UINT_MAX : thread_limit; + } + (void) num_teams; +} + +void * +omp_target_alloc (size_t size, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return malloc (size); + + if (device_num < 0) + return NULL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return NULL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return malloc (size); + + gomp_mutex_lock (&devicep->lock); + void *ret = devicep->alloc_func (devicep->target_id, size); + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +void +omp_target_free (void *device_ptr, int device_num) +{ + if (device_ptr == NULL) + return; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + { + free (device_ptr); + return; + } + + if (device_num < 0) + return; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + free (device_ptr); + return; + } + + gomp_mutex_lock (&devicep->lock); + gomp_free_device_memory (devicep, device_ptr); + gomp_mutex_unlock (&devicep->lock); +} + +int +omp_target_is_present (void *ptr, int device_num) +{ + if (ptr == NULL) + return 1; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return 1; + + if (device_num < 0) + return 0; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return 0; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return 1; + + gomp_mutex_lock (&devicep->lock); + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + + cur_node.host_start = (uintptr_t) ptr; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node); + int ret = n != NULL; + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, + size_t src_offset, int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + bool ret; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + src_devicep = NULL; + } + if (src_devicep == NULL && dst_devicep == NULL) + { + memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); + return 0; + } + if (src_devicep == NULL) + { + gomp_mutex_lock (&dst_devicep->lock); + ret 
= dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&dst_devicep->lock); + return (ret ? 0 : EINVAL); + } + if (dst_devicep == NULL) + { + gomp_mutex_lock (&src_devicep->lock); + ret = src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return (ret ? 0 : EINVAL); + } + if (src_devicep == dst_devicep) + { + gomp_mutex_lock (&src_devicep->lock); + ret = src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return (ret ? 0 : EINVAL); + } + return EINVAL; +} + +static int +omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + struct gomp_device_descr *dst_devicep, + struct gomp_device_descr *src_devicep) +{ + size_t dst_slice = element_size; + size_t src_slice = element_size; + size_t j, dst_off, src_off, length; + int i, ret; + + if (num_dims == 1) + { + if (__builtin_mul_overflow (element_size, volume[0], &length) + || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) + return EINVAL; + if (dst_devicep == NULL && src_devicep == NULL) + { + memcpy ((char *) dst + dst_off, (char *) src + src_off, length); + ret = 1; + } + else if (src_devicep == NULL) + ret = dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (dst_devicep == NULL) + ret = src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (src_devicep == dst_devicep) + ret = src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else + ret = 0; + return ret ? 0 : EINVAL; + } + + /* FIXME: it would be nice to have some plugin function to handle + num_dims == 2 and num_dims == 3 more efficiently. Larger ones can + be handled in the generic recursion below, and for host-host it + should be used even for any num_dims >= 2. 
*/ + + for (i = 1; i < num_dims; i++) + if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) + || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) + return EINVAL; + if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) + return EINVAL; + for (j = 0; j < volume[0]; j++) + { + ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, + (char *) src + src_off, + element_size, num_dims - 1, + volume + 1, dst_offsets + 1, + src_offsets + 1, dst_dimensions + 1, + src_dimensions + 1, dst_devicep, + src_devicep); + if (ret) + return ret; + dst_off += dst_slice; + src_off += src_slice; + } + return 0; +} + +int +omp_target_memcpy_rect (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (!dst && !src) + return INT_MAX; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + src_devicep = NULL; + } + + if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep) + return EINVAL; + + if (src_devicep) + gomp_mutex_lock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_lock (&dst_devicep->lock); + int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, + volume, dst_offsets, src_offsets, + dst_dimensions, src_dimensions, + dst_devicep, src_devicep); + if (src_devicep) + gomp_mutex_unlock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_unlock (&dst_devicep->lock); + return ret; +} + +int +omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, + size_t device_offset, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) host_ptr; + cur_node.host_end = cur_node.host_start + size; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n) + { + if (n->tgt->tgt_start + n->tgt_offset + == (uintptr_t) device_ptr + device_offset + && n->host_start <= cur_node.host_start + && n->host_end >= cur_node.host_end) + ret = 0; + } + else + { + struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); + tgt->array = gomp_malloc (sizeof (*tgt->array)); + tgt->refcount = 1; + tgt->tgt_start = 0; + tgt->tgt_end = 0; + tgt->to_free = NULL; + tgt->prev = NULL; 
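+ /* The entry built here is inserted with REFCOUNT_INFINITY, so ordinary unmapping never releases it; only omp_target_disassociate_ptr removes such an association. */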
+ tgt->list_count = 0; + tgt->device_descr = devicep; + splay_tree_node array = tgt->array; + splay_tree_key k = &array->key; + k->host_start = cur_node.host_start; + k->host_end = cur_node.host_end; + k->tgt = tgt; + k->tgt_offset = (uintptr_t) device_ptr + device_offset; + k->refcount = REFCOUNT_INFINITY; + array->left = NULL; + array->right = NULL; + splay_tree_insert (&devicep->mem_map, array); + ret = 0; + } + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_disassociate_ptr (void *ptr, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) ptr; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n + && n->host_start == cur_node.host_start + && n->refcount == REFCOUNT_INFINITY + && n->tgt->tgt_start == 0 + && n->tgt->to_free == NULL + && n->tgt->refcount == 1 + && n->tgt->list_count == 0) + { + splay_tree_remove (&devicep->mem_map, n); + gomp_unmap_tgt (n->tgt); + ret = 0; + } + + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +#ifdef PLUGIN_SUPPORT + +/* This function tries to load a plugin for DEVICE. Name of plugin is passed + in PLUGIN_NAME. + The handles of the found functions are stored in the corresponding fields + of DEVICE. The function returns TRUE on success and FALSE otherwise. */ + +static bool +gomp_load_plugin_for_device (struct gomp_device_descr *device, + const char *plugin_name) +{ + const char *err = NULL, *last_missing = NULL; + + void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); + if (!plugin_handle) + goto dl_fail; + + /* Check if all required functions are available in the plugin and store + their handlers. None of the symbols can legitimately be NULL, + so we don't need to check dlerror all the time. */ +#define DLSYM(f) \ + if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \ + goto dl_fail + /* Similar, but missing functions are not an error. Return false if + failed, true otherwise. 
*/ +#define DLSYM_OPT(f, n) \ + ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \ + || (last_missing = #n, 0)) + + DLSYM (version); + if (device->version_func () != GOMP_VERSION) + { + err = "plugin version mismatch"; + goto fail; + } + + DLSYM (get_name); + DLSYM (get_caps); + DLSYM (get_type); + DLSYM (get_num_devices); + DLSYM (init_device); + DLSYM (fini_device); + DLSYM (load_image); + DLSYM (unload_image); + DLSYM (alloc); + DLSYM (free); + DLSYM (dev2host); + DLSYM (host2dev); + device->capabilities = device->get_caps_func (); + if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + { + DLSYM (run); + DLSYM (async_run); + DLSYM_OPT (can_run, can_run); + DLSYM (dev2dev); + } + if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) + { + if (!DLSYM_OPT (openacc.exec, openacc_exec) + || !DLSYM_OPT (openacc.register_async_cleanup, + openacc_register_async_cleanup) + || !DLSYM_OPT (openacc.async_test, openacc_async_test) + || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all) + || !DLSYM_OPT (openacc.async_wait, openacc_async_wait) + || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async) + || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all) + || !DLSYM_OPT (openacc.async_wait_all_async, + openacc_async_wait_all_async) + || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async) + || !DLSYM_OPT (openacc.create_thread_data, + openacc_create_thread_data) + || !DLSYM_OPT (openacc.destroy_thread_data, + openacc_destroy_thread_data)) + { + /* Require all the OpenACC handlers if we have + GOMP_OFFLOAD_CAP_OPENACC_200. */ + err = "plugin missing OpenACC handler function"; + goto fail; + } + + unsigned cuda = 0; + cuda += DLSYM_OPT (openacc.cuda.get_current_device, + openacc_cuda_get_current_device); + cuda += DLSYM_OPT (openacc.cuda.get_current_context, + openacc_cuda_get_current_context); + cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream); + cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream); + if (cuda && cuda != 4) + { + /* Make sure all the CUDA functions are there if any of them are. */ + err = "plugin missing OpenACC CUDA handler function"; + goto fail; + } + } +#undef DLSYM +#undef DLSYM_OPT + + return 1; + + dl_fail: + err = dlerror (); + fail: + gomp_error ("while loading %s: %s", plugin_name, err); + if (last_missing) + gomp_error ("missing function was %s", last_missing); + if (plugin_handle) + dlclose (plugin_handle); + + return 0; +} + +/* This function finalizes all initialized devices. */ + +static void +gomp_target_fini (void) +{ + int i; + for (i = 0; i < num_devices; i++) + { + bool ret = true; + struct gomp_device_descr *devicep = &devices[i]; + gomp_mutex_lock (&devicep->lock); + if (devicep->state == GOMP_DEVICE_INITIALIZED) + { + ret = devicep->fini_device_func (devicep->target_id); + devicep->state = GOMP_DEVICE_FINALIZED; + } + gomp_mutex_unlock (&devicep->lock); + if (!ret) + gomp_fatal ("device finalization failed"); + } +} + +/* This function initializes the runtime needed for offloading. + It parses the list of offload targets and tries to load the plugins for + these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP + will be set, and the array DEVICES initialized, containing descriptors for + corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed + by the others. 
*/ + +static void +gomp_target_init (void) +{ + const char *prefix = "libgomp-plugin-"; + const char *suffix = SONAME_SUFFIX (1); + const char *cur, *next; + char *plugin_name; + int i, new_num_devices; + + num_devices = 0; + devices = NULL; + + cur = OFFLOAD_TARGETS; + if (*cur) + do + { + struct gomp_device_descr current_device; + + next = strchr (cur, ','); + + plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur)) + + strlen (prefix) + strlen (suffix)); + if (!plugin_name) + { + num_devices = 0; + break; + } + + strcpy (plugin_name, prefix); + strncat (plugin_name, cur, next ? next - cur : strlen (cur)); + strcat (plugin_name, suffix); + + if (gomp_load_plugin_for_device (&current_device, plugin_name)) + { + new_num_devices = current_device.get_num_devices_func (); + if (new_num_devices >= 1) + { + /* Augment DEVICES and NUM_DEVICES. */ + + devices = realloc (devices, (num_devices + new_num_devices) + * sizeof (struct gomp_device_descr)); + if (!devices) + { + num_devices = 0; + free (plugin_name); + break; + } + + current_device.name = current_device.get_name_func (); + /* current_device.capabilities has already been set. */ + current_device.type = current_device.get_type_func (); + current_device.mem_map.root = NULL; + current_device.state = GOMP_DEVICE_UNINITIALIZED; + current_device.openacc.data_environ = NULL; + for (i = 0; i < new_num_devices; i++) + { + current_device.target_id = i; + devices[num_devices] = current_device; + gomp_mutex_init (&devices[num_devices].lock); + num_devices++; + } + } + } + + free (plugin_name); + cur = next + 1; + } + while (next); + + /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set + NUM_DEVICES_OPENMP. */ + struct gomp_device_descr *devices_s + = malloc (num_devices * sizeof (struct gomp_device_descr)); + if (!devices_s) + { + num_devices = 0; + free (devices); + devices = NULL; + } + num_devices_openmp = 0; + for (i = 0; i < num_devices; i++) + if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + devices_s[num_devices_openmp++] = devices[i]; + int num_devices_after_openmp = num_devices_openmp; + for (i = 0; i < num_devices; i++) + if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + devices_s[num_devices_after_openmp++] = devices[i]; + free (devices); + devices = devices_s; + + for (i = 0; i < num_devices; i++) + { + /* The 'devices' array can be moved (by the realloc call) until we have + found all the plugins, so registering with the OpenACC runtime (which + takes a copy of the pointer argument) must be delayed until now. */ + if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) + goacc_register (&devices[i]); + } + + if (atexit (gomp_target_fini) != 0) + gomp_fatal ("atexit failed"); +} + +#else /* PLUGIN_SUPPORT */ +/* If dlfcn.h is unavailable we always fall back to host execution. + GOMP_target* routines are just stubs for this case. */ +static void +gomp_target_init (void) +{ +} +#endif /* PLUGIN_SUPPORT */
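For reference, a minimal host-side sketch of how the omp_target_* entry points defined in this file are typically driven. It is illustrative only and not part of the patch; the device number (0), the buffer size, and the bail-out-on-error structure are assumptions made for the example, and error paths simply return without cleanup.

/* Illustrative usage sketch of the device memory routines above.  */
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  const size_t n = 1024;
  const int dev = 0;  /* Assumed device number.  */
  double *host_buf = calloc (n, sizeof (double));
  if (host_buf == NULL)
    return 1;

  /* Device allocation; degenerates to malloc for the host fallback
     device or a shared-memory device.  */
  void *dev_buf = omp_target_alloc (n * sizeof (double), dev);
  if (dev_buf == NULL)
    return 1;

  /* Host -> device copy; omp_target_memcpy returns 0 on success.  */
  if (omp_target_memcpy (dev_buf, host_buf, n * sizeof (double),
                         0, 0, dev, omp_get_initial_device ()))
    return 1;

  /* Record the association so later lookups find the device copy
     instead of mapping the host range again.  */
  if (omp_target_associate_ptr (host_buf, dev_buf, n * sizeof (double),
                                0, dev))
    return 1;
  printf ("present on device %d: %d\n", dev,
          omp_target_is_present (host_buf, dev));

  omp_target_disassociate_ptr (host_buf, dev);
  omp_target_free (dev_buf, dev);
  free (host_buf);
  return 0;
}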