Mercurial > hg > CbC > CbC_gcc
diff libgomp/oacc-profiling.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libgomp/oacc-profiling.c Thu Feb 13 11:34:05 2020 +0900 @@ -0,0 +1,662 @@ +/* OpenACC Profiling Interface + + Copyright (C) 2019-2020 Free Software Foundation, Inc. + + Contributed by Mentor, a Siemens Business. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#define _GNU_SOURCE +#include "libgomp.h" +#include "oacc-int.h" +#include "secure_getenv.h" +#include "acc_prof.h" +#include <assert.h> +#ifdef HAVE_STRING_H +# include <string.h> +#endif +#ifdef PLUGIN_SUPPORT +# include <dlfcn.h> +#endif + +#define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")") + +/* Statically assert that the layout of the common fields in the + 'acc_event_info' variants matches. 
*/ +/* 'event_type' */ +STATIC_ASSERT (offsetof (acc_event_info, event_type) + == offsetof (acc_event_info, data_event.event_type)); +STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type) + == offsetof (acc_event_info, launch_event.event_type)); +STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type) + == offsetof (acc_event_info, other_event.event_type)); +/* 'valid_bytes' */ +STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes) + == offsetof (acc_event_info, launch_event.valid_bytes)); +STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes) + == offsetof (acc_event_info, other_event.valid_bytes)); +/* 'parent_construct' */ +STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct) + == offsetof (acc_event_info, launch_event.parent_construct)); +STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct) + == offsetof (acc_event_info, other_event.parent_construct)); +/* 'implicit' */ +STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit) + == offsetof (acc_event_info, launch_event.implicit)); +STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit) + == offsetof (acc_event_info, other_event.implicit)); +/* 'tool_info' */ +STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info) + == offsetof (acc_event_info, launch_event.tool_info)); +STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info) + == offsetof (acc_event_info, other_event.tool_info)); + +struct goacc_prof_callback_entry +{ + acc_prof_callback cb; + int ref; + bool enabled; + struct goacc_prof_callback_entry *next; +}; + +/* Use a separate flag to minimize run-time performance impact for the (very + common) case that profiling is not enabled. + + Once enabled, we're not going to disable this anymore, anywhere. We + probably could, by adding appropriate logic to 'acc_prof_register', + 'acc_prof_unregister'. */ +bool goacc_prof_enabled = false; + +/* Global state for registered callbacks. 
+ 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */ +static bool goacc_prof_callbacks_enabled[acc_ev_last]; +static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last]; +/* Lock used to protect access to 'goacc_prof_callbacks_enabled', and + 'goacc_prof_callback_entries'. */ +static gomp_mutex_t goacc_prof_lock; + +void +goacc_profiling_initialize (void) +{ + gomp_mutex_init (&goacc_prof_lock); + + /* Initially, all callbacks for all events are enabled. */ + for (int i = 0; i < acc_ev_last; ++i) + goacc_prof_callbacks_enabled[i] = true; + + +#ifdef PLUGIN_SUPPORT + char *acc_proflibs = secure_getenv ("ACC_PROFLIB"); + while (acc_proflibs != NULL && acc_proflibs[0] != '\0') + { + char *acc_proflibs_sep = strchr (acc_proflibs, ';'); + char *acc_proflib; + if (acc_proflibs_sep == acc_proflibs) + { + /* Stray ';' separator: make sure we don't 'dlopen' the main + program. */ + acc_proflib = NULL; + } + else + { + if (acc_proflibs_sep != NULL) + { + /* Single out the first library. */ + acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1); + memcpy (acc_proflib, acc_proflibs, + acc_proflibs_sep - acc_proflibs); + acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0'; + } + else + { + /* No ';' separator, so only one library. 
*/ + acc_proflib = acc_proflibs; + } + + gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib); + void *dl_handle = dlopen (acc_proflib, RTLD_LAZY); + if (dl_handle != NULL) + { + typeof (&acc_register_library) a_r_l + = dlsym (dl_handle, "acc_register_library"); + if (a_r_l == NULL) + goto dl_fail; + gomp_debug (0, " %s: calling %s:acc_register_library\n", + __FUNCTION__, acc_proflib); + a_r_l (acc_prof_register, acc_prof_unregister, + acc_prof_lookup); + } + else + { + dl_fail: + gomp_error ("while loading ACC_PROFLIB \"%s\": %s", + acc_proflib, dlerror ()); + if (dl_handle != NULL) + { + int err = dlclose (dl_handle); + dl_handle = NULL; + if (err != 0) + goto dl_fail; + } + } + } + + if (acc_proflib != acc_proflibs) + { + free (acc_proflib); + + acc_proflibs = acc_proflibs_sep + 1; + } + else + acc_proflibs = NULL; + } +#endif /* PLUGIN_SUPPORT */ +} + +void +acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg) +{ + gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n", + __FUNCTION__, (int) ev, (void *) cb, (int) reg); + + + /* For any events to be dispatched, the user first has to register a + callback, which makes this here a good place for enabling the whole + machinery. */ + if (!GOACC_PROF_ENABLED) + __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE); + + + enum + { + EVENT_KIND_BOGUS, + EVENT_KIND_NORMAL, + /* As end events invoke callbacks in the reverse order, we register these + in the reverse order here. 
*/ + EVENT_KIND_END, + } event_kind = EVENT_KIND_BOGUS; + switch (ev) + { + case acc_ev_none: + case acc_ev_device_init_start: + case acc_ev_device_shutdown_start: + case acc_ev_runtime_shutdown: + case acc_ev_create: + case acc_ev_delete: + case acc_ev_alloc: + case acc_ev_free: + case acc_ev_enter_data_start: + case acc_ev_exit_data_start: + case acc_ev_update_start: + case acc_ev_compute_construct_start: + case acc_ev_enqueue_launch_start: + case acc_ev_enqueue_upload_start: + case acc_ev_enqueue_download_start: + case acc_ev_wait_start: + event_kind = EVENT_KIND_NORMAL; + break; + case acc_ev_device_init_end: + case acc_ev_device_shutdown_end: + case acc_ev_enter_data_end: + case acc_ev_exit_data_end: + case acc_ev_update_end: + case acc_ev_compute_construct_end: + case acc_ev_enqueue_launch_end: + case acc_ev_enqueue_upload_end: + case acc_ev_enqueue_download_end: + case acc_ev_wait_end: + event_kind = EVENT_KIND_END; + break; + case acc_ev_last: + break; + } + if (event_kind == EVENT_KIND_BOGUS) + { + /* Silently ignore. */ + gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n"); + return; + } + + bool bogus = true; + switch (reg) + { + case acc_reg: + case acc_toggle: + case acc_toggle_per_thread: + bogus = false; + break; + } + if (bogus) + { + /* Silently ignore. */ + gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n"); + return; + } + + /* Special cases. */ + if (reg == acc_toggle) + { + if (cb == NULL) + { + gomp_debug (0, " globally enabling callbacks\n"); + gomp_mutex_lock (&goacc_prof_lock); + /* For 'acc_ev_none', this acts as a global toggle. 
*/ + goacc_prof_callbacks_enabled[ev] = true; + gomp_mutex_unlock (&goacc_prof_lock); + return; + } + else if (ev == acc_ev_none && cb != NULL) + { + gomp_debug (0, " ignoring request\n"); + return; + } + } + else if (reg == acc_toggle_per_thread) + { + if (ev == acc_ev_none && cb == NULL) + { + gomp_debug (0, " thread: enabling callbacks\n"); + goacc_lazy_initialize (); + struct goacc_thread *thr = goacc_thread (); + thr->prof_callbacks_enabled = true; + return; + } + /* Silently ignore. */ + gomp_debug (0, " ignoring bogus request\n"); + return; + } + + gomp_mutex_lock (&goacc_prof_lock); + + struct goacc_prof_callback_entry *it, *it_p; + it = goacc_prof_callback_entries[ev]; + it_p = NULL; + while (it) + { + if (it->cb == cb) + break; + it_p = it; + it = it->next; + } + + switch (reg) + { + case acc_reg: + /* If we already have this callback registered, just increment its + reference count. */ + if (it != NULL) + { + it->ref++; + gomp_debug (0, " already registered;" + " incrementing reference count to: %d\n", it->ref); + } + else + { + struct goacc_prof_callback_entry *e + = gomp_malloc (sizeof (struct goacc_prof_callback_entry)); + e->cb = cb; + e->ref = 1; + e->enabled = true; + bool prepend = (event_kind == EVENT_KIND_END); + /* If we don't have any callback registered yet, also use the + 'prepend' code path. 
*/ + if (it_p == NULL) + prepend = true; + if (prepend) + { + gomp_debug (0, " prepending\n"); + e->next = goacc_prof_callback_entries[ev]; + goacc_prof_callback_entries[ev] = e; + } + else + { + gomp_debug (0, " appending\n"); + e->next = NULL; + it_p->next = e; + } + } + break; + + case acc_toggle: + if (it == NULL) + { + gomp_debug (0, " ignoring request: is not registered\n"); + break; + } + else + { + gomp_debug (0, " enabling\n"); + it->enabled = true; + } + break; + + case acc_toggle_per_thread: + __builtin_unreachable (); + } + + gomp_mutex_unlock (&goacc_prof_lock); +} + +void +acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg) +{ + gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n", + __FUNCTION__, (int) ev, (void *) cb, (int) reg); + + /* If profiling is not enabled, there cannot be anything to unregister. */ + if (!GOACC_PROF_ENABLED) + return; + + if (ev < acc_ev_none + || ev >= acc_ev_last) + { + /* Silently ignore. */ + gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n"); + return; + } + + bool bogus = true; + switch (reg) + { + case acc_reg: + case acc_toggle: + case acc_toggle_per_thread: + bogus = false; + break; + } + if (bogus) + { + /* Silently ignore. */ + gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n"); + return; + } + + /* Special cases. */ + if (reg == acc_toggle) + { + if (cb == NULL) + { + gomp_debug (0, " globally disabling callbacks\n"); + gomp_mutex_lock (&goacc_prof_lock); + /* For 'acc_ev_none', this acts as a global toggle. 
*/ + goacc_prof_callbacks_enabled[ev] = false; + gomp_mutex_unlock (&goacc_prof_lock); + return; + } + else if (ev == acc_ev_none && cb != NULL) + { + gomp_debug (0, " ignoring request\n"); + return; + } + } + else if (reg == acc_toggle_per_thread) + { + if (ev == acc_ev_none && cb == NULL) + { + gomp_debug (0, " thread: disabling callbacks\n"); + goacc_lazy_initialize (); + struct goacc_thread *thr = goacc_thread (); + thr->prof_callbacks_enabled = false; + return; + } + /* Silently ignore. */ + gomp_debug (0, " ignoring bogus request\n"); + return; + } + + gomp_mutex_lock (&goacc_prof_lock); + + struct goacc_prof_callback_entry *it, *it_p; + it = goacc_prof_callback_entries[ev]; + it_p = NULL; + while (it) + { + if (it->cb == cb) + break; + it_p = it; + it = it->next; + } + + switch (reg) + { + case acc_reg: + if (it == NULL) + { + /* Silently ignore. */ + gomp_debug (0, " ignoring bogus request: is not registered\n"); + break; + } + it->ref--; + gomp_debug (0, " decrementing reference count to: %d\n", it->ref); + if (it->ref == 0) + { + if (it_p == NULL) + goacc_prof_callback_entries[ev] = it->next; + else + it_p->next = it->next; + free (it); + } + break; + + case acc_toggle: + if (it == NULL) + { + gomp_debug (0, " ignoring request: is not registered\n"); + break; + } + else + { + gomp_debug (0, " disabling\n"); + it->enabled = false; + } + break; + + case acc_toggle_per_thread: + __builtin_unreachable (); + } + + gomp_mutex_unlock (&goacc_prof_lock); +} + +acc_query_fn +acc_prof_lookup (const char *name) +{ + gomp_debug (0, "%s (%s)\n", + __FUNCTION__, name ?: "NULL"); + + return NULL; +} + +void +acc_register_library (acc_prof_reg reg, acc_prof_reg unreg, + acc_prof_lookup_func lookup) +{ + gomp_fatal ("TODO"); +} + +/* Prepare to dispatch events? 
*/ + +bool +_goacc_profiling_dispatch_p (bool check_not_nested_p) +{ + gomp_debug (0, "%s\n", __FUNCTION__); + + bool ret; + + struct goacc_thread *thr = goacc_thread (); + if (__builtin_expect (thr == NULL, false)) + { + /* If we don't have any per-thread state yet, that means that per-thread + callback dispatch has not been explicitly disabled (which only a call + to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and + that would have allocated per-thread state via + 'goacc_lazy_initialize'); initially, all callbacks for all events are + enabled. */ + gomp_debug (0, " %s: don't have any per-thread state yet\n", __FUNCTION__); + } + else + { + if (check_not_nested_p) + { + /* No nesting. */ + assert (thr->prof_info == NULL); + assert (thr->api_info == NULL); + } + + if (__builtin_expect (!thr->prof_callbacks_enabled, true)) + { + gomp_debug (0, " %s: disabled for this thread\n", __FUNCTION__); + ret = false; + goto out; + } + } + + gomp_mutex_lock (&goacc_prof_lock); + + /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */ + if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true)) + { + gomp_debug (0, " %s: disabled globally\n", __FUNCTION__); + ret = false; + goto out_unlock; + } + else + ret = true; + + out_unlock: + gomp_mutex_unlock (&goacc_prof_lock); + + out: + return ret; +} + +/* Set up to dispatch events? */ + +bool +_goacc_profiling_setup_p (struct goacc_thread *thr, + acc_prof_info *prof_info, acc_api_info *api_info) +{ + gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr); + + /* If we don't have any per-thread state yet, we can't register 'prof_info' + and 'api_info'. */ + if (__builtin_expect (thr == NULL, false)) + { + gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for" + " the current call, construct, or directive\n"); + return false; + } + + if (thr->prof_info != NULL) + { + /* Profiling has already been set up for an outer construct. 
In this + case, we continue to use the existing information, and thus return + 'false' here. + + This can happen, for example, for an 'enter data' directive, which + sets up profiling, then calls into 'acc_copyin', which should not + again set up profiling, should not overwrite the existing + information. */ + return false; + } + + thr->prof_info = prof_info; + thr->api_info = api_info; + + /* Fill in some defaults. */ + + prof_info->event_type = -1; /* Must be set later. */ + prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES; + prof_info->version = _ACC_PROF_INFO_VERSION; + if (thr->dev) + { + prof_info->device_type = acc_device_type (thr->dev->type); + prof_info->device_number = thr->dev->target_id; + } + else + { + prof_info->device_type = -1; + prof_info->device_number = -1; + } + prof_info->thread_id = -1; + prof_info->async = acc_async_sync; + prof_info->async_queue = prof_info->async; + prof_info->src_file = NULL; + prof_info->func_name = NULL; + prof_info->line_no = -1; + prof_info->end_line_no = -1; + prof_info->func_line_no = -1; + prof_info->func_end_line_no = -1; + + api_info->device_api = acc_device_api_none; + api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES; + api_info->device_type = prof_info->device_type; + api_info->vendor = -1; + api_info->device_handle = NULL; + api_info->context_handle = NULL; + api_info->async_handle = NULL; + + return true; +} + +/* Dispatch events. + + This must only be called if 'GOACC_PROFILING_DISPATCH_P' or + 'GOACC_PROFILING_SETUP_P' returned a true result. 
*/

/* Invoke, in list order, every enabled callback registered for the event
   type recorded in EVENT_INFO, handing PROF_INFO, EVENT_INFO, and APT_INFO
   to each.  Nothing is invoked if the event type as a whole is disabled.
   'goacc_prof_lock' is held for the whole walk, callbacks included.  */

void
goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
                          acc_api_info *apt_info)
{
  acc_event_t event_type = event_info->event_type;
  gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
  assert (event_type > acc_ev_none
          && event_type < acc_ev_last);

  gomp_mutex_lock (&goacc_prof_lock);

  if (goacc_prof_callbacks_enabled[event_type])
    {
      struct goacc_prof_callback_entry *entry
        = goacc_prof_callback_entries[event_type];
      while (entry != NULL)
        {
          if (entry->enabled)
            {
              gomp_debug (0, " calling callback %p\n", entry->cb);
              entry->cb (prof_info, event_info, apt_info);
            }
          else
            gomp_debug (0, " disabled for callback %p\n", entry->cb);

          entry = entry->next;
        }
    }
  else
    gomp_debug (0, " disabled for this event type\n");

  gomp_mutex_unlock (&goacc_prof_lock);
}