diff libgomp/oacc-async.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
line wrap: on
line diff
--- a/libgomp/oacc-async.c	Thu Oct 25 07:37:49 2018 +0900
+++ b/libgomp/oacc-async.c	Thu Feb 13 11:34:05 2020 +0900
@@ -1,6 +1,6 @@
 /* OpenACC Runtime Library Definitions.
 
-   Copyright (C) 2013-2018 Free Software Foundation, Inc.
+   Copyright (C) 2013-2020 Free Software Foundation, Inc.
 
    Contributed by Mentor Embedded.
 
@@ -27,47 +27,199 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <string.h>
 #include "openacc.h"
 #include "libgomp.h"
 #include "oacc-int.h"
 
-int
-acc_async_test (int async)
-{
-  if (!async_valid_p (async))
-    gomp_fatal ("invalid async argument: %d", async);
-
-  struct goacc_thread *thr = goacc_thread ();
-
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
-
-  return thr->dev->openacc.async_test_func (async);
-}
-
-int
-acc_async_test_all (void)
+static struct goacc_thread *
+get_goacc_thread (void)
 {
   struct goacc_thread *thr = goacc_thread ();
 
   if (!thr || !thr->dev)
     gomp_fatal ("no device active");
 
-  return thr->dev->openacc.async_test_all_func ();
+  return thr;
+}
+
+static int
+validate_async_val (int async)
+{
+  if (!async_valid_p (async))
+    gomp_fatal ("invalid async-argument: %d", async);
+
+  if (async == acc_async_sync)
+    return -1;
+
+  if (async == acc_async_noval)
+    return 0;
+
+  if (async >= 0)
+    /* TODO: we reserve 0 for acc_async_noval before we can clarify the
+       semantics of "default_async".  */
+    return 1 + async;
+  else
+    __builtin_unreachable ();
+}
+
+/* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
+   might return NULL if no asyncqueue is to be used.  Otherwise, if CREATE,
+   create the asyncqueue if it doesn't exist yet.
+
+   Unless CREATE, this will not generate any OpenACC Profiling Interface
+   events.  */
+
+attribute_hidden struct goacc_asyncqueue *
+lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
+{
+  async = validate_async_val (async);
+  if (async < 0)
+    return NULL;
+
+  struct goacc_asyncqueue *ret_aq = NULL;
+  struct gomp_device_descr *dev = thr->dev;
+
+  gomp_mutex_lock (&dev->openacc.async.lock);
+
+  if (!create
+      && (async >= dev->openacc.async.nasyncqueue
+	  || !dev->openacc.async.asyncqueue[async]))
+    goto end;
+
+  if (async >= dev->openacc.async.nasyncqueue)
+    {
+      int diff = async + 1 - dev->openacc.async.nasyncqueue;
+      dev->openacc.async.asyncqueue
+	= gomp_realloc (dev->openacc.async.asyncqueue,
+			sizeof (goacc_aq) * (async + 1));
+      memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
+	      0, sizeof (goacc_aq) * diff);
+      dev->openacc.async.nasyncqueue = async + 1;
+    }
+
+  if (!dev->openacc.async.asyncqueue[async])
+    {
+      dev->openacc.async.asyncqueue[async]
+	= dev->openacc.async.construct_func (dev->target_id);
+
+      if (!dev->openacc.async.asyncqueue[async])
+	{
+	  gomp_mutex_unlock (&dev->openacc.async.lock);
+	  gomp_fatal ("async %d creation failed", async);
+	}
+      
+      /* Link new async queue into active list.  */
+      goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
+      n->aq = dev->openacc.async.asyncqueue[async];
+      n->next = dev->openacc.async.active;
+      dev->openacc.async.active = n;
+    }
+
+  ret_aq = dev->openacc.async.asyncqueue[async];
+
+ end:
+  gomp_mutex_unlock (&dev->openacc.async.lock);
+  return ret_aq;
+}
+
+/* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
+   might return NULL if no asyncqueue is to be used.  Otherwise, create the
+   asyncqueue if it doesn't exist yet.  */
+
+attribute_hidden struct goacc_asyncqueue *
+get_goacc_asyncqueue (int async)
+{
+  struct goacc_thread *thr = get_goacc_thread ();
+  return lookup_goacc_asyncqueue (thr, true, async);
+}
+
+int
+acc_async_test (int async)
+{
+  struct goacc_thread *thr = goacc_thread ();
+
+  if (!thr || !thr->dev)
+    gomp_fatal ("no device active");
+
+  goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
+  if (!aq)
+    return 1;
+
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+  if (profiling_p)
+    {
+      prof_info.async = async;
+      prof_info.async_queue = prof_info.async;
+    }
+
+  int res = thr->dev->openacc.async.test_func (aq);
+
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
+
+  return res;
+}
+
+int
+acc_async_test_all (void)
+{
+  struct goacc_thread *thr = get_goacc_thread ();
+
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
+  int ret = 1;
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    if (!thr->dev->openacc.async.test_func (l->aq))
+      {
+	ret = 0;
+	break;
+      }
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
+
+  return ret;
 }
 
 void
 acc_wait (int async)
 {
-  if (!async_valid_p (async))
-    gomp_fatal ("invalid async argument: %d", async);
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  struct goacc_thread *thr = goacc_thread ();
+  goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
+  if (!aq)
+    return;
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+  if (profiling_p)
+    {
+      prof_info.async = async;
+      prof_info.async_queue = prof_info.async;
+    }
 
-  thr->dev->openacc.async_wait_func (async);
+  if (!thr->dev->openacc.async.synchronize_func (aq))
+    gomp_fatal ("wait on %d failed", async);
+
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
 }
 
 /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.  */
@@ -84,12 +236,47 @@
 void
 acc_wait_async (int async1, int async2)
 {
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
+
+  goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
+  /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
+     we'll always be synchronous anyways?  */
+  if (!aq1)
+    return;
+
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+  if (profiling_p)
+    {
+      prof_info.async = async2;
+      prof_info.async_queue = prof_info.async;
+    }
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
+  /* An async queue is always synchronized with itself.  */
+  if (aq1 == aq2)
+    goto out_prof;
 
-  thr->dev->openacc.async_wait_async_func (async1, async2);
+  if (aq2)
+    {
+      if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
+	gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
+    }
+  else
+    {
+      /* TODO: Local thread synchronization.
+	 Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
+      if (!thr->dev->openacc.async.synchronize_func (aq1))
+	gomp_fatal ("wait on %d failed", async1);
+    }
+
+ out_prof:
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
 }
 
 void
@@ -97,10 +284,24 @@
 {
   struct goacc_thread *thr = goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
 
-  thr->dev->openacc.async_wait_all_func ();
+  bool ret = true;
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    ret &= thr->dev->openacc.async.synchronize_func (l->aq);
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
+
+  if (!ret)
+    gomp_fatal ("wait all failed");
 }
 
 /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.  */
@@ -117,13 +318,159 @@
 void
 acc_wait_all_async (int async)
 {
-  if (!async_valid_p (async))
-    gomp_fatal ("invalid async argument: %d", async);
+  struct goacc_thread *thr = get_goacc_thread ();
+
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+  if (profiling_p)
+    {
+      prof_info.async = async;
+      prof_info.async_queue = prof_info.async;
+    }
+
+  goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
+
+  bool ret = true;
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    {
+      if (waiting_queue)
+	ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
+      else
+	/* TODO: Local thread synchronization.
+	   Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
+	ret &= thr->dev->openacc.async.synchronize_func (l->aq);
+    }
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
+
+  if (!ret)
+    gomp_fatal ("wait all async(%d) failed", async);
+}
+
+void
+GOACC_wait (int async, int num_waits, ...)
+{
+  goacc_lazy_initialize ();
 
   struct goacc_thread *thr = goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  /* No nesting.  */
+  assert (thr->prof_info == NULL);
+  assert (thr->api_info == NULL);
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+  if (profiling_p)
+    {
+      prof_info.async = async;
+      prof_info.async_queue = prof_info.async;
+    }
+
+  if (num_waits)
+    {
+      va_list ap;
+
+      va_start (ap, num_waits);
+      goacc_wait (async, num_waits, &ap);
+      va_end (ap);
+    }
+  else if (async == acc_async_sync)
+    acc_wait_all ();
+  else
+    acc_wait_all_async (async);
+
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
+}
+
+attribute_hidden void
+goacc_wait (int async, int num_waits, va_list *ap)
+{
+  while (num_waits--)
+    {
+      int qid = va_arg (*ap, int);
+
+      /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'.  */
+      if (qid == acc_async_noval)
+	{
+	  if (async == acc_async_sync)
+	    acc_wait_all ();
+	  else
+	    acc_wait_all_async (async);
+	  break;
+	}
+
+      if (acc_async_test (qid))
+	continue;
 
-  thr->dev->openacc.async_wait_all_async_func (async);
+      if (async == acc_async_sync)
+	acc_wait (qid);
+      else if (qid == async)
+	/* If we're waiting on the same asynchronous queue as we're
+	   launching on, the queue itself will order work as
+	   required, so there's no need to wait explicitly.  */
+	;
+      else
+	acc_wait_async (qid, async);
+    }
+}
+
+attribute_hidden void
+goacc_async_free (struct gomp_device_descr *devicep,
+		  struct goacc_asyncqueue *aq, void *ptr)
+{
+  if (!aq)
+    free (ptr);
+  else
+    devicep->openacc.async.queue_callback_func (aq, free, ptr);
 }
+
+/* This function initializes the asyncqueues for the device specified by
+   DEVICEP.  TODO DEVICEP must be locked on entry, and remains locked on
+   return.  */
+
+attribute_hidden void
+goacc_init_asyncqueues (struct gomp_device_descr *devicep)
+{
+  devicep->openacc.async.nasyncqueue = 0;
+  devicep->openacc.async.asyncqueue = NULL;
+  devicep->openacc.async.active = NULL;
+  gomp_mutex_init (&devicep->openacc.async.lock);
+}
+
+/* This function finalizes the asyncqueues for the device specified by DEVICEP.
+   TODO DEVICEP must be locked on entry, and remains locked on return.  */
+
+attribute_hidden bool
+goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
+{
+  bool ret = true;
+  gomp_mutex_lock (&devicep->openacc.async.lock);
+  if (devicep->openacc.async.nasyncqueue > 0)
+    {
+      goacc_aq_list next;
+      for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
+	{
+	  ret &= devicep->openacc.async.destruct_func (l->aq);
+	  next = l->next;
+	  free (l);
+	}
+      free (devicep->openacc.async.asyncqueue);
+      devicep->openacc.async.nasyncqueue = 0;
+      devicep->openacc.async.asyncqueue = NULL;
+      devicep->openacc.async.active = NULL;
+    }
+  gomp_mutex_unlock (&devicep->openacc.async.lock);
+  gomp_mutex_destroy (&devicep->openacc.async.lock);
+  return ret;
+}