diff liboffloadmic/plugin/libgomp-plugin-intelmic.cpp @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp	Fri Oct 27 22:46:09 2017 +0900
@@ -0,0 +1,540 @@
+/* Plugin for offload execution on Intel MIC devices.
+
+   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+
+   Contributed by Ilya Verbin <ilya.verbin@intel.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Host side part of a libgomp plugin.  */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <utility>
+#include <vector>
+#include <map>
+#include "libgomp-plugin.h"
+#include "compiler_if_host.h"
+#include "main_target_image.h"
+#include "gomp-constants.h"
+
+#define OFFLOAD_ACTIVE_WAIT_ENV	"OFFLOAD_ACTIVE_WAIT"
+
+#ifdef DEBUG
+#define TRACE(...)					    \
+{							    \
+fprintf (stderr, "HOST:\t%s:%s ", __FILE__, __FUNCTION__);  \
+fprintf (stderr, __VA_ARGS__);				    \
+fprintf (stderr, "\n");					    \
+}
+#else
+#define TRACE { }
+#endif
+
+
+/* Start/end addresses of functions and global variables on a device.
+   offload_image () stores the function entries first, followed by the
+   variable entries.  */
+typedef std::vector<addr_pair> AddrVect;
+
+/* Addresses for one image and all devices; the index is the device
+   number.  */
+typedef std::vector<AddrVect> DevAddrVect;
+
+/* Addresses for all images and all devices, keyed by the image pointer
+   received from libgomp.  */
+typedef std::map<const void *, DevAddrVect> ImgDevAddrMap;
+
+/* Image descriptor needed by __offload_[un]register_image.  It is allocated
+   with malloc in offload_image () with room for SIZE bytes of DATA after the
+   fixed header, and released with free in GOMP_OFFLOAD_unload_image ().  */
+struct TargetImageDesc {
+  /* Number of bytes stored in DATA.  */
+  int64_t size;
+  /* Generated library name of the form "lib<number>.so".
+     10 characters is enough for max int value.  */
+  char name[sizeof ("lib0000000000.so")];
+  /* Copy of the image contents (flexible array member).  */
+  char data[];
+};
+
+/* Image descriptors, indexed by a pointer obtained from libgomp.  */
+typedef std::map<const void *, TargetImageDesc *> ImgDescMap;
+
+
+/* Total number of available devices.  Set once in init () from
+   _Offload_number_of_devices ().  */
+static int num_devices;
+
+/* Total number of shared libraries with offloading to Intel MIC.
+   offload_image () increments it for every image and uses the previous value
+   to build the image's generated name.  */
+static int num_images;
+
+/* Two dimensional array: one key is a pointer to image,
+   second key is number of device.  Contains a vector of pointer pairs.
+   Allocated in init ().  */
+static ImgDevAddrMap *address_table;
+
+/* Descriptors of all images, registered in liboffloadmic.
+   Allocated in init ().  */
+static ImgDescMap *image_descriptors;
+
+/* Thread-safe registration of the main image: passed to pthread_once in
+   GOMP_OFFLOAD_init_device () so that register_main_image () runs only
+   once.  */
+static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
+
+/* Template descriptor for a variable sent from the host to the target
+   ("in" set, "out" clear).  Users in this file copy it and then fill in the
+   PTR and SIZE fields before passing the copy to offload ().  The
+   initializer is positional, so it must follow the field order of VarDesc
+   as declared by liboffloadmic.  */
 static VarDesc vd_host2tgt = {
+  { 1, 1 },		      /* dst, src			      */
+  { 1, 0 },		      /* in, out			      */
+  1,			      /* alloc_if			      */
+  1,			      /* free_if			      */
+  4,			      /* align				      */
+  0,			      /* mic_offset			      */
+  { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
+				 is_stack_buf, sink_addr, alloc_disp,
+				 is_noncont_src, is_noncont_dst	      */
+  0,			      /* offset				      */
+  0,			      /* size				      */
+  1,			      /* count				      */
+  0,			      /* alloc				      */
+  0,			      /* into				      */
+  0			      /* ptr				      */
+};
+
+/* Template descriptor for a variable received from the target on the host
+   ("in" clear, "out" set).  Users in this file copy it and then fill in the
+   PTR and SIZE fields before passing the copy to offload ().  The
+   initializer is positional, so it must follow the field order of VarDesc
+   as declared by liboffloadmic.  */
 static VarDesc vd_tgt2host = {
+  { 1, 1 },		      /* dst, src			      */
+  { 0, 1 },		      /* in, out			      */
+  1,			      /* alloc_if			      */
+  1,			      /* free_if			      */
+  4,			      /* align				      */
+  0,			      /* mic_offset			      */
+  { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
+				 is_stack_buf, sink_addr, alloc_disp,
+				 is_noncont_src, is_noncont_dst	      */
+  0,			      /* offset				      */
+  0,			      /* size				      */
+  1,			      /* count				      */
+  0,			      /* alloc				      */
+  0,			      /* into				      */
+  0			      /* ptr				      */
+};
+
+
+/* Library constructor.  Defaults OFFLOAD_ACTIVE_WAIT to "0" when the
+   environment does not define it, allocates the global image tables and
+   caches the number of devices reported by liboffloadmic.  */
+__attribute__((constructor))
+static void
+init (void)
+{
+  /* Active waiting only burns CPU, so switch it off unless the environment
+     already carries an explicit choice.  */
+  if (getenv (OFFLOAD_ACTIVE_WAIT_ENV) == NULL)
+    setenv (OFFLOAD_ACTIVE_WAIT_ENV, "0", 0);
+
+  address_table = new ImgDevAddrMap;
+  image_descriptors = new ImgDescMap;
+  num_devices = _Offload_number_of_devices ();
+}
+
+/* Return the name of this plugin, "intelmic".  The returned string has
+   static storage duration.  */
+extern "C" const char *
+GOMP_OFFLOAD_get_name (void)
+{
+  static const char plugin_name[] = "intelmic";
+  TRACE ("(): return %s", plugin_name);
+  return plugin_name;
+}
+
+/* Return the capability mask of this plugin: GOMP_OFFLOAD_CAP_OPENMP_400
+   only.  */
+extern "C" unsigned int
+GOMP_OFFLOAD_get_caps (void)
+{
+  const unsigned int caps = GOMP_OFFLOAD_CAP_OPENMP_400;
+  TRACE ("(): return %x", caps);
+  return caps;
+}
+
+/* Return the offload target type handled by this plugin,
+   OFFLOAD_TARGET_TYPE_INTEL_MIC, as an int.  */
+extern "C" int
+GOMP_OFFLOAD_get_type (void)
+{
+  const int target_type = OFFLOAD_TARGET_TYPE_INTEL_MIC;
+  TRACE ("(): return %d", target_type);
+  return target_type;
+}
+
+/* Return the device count that init () cached in num_devices.  */
+extern "C" int
+GOMP_OFFLOAD_get_num_devices (void)
+{
+  const int device_count = num_devices;
+  TRACE ("(): return %d", device_count);
+  return device_count;
+}
+
+/* Run the target function NAME on DEVICE, passing NUM_VARS variable
+   descriptors from VARS.  FILE and LINE identify the call site; they are
+   handed to liboffloadmic and used in the error message.  When ASYNC_DATA
+   is NULL the request goes through __offload_offload1; otherwise it goes
+   through __offload_offload3 with the omp_async flag set and ASYNC_DATA
+   passed along.  Returns false, after reporting an error, if the target
+   could not be acquired; otherwise returns the result of the liboffloadmic
+   call.  */
+
+static bool
+offload (const char *file, uint64_t line, int device, const char *name,
+	 int num_vars, VarDesc *vars, const void **async_data)
+{
+  OFFLOAD ofld = __offload_target_acquire1 (&device, file, line);
+  if (!ofld)
+    {
+      /* LINE is a 64-bit value, so it must not be printed with "%d".  */
+      GOMP_PLUGIN_error ("%s:%llu: Offload target acquire failed\n", file,
+			 (unsigned long long) line);
+      return false;
+    }
+
+  if (async_data == NULL)
+    return __offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0,
+			       NULL, NULL);
+
+  OffloadFlags flags;
+  flags.flags = 0;
+  flags.bits.omp_async = 1;
+  return __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0,
+			     NULL, async_data, 0, NULL, flags, NULL);
+}
+
+/* Register the main target image with liboffloadmic and install the
+   completion callback for asynchronous tasks.  Used as the pthread_once
+   routine in GOMP_OFFLOAD_init_device ().  */
+static void
+register_main_image (void)
+{
+  /* The result is deliberately ignored: old versions of liboffloadmic did
+     not return anything from this call.  */
+  __offload_register_image (&main_target_image);
+
+  /* Have liboffloadmic call GOMP_PLUGIN_target_task_completion once an
+     asynchronous task on the target has completed.  */
+  __offload_register_task_callback (GOMP_PLUGIN_target_task_completion);
+}
+
+/* Initialize DEVICE.  The main image is registered on the first call only
+   (guarded by pthread_once); then "__offload_target_init_proc" is offloaded
+   to DEVICE.  liboffloadmic loads and runs offload_target_main on all
+   available devices during a first call to offload ().  Returns the result
+   of that offload.  */
+extern "C" bool
+GOMP_OFFLOAD_init_device (int device)
+{
+  TRACE ("(device = %d)", device);
+
+  pthread_once (&main_image_is_registered, register_main_image);
+
+  const bool initialized
+    = offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
+	       NULL, NULL);
+  return initialized;
+}
+
+/* Finalize DEVICE by unregistering the main target image.  DEVICE itself is
+   only used for tracing.  Always returns true.  */
+extern "C" bool
+GOMP_OFFLOAD_fini_device (int device)
+{
+  TRACE ("(device = %d)", device);
+
+  /* Unregistering the main image makes liboffloadmic finalize the target
+     processes on all available devices.  */
+  __offload_unregister_image (&main_target_image);
+
+  return true;
+}
+
+/* Query DEVICE for its table of functions and variables.  A first offload
+   ("__offload_target_table_p1") receives the counts into NUM_FUNCS and
+   NUM_VARS.  If NUM_FUNCS + 2 * NUM_VARS is positive, an array of that many
+   pointers is allocated with new [], stored in TABLE and filled by a second
+   offload ("__offload_target_table_p2"); the caller releases it with
+   delete [].  TABLE is left untouched when nothing is fetched.  Returns
+   false if either offload fails.  */
+static bool
+get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
+{
+  VarDesc counts[2] = { vd_tgt2host, vd_tgt2host };
+  counts[0].ptr = &num_funcs;
+  counts[0].size = sizeof (num_funcs);
+  counts[1].ptr = &num_vars;
+  counts[1].size = sizeof (num_vars);
+
+  if (!offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
+		counts, NULL))
+    return false;
+
+  /* One slot per function and two slots per variable.  */
+  const int entries = num_funcs + 2 * num_vars;
+  if (entries <= 0)
+    return true;
+
+  table = new void * [entries];
+
+  VarDesc payload = vd_tgt2host;
+  payload.ptr = table;
+  payload.size = entries * sizeof (void *);
+
+  return offload (__FILE__, __LINE__, device, "__offload_target_table_p2",
+		  1, &payload, NULL);
+}
+
+/* Offload TARGET_IMAGE to all available devices and fill address_table with
+   corresponding target addresses.
+
+   TARGET_IMAGE points to two pointers: the start and the end of the image
+   data.  The data is copied into a freshly allocated TargetImageDesc, which
+   is registered with liboffloadmic and recorded in image_descriptors.  For
+   each device the function/variable table is fetched and converted to
+   addr_pair entries: functions first (END = START + 1), then variables
+   (END = START + size).
+
+   Returns false if memory could not be allocated or if fetching the table
+   failed for any device.  A device whose table could not be fetched gets an
+   empty entry, so the partially received counts are never used to index a
+   missing or unfilled table.  */
+
+static bool
+offload_image (const void *target_image)
+{
+  void *image_start = ((void **) target_image)[0];
+  void *image_end   = ((void **) target_image)[1];
+
+  TRACE ("(target_image = %p { %p, %p })",
+	 target_image, image_start, image_end);
+
+  int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start;
+  TargetImageDesc *image
+    = (TargetImageDesc *) malloc (offsetof (TargetImageDesc, data)
+				  + image_size);
+  if (!image)
+    {
+      GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
+      return false;
+    }
+
+  image->size = image_size;
+  snprintf (image->name, sizeof (image->name), "lib%010d.so", num_images++);
+  memcpy (image->data, image_start, image->size);
+
+  /* image->size is 64 bits wide; print it with a matching specifier.  */
+  TRACE ("() __offload_register_image %s { %p, %lld }",
+	 image->name, image_start, (long long) image->size);
+  /* Do not check the return value, because old versions of liboffloadmic did
+     not have return values.  */
+  __offload_register_image (image);
+
+  /* Receive tables for target_image from all devices.  */
+  DevAddrVect dev_table;
+  bool ret = true;
+  for (int dev = 0; dev < num_devices; dev++)
+    {
+      int num_funcs = 0;
+      int num_vars = 0;
+      void **table = NULL;
+      AddrVect curr_dev_table;
+
+      bool have_table = get_target_table (dev, num_funcs, num_vars, table);
+      ret &= have_table;
+
+      /* On failure the counts may already be set while TABLE is still NULL
+	 or holds unfilled memory, so only read it after a full success.  */
+      if (have_table && table != NULL && num_funcs >= 0 && num_vars >= 0)
+	{
+	  for (int i = 0; i < num_funcs; i++)
+	    {
+	      addr_pair tgt_addr;
+	      tgt_addr.start = (uintptr_t) table[i];
+	      tgt_addr.end = tgt_addr.start + 1;
+	      TRACE ("() func %d:\t0x%llx..0x%llx", i,
+		     (unsigned long long) tgt_addr.start,
+		     (unsigned long long) tgt_addr.end);
+	      curr_dev_table.push_back (tgt_addr);
+	    }
+
+	  /* Each variable occupies two slots: its address and its size.  */
+	  for (int i = 0; i < num_vars; i++)
+	    {
+	      addr_pair tgt_addr;
+	      tgt_addr.start = (uintptr_t) table[num_funcs+i*2];
+	      tgt_addr.end
+		= tgt_addr.start + (uintptr_t) table[num_funcs+i*2+1];
+	      TRACE ("() var %d:\t0x%llx..0x%llx", i,
+		     (unsigned long long) tgt_addr.start,
+		     (unsigned long long) tgt_addr.end);
+	      curr_dev_table.push_back (tgt_addr);
+	    }
+	}
+
+      dev_table.push_back (curr_dev_table);
+      delete [] table;
+    }
+
+  /* Record the image even on failure so that GOMP_OFFLOAD_unload_image ()
+     can unregister and free the descriptor.  */
+  address_table->insert (std::make_pair (target_image, dev_table));
+  image_descriptors->insert (std::make_pair (target_image, image));
+  return ret;
+}
+
+/* Report the libgomp version this plugin was built against (GOMP_VERSION).
+   There is no requirement for cross-version compatibility.  */
+
+extern "C" unsigned
+GOMP_OFFLOAD_version (void)
+{
+  const unsigned plugin_version = GOMP_VERSION;
+  return plugin_version;
+}
+
+/* Load TARGET_IMAGE for DEVICE and hand back its address table.
+
+   VERSION is rejected if its device part is newer than
+   GOMP_VERSION_INTEL_MIC.  The image is offloaded (to all devices) only if
+   it is not yet present in address_table.  On success *RESULT receives a
+   malloc'ed copy of the table for DEVICE and the number of entries is
+   returned; *RESULT may be NULL when that number is 0.  Returns -1 after
+   reporting an error on version mismatch, offload failure, an out-of-range
+   DEVICE, or allocation failure.  */
+extern "C" int
+GOMP_OFFLOAD_load_image (int device, const unsigned version,
+			 const void *target_image, addr_pair **result)
+{
+  TRACE ("(device = %d, target_image = %p)", device, target_image);
+
+  if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
+    {
+      GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
+			 " (expected %u, received %u)",
+			 GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
+      return -1;
+    }
+
+  /* If target_image is already present in address_table, then there is no need
+     to offload it.  */
+  ImgDevAddrMap::iterator img = address_table->find (target_image);
+  if (img == address_table->end ())
+    {
+      /* If fail, return -1 as error code.  */
+      if (!offload_image (target_image))
+	return -1;
+      img = address_table->find (target_image);
+    }
+
+  /* Never index the per-device vector with a device number it does not
+     contain.  */
+  if (img == address_table->end ()
+      || device < 0
+      || (size_t) device >= img->second.size ())
+    {
+      GOMP_PLUGIN_error ("%s: Invalid device number %d\n", __FILE__, device);
+      return -1;
+    }
+
+  const AddrVect &curr_dev_table = img->second[device];
+  int table_size = curr_dev_table.size ();
+  addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
+  /* malloc (0) may legitimately return NULL; only a failed non-empty
+     allocation is an error.  */
+  if (table == NULL && table_size > 0)
+    {
+      GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
+      return -1;
+    }
+
+  std::copy (curr_dev_table.begin (), curr_dev_table.end (), table);
+  *result = table;
+  return table_size;
+}
+
+extern "C" bool
+GOMP_OFFLOAD_unload_image (int device, unsigned version,
+			   const void *target_image)
+{
+  if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
+    {
+      GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
+			 " (expected %u, received %u)",
+			 GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
+      return false;
+    }
+
+  TRACE ("(device = %d, target_image = %p)", device, target_image);
+
+  /* liboffloadmic unloads the image from all available devices.  */
+  if (image_descriptors->count (target_image) > 0)
+    {
+      TargetImageDesc *image_desc = (*image_descriptors)[target_image];
+      __offload_unregister_image (image_desc);
+      free (image_desc);
+
+      address_table->erase (target_image);
+      image_descriptors->erase (target_image);
+    }
+  return true;
+}
+
+/* Allocate SIZE bytes on DEVICE by offloading "__offload_target_alloc": SIZE
+   is sent to the target and the resulting target pointer is received back.
+   Returns that pointer, or NULL if the offload failed.  */
+extern "C" void *
+GOMP_OFFLOAD_alloc (int device, size_t size)
+{
+  /* SIZE is a size_t, so it is printed with "%zu" rather than "%d".  */
+  TRACE ("(device = %d, size = %zu)", device, size);
+
+  /* Start from NULL so the result is never an indeterminate value.  */
+  void *tgt_ptr = NULL;
+  VarDesc vd[2] = { vd_host2tgt, vd_tgt2host };
+  vd[0].ptr = &size;
+  vd[0].size = sizeof (size);
+  vd[1].ptr = &tgt_ptr;
+  vd[1].size = sizeof (void *);
+
+  if (!offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2,
+		vd, NULL))
+    return NULL;
+
+  return tgt_ptr;
+}
+
+/* Release TGT_PTR on DEVICE by offloading "__offload_target_free" with the
+   pointer value as its only argument.  Returns the result of the
+   offload.  */
+extern "C" bool
+GOMP_OFFLOAD_free (int device, void *tgt_ptr)
+{
+  TRACE ("(device = %d, tgt_ptr = %p)", device, tgt_ptr);
+
+  /* The pointer value itself is what travels to the target.  */
+  VarDesc ptr_arg = vd_host2tgt;
+  ptr_arg.ptr = &tgt_ptr;
+  ptr_arg.size = sizeof (void *);
+
+  const bool released
+    = offload (__FILE__, __LINE__, device, "__offload_target_free", 1,
+	       &ptr_arg, NULL);
+  return released;
+}
+
+/* Copy SIZE bytes from HOST_PTR on the host to TGT_PTR on DEVICE.  A first
+   offload ("__offload_target_host2tgt_p1") sends the target address and the
+   size; a second one ("__offload_target_host2tgt_p2") sends the data.  A
+   zero SIZE is a no-op that returns true.  Returns false if either offload
+   fails.  */
+extern "C" bool
+GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
+		       size_t size)
+{
+  /* SIZE is a size_t, so it is printed with "%zu" rather than "%d".  */
+  TRACE ("(device = %d, tgt_ptr = %p, host_ptr = %p, size = %zu)",
+	 device, tgt_ptr, host_ptr, size);
+  if (!size)
+    return true;
+
+  VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
+  vd1[0].ptr = &tgt_ptr;
+  vd1[0].size = sizeof (void *);
+  vd1[1].ptr = &size;
+  vd1[1].size = sizeof (size);
+
+  if (!offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
+		vd1, NULL))
+    return false;
+
+  VarDesc vd2 = vd_host2tgt;
+  vd2.ptr = (void *) host_ptr;
+  vd2.size = size;
+
+  return offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
+		  &vd2, NULL);
+}
+
+/* Copy SIZE bytes from TGT_PTR on DEVICE to HOST_PTR on the host.  A first
+   offload ("__offload_target_tgt2host_p1") sends the target address and the
+   size; a second one ("__offload_target_tgt2host_p2") receives the data
+   into HOST_PTR.  A zero SIZE is a no-op that returns true.  Returns false
+   if either offload fails.  */
+extern "C" bool
+GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
+		       size_t size)
+{
+  /* SIZE is a size_t, so it is printed with "%zu" rather than "%d".  */
+  TRACE ("(device = %d, host_ptr = %p, tgt_ptr = %p, size = %zu)",
+	 device, host_ptr, tgt_ptr, size);
+  if (!size)
+    return true;
+
+  /* Both arguments of the first step travel from the host to the target.  */
+  VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
+  vd1[0].ptr = &tgt_ptr;
+  vd1[0].size = sizeof (void *);
+  vd1[1].ptr = &size;
+  vd1[1].size = sizeof (size);
+
+  if (!offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
+		vd1, NULL))
+    return false;
+
+  VarDesc vd2 = vd_tgt2host;
+  vd2.ptr = (void *) host_ptr;
+  vd2.size = size;
+
+  return offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
+		  &vd2, NULL);
+}
+
+/* Copy SIZE bytes from SRC_PTR to DST_PTR, both on DEVICE, by offloading
+   "__offload_target_tgt2tgt" with the two addresses and the size as
+   arguments.  A zero SIZE is a no-op that returns true.  Returns the result
+   of the offload.  */
+extern "C" bool
+GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
+		      size_t size)
+{
+  /* SIZE is a size_t, so it is printed with "%zu" rather than "%d".  */
+  TRACE ("(device = %d, dst_ptr = %p, src_ptr = %p, size = %zu)",
+	 device, dst_ptr, src_ptr, size);
+  if (!size)
+    return true;
+
+  VarDesc vd[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
+  vd[0].ptr = &dst_ptr;
+  vd[0].size = sizeof (void *);
+  vd[1].ptr = &src_ptr;
+  vd[1].size = sizeof (void *);
+  vd[2].ptr = &size;
+  vd[2].size = sizeof (size);
+
+  return offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3,
+		  vd, NULL);
+}
+
+/* Run the target function TGT_FN with argument block TGT_VARS on DEVICE by
+   offloading "__offload_target_run".  ASYNC_DATA is forwarded to
+   offload (), which takes its asynchronous path when it is non-NULL.  The
+   unnamed fourth parameter is unused, and the result of offload () is not
+   reported to the caller.  */
+extern "C" void
+GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
+			void **, void *async_data)
+{
+  TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
+	 tgt_fn, tgt_vars, async_data);
+
+  /* Both pointer values are sent to the target as plain data.  */
+  VarDesc run_args[2] = { vd_host2tgt, vd_host2tgt };
+  run_args[0].ptr = &tgt_fn;
+  run_args[0].size = sizeof (void *);
+  run_args[1].ptr = &tgt_vars;
+  run_args[1].size = sizeof (void *);
+
+  const void **completion_data = (const void **) async_data;
+  offload (__FILE__, __LINE__, device, "__offload_target_run", 2, run_args,
+	   completion_data);
+}
+
+/* Run the target function TGT_FN with argument block TGT_VARS on DEVICE.
+   Implemented as GOMP_OFFLOAD_async_run () with NULL async data; the
+   unnamed fourth parameter is unused.  */
+extern "C" void
+GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **)
+{
+  TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);
+
+  void *const no_async_data = NULL;
+  GOMP_OFFLOAD_async_run (device, tgt_fn, tgt_vars, NULL, no_async_data);
+}