diff libhsail-rt/rt/workitems.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
line wrap: on
line diff
--- a/libhsail-rt/rt/workitems.c	Fri Oct 27 22:46:09 2017 +0900
+++ b/libhsail-rt/rt/workitems.c	Thu Oct 25 07:37:49 2018 +0900
@@ -2,7 +2,7 @@
    various ways and the builtin functions closely related to the
    implementation.
 
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
    for General Processor Tech.
 
@@ -63,6 +63,12 @@
 #define FIBER_STACK_SIZE (64*1024)
 #define GROUP_SEGMENT_ALIGN 256
 
+/* Preserve this amount of additional space in the alloca stack as we need to
+   store the alloca frame pointer to the alloca frame, thus must preserve
+   space for it.  This thus supports at most 1024 functions with allocas in
+   a call chain.  */
+#define ALLOCA_OVERHEAD 1024*4
+
 uint32_t __hsail_workitemabsid (uint32_t dim, PHSAWorkItem *context);
 
 uint32_t __hsail_workitemid (uint32_t dim, PHSAWorkItem *context);
@@ -101,11 +107,20 @@
 	 the current_work_group_* is set to point to the WG executed next.  */
       if (!wi->wg->more_wgs)
 	break;
+
+      wi->group_x = wg->x;
+      wi->group_y = wg->y;
+      wi->group_z = wg->z;
+
+      wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
+      wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
+      wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
+
 #ifdef DEBUG_PHSA_RT
       printf (
 	"Running work-item %lu/%lu/%lu for wg %lu/%lu/%lu / %lu/%lu/%lu...\n",
-	wi->x, wi->y, wi->z, wg->x, wg->y, wg->z, l_data->wg_max_x,
-	l_data->wg_max_y, l_data->wg_max_z);
+	wi->x, wi->y, wi->z, wi->group_x, wi->group_y, wi->group_z,
+	l_data->wg_max_x, l_data->wg_max_y, l_data->wg_max_z);
 #endif
 
       if (wi->x < __hsail_currentworkgroupsize (0, wi)
@@ -174,6 +189,13 @@
 	  else
 	    wg->x++;
 #endif
+	  wi->group_x = wg->x;
+	  wi->group_y = wg->y;
+	  wi->group_z = wg->z;
+
+	  wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
+	  wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
+	  wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
 
 	  /* Reinitialize the work-group barrier according to the new WG's
 	     size, which might not be the same as the previous ones, due
@@ -227,6 +249,7 @@
   PHSAWorkItem *wi_threads = NULL;
   PHSAWorkGroup wg;
   size_t flat_wi_id = 0, x, y, z, max_x, max_y, max_z;
+  uint32_t group_x, group_y, group_z;
   fiber_barrier_t wg_start_barrier;
   fiber_barrier_t wg_completion_barrier;
   fiber_barrier_t wg_sync_barrier;
@@ -246,18 +269,18 @@
 	   != 0)
     phsa_fatal_error (3);
 
-  wg.alloca_stack_p = wg.private_segment_total_size;
+  wg.alloca_stack_p = wg.private_segment_total_size + ALLOCA_OVERHEAD;
   wg.alloca_frame_p = wg.alloca_stack_p;
   wg.initial_group_offset = group_local_offset;
 
 #ifdef EXECUTE_WGS_BACKWARDS
-  wg.x = context->wg_max_x - 1;
-  wg.y = context->wg_max_y - 1;
-  wg.z = context->wg_max_z - 1;
+  group_x = context->wg_max_x - 1;
+  group_y = context->wg_max_y - 1;
+  group_z = context->wg_max_z - 1;
 #else
-  wg.x = context->wg_min_x;
-  wg.y = context->wg_min_y;
-  wg.z = context->wg_min_z;
+  group_x = context->wg_min_x;
+  group_y = context->wg_min_y;
+  group_z = context->wg_min_z;
 #endif
 
   fiber_barrier_init (&wg_sync_barrier, wg_size);
@@ -284,6 +307,19 @@
 	  PHSAWorkItem *wi = &wi_threads[flat_wi_id];
 	  wi->launch_data = context;
 	  wi->wg = &wg;
+
+	  wg.x = wi->group_x = group_x;
+	  wg.y = wi->group_y = group_y;
+	  wg.z = wi->group_z = group_z;
+
+	  wi->wg_size_x = context->dp->workgroup_size_x;
+	  wi->wg_size_y = context->dp->workgroup_size_y;
+	  wi->wg_size_z = context->dp->workgroup_size_z;
+
+	  wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
+	  wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
+	  wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
+
 	  wi->x = x;
 	  wi->y = y;
 	  wi->z = z;
@@ -446,7 +482,7 @@
 	   != 0)
     phsa_fatal_error (3);
 
-  wg.alloca_stack_p = dp->private_segment_size * wg_size;
+  wg.alloca_stack_p = dp->private_segment_size * wg_size + ALLOCA_OVERHEAD;
   wg.alloca_frame_p = wg.alloca_stack_p;
 
   wg.private_base_ptr = private_base_ptr;
@@ -461,9 +497,17 @@
     for (wg_y = context->wg_min_y; wg_y < context->wg_max_y; ++wg_y)
       for (wg_x = context->wg_min_x; wg_x < context->wg_max_x; ++wg_x)
 	{
-	  wi.wg->x = wg_x;
-	  wi.wg->y = wg_y;
-	  wi.wg->z = wg_z;
+	  wi.group_x = wg_x;
+	  wi.group_y = wg_y;
+	  wi.group_z = wg_z;
+
+	  wi.wg_size_x = context->dp->workgroup_size_x;
+	  wi.wg_size_y = context->dp->workgroup_size_y;
+	  wi.wg_size_z = context->dp->workgroup_size_z;
+
+	  wi.cur_wg_size_x = __hsail_currentworkgroupsize (0, &wi);
+	  wi.cur_wg_size_y = __hsail_currentworkgroupsize (1, &wi);
+	  wi.cur_wg_size_z = __hsail_currentworkgroupsize (2, &wi);
 
 	  context->kernel (context->kernarg_addr, &wi, group_base_ptr,
 			   group_local_offset, private_base_ptr);
@@ -558,15 +602,15 @@
     default:
     case 0:
       /* Overflow semantics in the case of WG dim > grid dim.  */
-      id = ((uint64_t) context->wg->x * dp->workgroup_size_x + context->x)
+      id = ((uint64_t) context->group_x * dp->workgroup_size_x + context->x)
 	   % dp->grid_size_x;
       break;
     case 1:
-      id = ((uint64_t) context->wg->y * dp->workgroup_size_y + context->y)
+      id = ((uint64_t) context->group_y * dp->workgroup_size_y + context->y)
 	   % dp->grid_size_y;
       break;
     case 2:
-      id = ((uint64_t) context->wg->z * dp->workgroup_size_z + context->z)
+      id = ((uint64_t) context->group_z * dp->workgroup_size_z + context->z)
 	   % dp->grid_size_z;
       break;
     }
@@ -584,15 +628,15 @@
     default:
     case 0:
       /* Overflow semantics in the case of WG dim > grid dim.  */
-      id = ((uint64_t) context->wg->x * dp->workgroup_size_x + context->x)
+      id = ((uint64_t) context->group_x * dp->workgroup_size_x + context->x)
 	   % dp->grid_size_x;
       break;
     case 1:
-      id = ((uint64_t) context->wg->y * dp->workgroup_size_y + context->y)
+      id = ((uint64_t) context->group_y * dp->workgroup_size_y + context->y)
 	   % dp->grid_size_y;
       break;
     case 2:
-      id = ((uint64_t) context->wg->z * dp->workgroup_size_z + context->z)
+      id = ((uint64_t) context->group_z * dp->workgroup_size_z + context->z)
 	   % dp->grid_size_z;
       break;
     }
@@ -732,19 +776,19 @@
     {
     default:
     case 0:
-      if ((uint64_t) wi->wg->x < dp->grid_size_x / dp->workgroup_size_x)
+      if ((uint64_t) wi->group_x < dp->grid_size_x / dp->workgroup_size_x)
 	wg_size = dp->workgroup_size_x; /* Full WG.  */
       else
 	wg_size = dp->grid_size_x % dp->workgroup_size_x; /* Partial WG.  */
       break;
     case 1:
-      if ((uint64_t) wi->wg->y < dp->grid_size_y / dp->workgroup_size_y)
+      if ((uint64_t) wi->group_y < dp->grid_size_y / dp->workgroup_size_y)
 	wg_size = dp->workgroup_size_y; /* Full WG.  */
       else
 	wg_size = dp->grid_size_y % dp->workgroup_size_y; /* Partial WG.  */
       break;
     case 2:
-      if ((uint64_t) wi->wg->z < dp->grid_size_z / dp->workgroup_size_z)
+      if ((uint64_t) wi->group_z < dp->grid_size_z / dp->workgroup_size_z)
 	wg_size = dp->workgroup_size_z; /* Full WG.  */
       else
 	wg_size = dp->grid_size_z % dp->workgroup_size_z; /* Partial WG.  */
@@ -792,11 +836,11 @@
     {
     default:
     case 0:
-      return wi->wg->x;
+      return wi->group_x;
     case 1:
-      return wi->wg->y;
+      return wi->group_y;
     case 2:
-      return wi->wg->z;
+      return wi->group_z;
     }
 }
 
@@ -867,9 +911,12 @@
 __hsail_alloca (uint32_t size, uint32_t align, PHSAWorkItem *wi)
 {
   volatile PHSAWorkGroup *wg = wi->wg;
-  uint32_t new_pos = wg->alloca_stack_p - size;
+  int64_t new_pos = wg->alloca_stack_p - size;
   while (new_pos % align != 0)
     new_pos--;
+  if (new_pos < 0)
+    phsa_fatal_error (2);
+
   wg->alloca_stack_p = new_pos;
 
 #ifdef DEBUG_ALLOCA