diff libgomp/taskloop.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children 84e7813d76e9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgomp/taskloop.c	Fri Oct 27 22:46:09 2017 +0900
@@ -0,0 +1,340 @@
+/* Copyright (C) 2015-2017 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file handles the taskloop construct.  It is included twice, once
+   for the long and once for unsigned long long variant.  */
+
+/* Called when encountering a taskloop construct.  The iterations from START
+   to END by STEP are divided among tasks, each calling FN on a sub-range;
+   NUM_TASKS is a grainsize when FLAGS has GOMP_TASK_FLAG_GRAINSIZE set.  */
+
+void
+GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
+	       long arg_size, long arg_align, unsigned flags,
+	       unsigned long num_tasks, int priority,
+	       TYPE start, TYPE end, TYPE step)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+
+#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
+  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
+     tied to one thread all the time.  This means UNTIED tasks must be
+     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
+     might be running on different thread than FN.  */
+  if (cpyfn)
+    flags &= ~GOMP_TASK_FLAG_IF;
+  flags &= ~GOMP_TASK_FLAG_UNTIED;
+#endif
+
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (team && gomp_team_barrier_cancelled (&team->barrier))
+    return;
+
+#ifdef TYPE_is_long
+  TYPE s = step;
+  if (step > 0)
+    {
+      if (start >= end)
+	return; /* Empty iteration space.  */
+      s--;
+    }
+  else
+    {
+      if (start <= end)
+	return; /* Empty iteration space.  */
+      s++;
+    }
+  UTYPE n = (end - start + s) / step; /* Iteration count, rounded up.  */
+#else
+  UTYPE n; /* Iteration count; direction comes from GOMP_TASK_FLAG_UP.  */
+  if (flags & GOMP_TASK_FLAG_UP)
+    {
+      if (start >= end)
+	return; /* Empty iteration space.  */
+      n = (end - start + step - 1) / step;
+    }
+  else
+    {
+      if (start <= end)
+	return; /* Empty iteration space.  */
+      n = (start - end - step - 1) / -step;
+    }
+#endif
+
+  TYPE task_step = step; /* Amount START advances per task.  */
+  unsigned long nfirst = n; /* After task NFIRST, task_step drops by STEP.  */
+  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
+    {
+      unsigned long grainsize = num_tasks; /* NUM_TASKS holds the grainsize.  */
+#ifdef TYPE_is_long
+      num_tasks = n / grainsize;
+#else
+      UTYPE ndiv = n / grainsize;
+      num_tasks = ndiv;
+      if (num_tasks != ndiv)
+	num_tasks = ~0UL; /* ndiv did not fit in unsigned long.  */
+#endif
+      if (num_tasks <= 1)
+	{
+	  num_tasks = 1;
+	  task_step = end - start; /* One task takes the whole range.  */
+	}
+      else if (num_tasks >= grainsize
+#ifndef TYPE_is_long
+	       && num_tasks != ~0UL
+#endif
+	      )
+	{
+	  UTYPE mul = num_tasks * grainsize;
+	  task_step = (TYPE) grainsize * step;
+	  if (mul != n)
+	    {
+	      task_step += step;
+	      nfirst = n - mul - 1; /* First n - mul tasks get one extra.  */
+	    }
+	}
+      else
+	{
+	  UTYPE div = n / num_tasks;
+	  UTYPE mod = n % num_tasks;
+	  task_step = (TYPE) div * step;
+	  if (mod)
+	    {
+	      task_step += step;
+	      nfirst = mod - 1; /* First mod tasks get one extra.  */
+	    }
+	}
+    }
+  else
+    {
+      if (num_tasks == 0)
+	num_tasks = team ? team->nthreads : 1; /* No count was requested.  */
+      if (num_tasks >= n)
+	num_tasks = n; /* One iteration per task.  */
+      else
+	{
+	  UTYPE div = n / num_tasks;
+	  UTYPE mod = n % num_tasks;
+	  task_step = (TYPE) div * step;
+	  if (mod)
+	    {
+	      task_step += step;
+	      nfirst = mod - 1; /* First mod tasks get one extra.  */
+	    }
+	}
+    }
+
+  if (flags & GOMP_TASK_FLAG_NOGROUP)
+    {
+      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
+	return; /* Current task's taskgroup is cancelled.  */
+    }
+  else
+    ialias_call (GOMP_taskgroup_start) (); /* Ended before returning.  */
+
+  if (priority > gomp_max_task_priority_var)
+    priority = gomp_max_task_priority_var;
+
+  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
+      || (thr->task && thr->task->final_task)
+      || team->task_count + num_tasks > 64 * team->nthreads)
+    { /* Run every task right here, calling FN directly.  */
+      unsigned long i;
+      if (__builtin_expect (cpyfn != NULL, 0))
+	{
+	  struct gomp_task task[num_tasks];
+	  struct gomp_task *parent = thr->task;
+	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
+	  char buf[num_tasks * arg_size + arg_align - 1];
+	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
+				& ~(uintptr_t) (arg_align - 1));
+	  char *orig_arg = arg; /* First aligned argument block in buf.  */
+	  for (i = 0; i < num_tasks; i++) /* Pass 1: CPYFN for every task.  */
+	    {
+	      gomp_init_task (&task[i], parent, gomp_icv (false));
+	      task[i].priority = priority;
+	      task[i].kind = GOMP_TASK_UNDEFERRED;
+	      task[i].final_task = (thr->task && thr->task->final_task)
+				   || (flags & GOMP_TASK_FLAG_FINAL);
+	      if (thr->task)
+		{
+		  task[i].in_tied_task = thr->task->in_tied_task;
+		  task[i].taskgroup = thr->task->taskgroup;
+		}
+	      thr->task = &task[i];
+	      cpyfn (arg, data);
+	      arg += arg_size;
+	    }
+	  arg = orig_arg;
+	  for (i = 0; i < num_tasks; i++) /* Pass 2: set bounds, call FN.  */
+	    {
+	      thr->task = &task[i];
+	      ((TYPE *)arg)[0] = start; /* Slots 0 and 1 hold the bounds.  */
+	      start += task_step;
+	      ((TYPE *)arg)[1] = start;
+	      if (i == nfirst)
+		task_step -= step; /* Later tasks run one iteration less.  */
+	      fn (arg);
+	      arg += arg_size;
+	      if (!priority_queue_empty_p (&task[i].children_queue,
+					   MEMMODEL_RELAXED))
+		{
+		  gomp_mutex_lock (&team->task_lock);
+		  gomp_clear_parent (&task[i].children_queue);
+		  gomp_mutex_unlock (&team->task_lock);
+		}
+	      gomp_end_task ();
+	    }
+	}
+      else
+	for (i = 0; i < num_tasks; i++) /* No CPYFN: bounds go into DATA.  */
+	  {
+	    struct gomp_task task;
+
+	    gomp_init_task (&task, thr->task, gomp_icv (false));
+	    task.priority = priority;
+	    task.kind = GOMP_TASK_UNDEFERRED;
+	    task.final_task = (thr->task && thr->task->final_task)
+			      || (flags & GOMP_TASK_FLAG_FINAL);
+	    if (thr->task)
+	      {
+		task.in_tied_task = thr->task->in_tied_task;
+		task.taskgroup = thr->task->taskgroup;
+	      }
+	    thr->task = &task;
+	    ((TYPE *)data)[0] = start; /* Slots 0 and 1 hold the bounds.  */
+	    start += task_step;
+	    ((TYPE *)data)[1] = start;
+	    if (i == nfirst)
+	      task_step -= step; /* Later tasks run one iteration less.  */
+	    fn (data);
+	    if (!priority_queue_empty_p (&task.children_queue,
+					 MEMMODEL_RELAXED))
+	      {
+		gomp_mutex_lock (&team->task_lock);
+		gomp_clear_parent (&task.children_queue);
+		gomp_mutex_unlock (&team->task_lock);
+	      }
+	    gomp_end_task ();
+	  }
+    }
+  else
+    { /* Allocate the tasks and put them on the team's queues.  */
+      struct gomp_task *tasks[num_tasks];
+      struct gomp_task *parent = thr->task;
+      struct gomp_taskgroup *taskgroup = parent->taskgroup;
+      char *arg;
+      int do_wake;
+      unsigned long i;
+
+      for (i = 0; i < num_tasks; i++)
+	{
+	  struct gomp_task *task
+	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
+	  tasks[i] = task; /* Argument block lives right after the task.  */
+	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+			  & ~(uintptr_t) (arg_align - 1));
+	  gomp_init_task (task, parent, gomp_icv (false));
+	  task->priority = priority;
+	  task->kind = GOMP_TASK_UNDEFERRED;
+	  task->in_tied_task = parent->in_tied_task;
+	  task->taskgroup = taskgroup;
+	  thr->task = task; /* New task is current while copying args.  */
+	  if (cpyfn)
+	    {
+	      cpyfn (arg, data);
+	      task->copy_ctors_done = true;
+	    }
+	  else
+	    memcpy (arg, data, arg_size);
+	  ((TYPE *)arg)[0] = start; /* Slots 0 and 1 hold the bounds.  */
+	  start += task_step;
+	  ((TYPE *)arg)[1] = start;
+	  if (i == nfirst)
+	    task_step -= step; /* Later tasks run one iteration less.  */
+	  thr->task = parent; /* Restore the current task.  */
+	  task->kind = GOMP_TASK_WAITING;
+	  task->fn = fn;
+	  task->fn_data = arg;
+	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
+	}
+      gomp_mutex_lock (&team->task_lock);
+      /* If parallel or taskgroup has been cancelled, don't start new
+	 tasks.  */
+      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+			     || (taskgroup && taskgroup->cancelled))
+			    && cpyfn == NULL, 0))
+	{
+	  gomp_mutex_unlock (&team->task_lock);
+	  for (i = 0; i < num_tasks; i++) /* Discard the tasks made above.  */
+	    {
+	      gomp_finish_task (tasks[i]);
+	      free (tasks[i]);
+	    }
+	  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+	    ialias_call (GOMP_taskgroup_end) ();
+	  return;
+	}
+      if (taskgroup)
+	taskgroup->num_children += num_tasks;
+      for (i = 0; i < num_tasks; i++) /* Queue on parent, group and team.  */
+	{
+	  struct gomp_task *task = tasks[i];
+	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
+				 task, priority,
+				 PRIORITY_INSERT_BEGIN,
+				 /*last_parent_depends_on=*/false,
+				 task->parent_depends_on);
+	  if (taskgroup)
+	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
+				   task, priority, PRIORITY_INSERT_BEGIN,
+				   /*last_parent_depends_on=*/false,
+				   task->parent_depends_on);
+	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
+				 PRIORITY_INSERT_END,
+				 /*last_parent_depends_on=*/false,
+				 task->parent_depends_on);
+	  ++team->task_count;
+	  ++team->task_queued_count;
+	}
+      gomp_team_barrier_set_task_pending (&team->barrier);
+      if (team->task_running_count + !parent->in_tied_task
+	  < team->nthreads)
+	{
+	  do_wake = team->nthreads - team->task_running_count
+		    - !parent->in_tied_task;
+	  if ((unsigned long) do_wake > num_tasks)
+	    do_wake = num_tasks; /* Never wake more threads than tasks.  */
+	}
+      else
+	do_wake = 0;
+      gomp_mutex_unlock (&team->task_lock);
+      if (do_wake) /* Wake only after task_lock has been released.  */
+	gomp_team_barrier_wake (&team->barrier, do_wake);
+    }
+  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+    ialias_call (GOMP_taskgroup_end) ();
+}