131
|
1 /* Copyright (C) 2007-2018 Free Software Foundation, Inc.
|
0
|
2 Contributed by Richard Henderson <rth@redhat.com>.
|
|
3
|
111
|
4 This file is part of the GNU Offloading and Multi Processing Library
|
|
5 (libgomp).
|
0
|
6
|
|
7 Libgomp is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 3, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
15 more details.
|
|
16
|
|
17 Under Section 7 of GPL version 3, you are granted additional
|
|
18 permissions described in the GCC Runtime Library Exception, version
|
|
19 3.1, as published by the Free Software Foundation.
|
|
20
|
|
21 You should have received a copy of the GNU General Public License and
|
|
22 a copy of the GCC Runtime Library Exception along with this program;
|
|
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
24 <http://www.gnu.org/licenses/>. */
|
|
25
|
|
/* This file handles the maintenance of tasks in response to task
   creation and termination.  */
|
|
28
|
|
29 #include "libgomp.h"
|
|
30 #include <stdlib.h>
|
|
31 #include <string.h>
|
111
|
32 #include "gomp-constants.h"
|
0
|
33
|
111
|
34 typedef struct gomp_task_depend_entry *hash_entry_type;
|
|
35
|
|
/* Allocation hook for the hashtab.h template: obtain SIZE bytes from
   the libgomp allocator (which aborts the program on failure, so the
   result is never NULL).  */

static inline void *
htab_alloc (size_t size)
{
  void *mem = gomp_malloc (size);
  return mem;
}
|
|
41
|
|
/* Deallocation hook for the hashtab.h template: release memory that
   was obtained through htab_alloc.  */

static inline void
htab_free (void *ptr)
{
  free (ptr);
}
|
|
47
|
|
48 #include "hashtab.h"
|
|
49
|
|
50 static inline hashval_t
|
|
51 htab_hash (hash_entry_type element)
|
|
52 {
|
|
53 return hash_pointer (element->addr);
|
|
54 }
|
|
55
|
|
56 static inline bool
|
|
57 htab_eq (hash_entry_type x, hash_entry_type y)
|
|
58 {
|
|
59 return x->addr == y->addr;
|
|
60 }
|
0
|
61
|
|
62 /* Create a new task data structure. */
|
|
63
|
|
64 void
|
|
65 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
|
|
66 struct gomp_task_icv *prev_icv)
|
|
67 {
|
111
|
68 /* It would seem that using memset here would be a win, but it turns
|
|
69 out that partially filling gomp_task allows us to keep the
|
|
70 overhead of task creation low. In the nqueens-1.c test, for a
|
|
71 sufficiently large N, we drop the overhead from 5-6% to 1%.
|
|
72
|
|
73 Note, the nqueens-1.c test in serial mode is a good test to
|
|
74 benchmark the overhead of creating tasks as there are millions of
|
|
75 tiny tasks created that all run undeferred. */
|
0
|
76 task->parent = parent_task;
|
|
77 task->icv = *prev_icv;
|
|
78 task->kind = GOMP_TASK_IMPLICIT;
|
111
|
79 task->taskwait = NULL;
|
0
|
80 task->in_tied_task = false;
|
111
|
81 task->final_task = false;
|
|
82 task->copy_ctors_done = false;
|
|
83 task->parent_depends_on = false;
|
|
84 priority_queue_init (&task->children_queue);
|
|
85 task->taskgroup = NULL;
|
|
86 task->dependers = NULL;
|
|
87 task->depend_hash = NULL;
|
|
88 task->depend_count = 0;
|
0
|
89 }
|
|
90
|
|
91 /* Clean up a task, after completing it. */
|
|
92
|
|
93 void
|
|
94 gomp_end_task (void)
|
|
95 {
|
|
96 struct gomp_thread *thr = gomp_thread ();
|
|
97 struct gomp_task *task = thr->task;
|
|
98
|
|
99 gomp_finish_task (task);
|
|
100 thr->task = task->parent;
|
|
101 }
|
|
102
|
111
|
103 /* Clear the parent field of every task in LIST. */
|
|
104
|
0
|
105 static inline void
|
111
|
106 gomp_clear_parent_in_list (struct priority_list *list)
|
0
|
107 {
|
111
|
108 struct priority_node *p = list->tasks;
|
|
109 if (p)
|
0
|
110 do
|
|
111 {
|
111
|
112 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
|
|
113 p = p->next;
|
0
|
114 }
|
111
|
115 while (p != list->tasks);
|
|
116 }
|
|
117
|
|
118 /* Splay tree version of gomp_clear_parent_in_list.
|
|
119
|
|
120 Clear the parent field of every task in NODE within SP, and free
|
|
121 the node when done. */
|
|
122
|
|
123 static void
|
|
124 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
|
|
125 {
|
|
126 if (!node)
|
|
127 return;
|
|
128 prio_splay_tree_node left = node->left, right = node->right;
|
|
129 gomp_clear_parent_in_list (&node->key.l);
|
|
130 #if _LIBGOMP_CHECKING_
|
|
131 memset (node, 0xaf, sizeof (*node));
|
|
132 #endif
|
|
133 /* No need to remove the node from the tree. We're nuking
|
|
134 everything, so just free the nodes and our caller can clear the
|
|
135 entire splay tree. */
|
|
136 free (node);
|
|
137 gomp_clear_parent_in_tree (sp, left);
|
|
138 gomp_clear_parent_in_tree (sp, right);
|
|
139 }
|
|
140
|
|
141 /* Clear the parent field of every task in Q and remove every task
|
|
142 from Q. */
|
|
143
|
|
144 static inline void
|
|
145 gomp_clear_parent (struct priority_queue *q)
|
|
146 {
|
|
147 if (priority_queue_multi_p (q))
|
|
148 {
|
|
149 gomp_clear_parent_in_tree (&q->t, q->t.root);
|
|
150 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
|
|
151 No need to remove anything. We can just nuke everything. */
|
|
152 q->t.root = NULL;
|
|
153 }
|
|
154 else
|
|
155 gomp_clear_parent_in_list (&q->l);
|
|
156 }
|
|
157
|
|
/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task: DEPEND[0] is the number of depend entries, DEPEND[1] the
   number of out/inout entries (which come first), and DEPEND[2..]
   are the addresses themselves.

   Must be called with PARENT's team task_lock held: it mutates the
   parent's depend_hash and the dependers vectors of sibling tasks.
   NOTE(review): lock requirement inferred from the call sites visible
   in this file (both callers hold team->task_lock) — confirm.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  hash_entry_type ent;

  task->depend_count = ndepend;
  task->num_dependees = 0;
  /* Lazily create the parent's dependency hash table; size it for at
     least 12 slots so tiny depend lists don't trigger early resizes.  */
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].addr = depend[2 + i];
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      /* Entries at index >= nout are depend(in:); earlier ones are
	 depend(out:)/depend(inout:).  */
      task->depend[i].is_in = i >= nout;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If multiple depends on the same task are the same, all but the
	     first one are redundant.  As inout/out come first, if any of them
	     is inout/out, it will win, which is the right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  /* Walk earlier sibling tasks with a dependency on the same
	     address and register TASK in each one's dependers vector.  */
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      /* Entries past the first redundant_out are all
		 redundant_out as well (see comment below).  */
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (i >= nout && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  /* First depender of TSK: allocate a vector with room
		     for 6 entries.  */
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from earlier
		 depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  /* Grow the vector geometrically.  */
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  /* Prepend this entry to the existing chain for the address.  */
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain, after first redundant_out entry all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      /* Unlink OUT from its current position and re-link it
		 right after LAST, the tail of the non-redundant part.  */
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}
|
|
277
|
|
/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   FN is the task body, invoked on a copy of DATA (ARG_SIZE bytes,
   aligned to ARG_ALIGN); CPYFN, when non-NULL, performs that copy
   (e.g. C++ copy constructors).  FLAGS carries GOMP_TASK_FLAG_*
   bits; PRIORITY is honored only with GOMP_TASK_FLAG_PRIORITY.

   DEPEND is an array containing:
     depend[0]: number of depend elements.
     depend[1]: number of depend elements of type "out".
     depend[2..N+1]: address of [1..N]th depend element.  */

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  /* Clamp the effective priority: zero unless the priority clause was
     given, and never above the max-task-priority ICV.  */
  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Run undeferred (on this thread's stack, right now) when IF(0),
     outside a team, inside a final task, or when the team is already
     flooded with queued tasks.  */
  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  /* Copy the firstprivate data into a suitably aligned
	     stack buffer before running FN on it.  */
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (&task.children_queue);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      /* Deferred path: heap-allocate the task together with its
	 depend array and the copy of the firstprivate data.  */
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      /* Layout: struct gomp_task, depend entries, then the aligned
	 argument block.  */
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      /* Run CPYFN with this task as the current task so any nested
	 behavior attributes to the new task.  */
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  /* Once the copy constructors have run, the task must be
	     executed even under cancellation (see below).  */
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      /* Tasks that depend on other tasks are not put into the
		 various waiting queues, so we are done for now.  Said
		 tasks are instead put into the queues via
		 gomp_task_run_post_handle_dependers() after their
		 dependencies have been satisfied.  After which, they
		 can be picked up by the various scheduling
		 points.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}

      /* Enqueue in the parent's children queue, the taskgroup queue
	 (if any), and the team-wide queue.  */
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
			     task, priority,
			     PRIORITY_INSERT_BEGIN,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, priority,
			       PRIORITY_INSERT_BEGIN,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
|
|
463
|
111
|
464 ialias (GOMP_taskgroup_start)
|
|
465 ialias (GOMP_taskgroup_end)
|
|
466
|
|
467 #define TYPE long
|
|
468 #define UTYPE unsigned long
|
|
469 #define TYPE_is_long 1
|
|
470 #include "taskloop.c"
|
|
471 #undef TYPE
|
|
472 #undef UTYPE
|
|
473 #undef TYPE_is_long
|
|
474
|
|
475 #define TYPE unsigned long long
|
|
476 #define UTYPE TYPE
|
|
477 #define GOMP_taskloop GOMP_taskloop_ull
|
|
478 #include "taskloop.c"
|
|
479 #undef TYPE
|
|
480 #undef UTYPE
|
|
481 #undef GOMP_taskloop
|
|
482
|
|
483 static void inline
|
|
484 priority_queue_move_task_first (enum priority_queue_type type,
|
|
485 struct priority_queue *head,
|
|
486 struct gomp_task *task)
|
|
487 {
|
|
488 #if _LIBGOMP_CHECKING_
|
|
489 if (!priority_queue_task_in_queue_p (type, head, task))
|
|
490 gomp_fatal ("Attempt to move first missing task %p", task);
|
|
491 #endif
|
|
492 struct priority_list *list;
|
|
493 if (priority_queue_multi_p (head))
|
|
494 {
|
|
495 list = priority_queue_lookup_priority (head, task->priority);
|
|
496 #if _LIBGOMP_CHECKING_
|
|
497 if (!list)
|
|
498 gomp_fatal ("Unable to find priority %d", task->priority);
|
|
499 #endif
|
|
500 }
|
|
501 else
|
|
502 list = &head->l;
|
|
503 priority_list_remove (list, task_to_priority_node (type, task), 0);
|
|
504 priority_list_insert (type, list, task, task->priority,
|
|
505 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
|
|
506 task->parent_depends_on);
|
|
507 }
|
|
508
|
|
/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.

   Requeues TASK (whose async device phase has finished) as a WAITING
   task in TEAM's queues so a thread can run the unmap phase, and
   wakes up anything waiting on it.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  /* Put the task at the front of its priority in the parent's and
     taskgroup's queues so it gets picked up promptly.  */
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
				    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
			 PRIORITY_INSERT_BEGIN, false,
			 task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
      else if (parent->taskwait->in_depend_wait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_depend_wait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
	 Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}
|
|
565
|
|
566 /* Signal that a target task TTASK has completed the asynchronously
|
|
567 running phase and should be requeued as a task to handle the
|
|
568 variable unmapping. */
|
|
569
|
|
570 void
|
|
571 GOMP_PLUGIN_target_task_completion (void *data)
|
|
572 {
|
|
573 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
|
|
574 struct gomp_task *task = ttask->task;
|
|
575 struct gomp_team *team = ttask->team;
|
|
576
|
|
577 gomp_mutex_lock (&team->task_lock);
|
|
578 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
|
|
579 {
|
|
580 ttask->state = GOMP_TARGET_TASK_FINISHED;
|
|
581 gomp_mutex_unlock (&team->task_lock);
|
|
582 return;
|
|
583 }
|
|
584 ttask->state = GOMP_TARGET_TASK_FINISHED;
|
|
585 gomp_target_task_completion (team, task);
|
|
586 gomp_mutex_unlock (&team->task_lock);
|
|
587 }
|
|
588
|
|
589 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
|
|
590
|
|
/* Called for nowait target tasks.

   Creates (and possibly immediately starts) a deferred task wrapping
   a target region on DEVICEP.  FN/MAPNUM/HOSTADDRS/SIZES/KINDS/ARGS
   describe the offload; DEPEND is as in GOMP_task; STATE is the
   initial gomp_target_task state.  Returns true if a task was created
   (or creation was correctly skipped due to cancellation), false only
   for the GOMP_TARGET_TASK_DATA case where the caller must proceed
   synchronously.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend, void **args,
			 enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return true;

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) depend[0];
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
	 firstprivate on the target task.  Compute the alignment and
	 total size of the block that will hold those copies.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	tgt_size += tgt_align - 1;
      else
	tgt_size = 0;
    }

  /* Single allocation holding: the task, its depend entries, the
     gomp_target_task, the hostaddrs/sizes/kinds arrays, and the
     firstprivate copy block.  */
  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short))
		      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  /* The gomp_target_task lives right after the depend array.  */
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  ttask->args = args;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      /* Copy each firstprivate object into the aligned tail block and
	 redirect its hostaddrs entry at the copy.  */
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    ttask->hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
			|| (taskgroup && taskgroup->cancelled), 0))
    {
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return true;
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  /* Blocked on dependencies; it will be queued later by
	     gomp_task_run_post_handle_dependers.  */
	  if (taskgroup)
	    taskgroup->num_children++;
	  gomp_mutex_unlock (&team->task_lock);
	  return true;
	}
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      /* Data-only pseudo-task with no pending dependencies: nothing
	 to defer, tell the caller to proceed synchronously.  */
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially schedule the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, 0, PRIORITY_INSERT_END,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      /* Not in the team queue yet; it is queued there only once the
	 async phase completes (see gomp_target_task_completion).  */
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
	 in between gomp_target_task_fn and the mutex lock,
	 perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	gomp_target_task_completion (team, task);
      else
	ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  /* No async-capable device: queue the task normally.  */
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			 PRIORITY_INSERT_BEGIN,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
			   PRIORITY_INSERT_BEGIN,
			   /*adjust_parent_depends_on=*/false,
			   task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
			 PRIORITY_INSERT_END,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}
|
|
779
|
|
/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

	task->children
	|
	V
	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

   We rearrange such that:

	task->children
	|	       +--- last_parent_depends_on
	|	       |
	V	       V
	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */

static void inline
priority_list_upgrade_task (struct priority_list *list,
			    struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      /* There is already a run of parent_depends_on tasks at the
	 front: unlink NODE and splice it in right after that run.  */
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      /* No such run yet: unlink NODE and make it the new head of the
	 circular list.  (If NODE is already the head, nothing to
	 rewire.)  */
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  /* Either way, NODE is now the last parent_depends_on entry.  */
  list->last_parent_depends_on = node;
}
|
|
830
|
|
831 /* Given a parent_depends_on TASK in its parent's children_queue, move
|
|
832 it to the front of its priority so it is run as soon as possible.
|
|
833
|
|
834 PARENT is passed as an optimization.
|
|
835
|
|
836 (This function could be defined in priority_queue.c, but we want it
|
|
837 inlined, and putting it in priority_queue.h is not an option, given
|
|
838 that gomp_task has not been properly defined at that point). */
|
|
839
|
|
840 static void inline
|
|
841 priority_queue_upgrade_task (struct gomp_task *task,
|
|
842 struct gomp_task *parent)
|
|
843 {
|
|
844 struct priority_queue *head = &parent->children_queue;
|
|
845 struct priority_node *node = &task->pnode[PQ_CHILDREN];
|
|
846 #if _LIBGOMP_CHECKING_
|
|
847 if (!task->parent_depends_on)
|
|
848 gomp_fatal ("priority_queue_upgrade_task: task must be a "
|
|
849 "parent_depends_on task");
|
|
850 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
|
|
851 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
|
|
852 #endif
|
|
853 if (priority_queue_multi_p (head))
|
|
854 {
|
|
855 struct priority_list *list
|
|
856 = priority_queue_lookup_priority (head, task->priority);
|
|
857 priority_list_upgrade_task (list, node);
|
|
858 }
|
|
859 else
|
|
860 priority_list_upgrade_task (&head->l, node);
|
|
861 }
|
|
862
|
|
/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static void inline
priority_list_downgrade_task (enum priority_queue_type type,
			      struct priority_list *list,
			      struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  /* If NODE is the head, just advance the head past it — in the
     circular list this effectively rotates NODE to the back.  */
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
	 cannot come before WAITING tasks.  If we're about to
	 leave the queue in such an indeterminate state, rewire
	 things appropriately.  However, a TIED task at the end is
	 perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  /* Remove from list.  */
	  node->prev->next = node->next;
	  node->next->prev = node->prev;
	  /* Rewire at the end.  */
	  node->next = list->tasks;
	  node->prev = list->tasks->prev;
	  list->tasks->prev->next = node;
	  list->tasks->prev = node;
	}
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
	  && prev_child->kind == GOMP_TASK_WAITING
	  && prev_child->parent_depends_on)
	list->last_parent_depends_on = node->prev;
      else
	{
	  /* There are no more parent_depends_on entries waiting
	     to run, clear the list.  */
	  list->last_parent_depends_on = NULL;
	}
    }
}
|
|
917
|
|
918 /* Given a TASK in HEAD that is about to be executed, move it out of
|
|
919 the way so that other tasks can be considered for execution. HEAD
|
|
920 contains tasks of type TYPE.
|
|
921
|
|
922 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
|
|
923 if applicable.
|
|
924
|
|
925 (This function could be defined in priority_queue.c, but we want it
|
|
926 inlined, and putting it in priority_queue.h is not an option, given
|
|
927 that gomp_task has not been properly defined at that point). */
|
|
928
|
|
929 static void inline
|
|
930 priority_queue_downgrade_task (enum priority_queue_type type,
|
|
931 struct priority_queue *head,
|
|
932 struct gomp_task *task)
|
|
933 {
|
|
934 #if _LIBGOMP_CHECKING_
|
|
935 if (!priority_queue_task_in_queue_p (type, head, task))
|
|
936 gomp_fatal ("Attempt to downgrade missing task %p", task);
|
|
937 #endif
|
|
938 if (priority_queue_multi_p (head))
|
|
939 {
|
|
940 struct priority_list *list
|
|
941 = priority_queue_lookup_priority (head, task->priority);
|
|
942 priority_list_downgrade_task (type, list, task);
|
|
943 }
|
|
944 else
|
|
945 priority_list_downgrade_task (type, &head->l, task);
|
|
946 }
|
|
947
|
|
/* Setup CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Caller holds TEAM's task_lock (all call sites in this file take it
   before calling here).

   Return TRUE if task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
			   &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
			   &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
				   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   child_task);

  /* Unlike the two queues above (where the task merely moves to the
     back), the task leaves the team queue entirely.  */
  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
			 MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  /* Report cancellation, but only if the task may legally be skipped
     (copy constructors have not run yet).  */
  if ((gomp_team_barrier_cancelled (&team->barrier)
       || (taskgroup && taskgroup->cancelled))
      && !child_task->copy_ctors_done)
    return true;
  return false;
}
|
|
995
|
|
/* Unlink every non-redundant dependency entry of CHILD_TASK from its
   parent's depend_hash.  Entries with the same address form a chain
   hanging off one hash slot; an entry with a NULL prev is the chain
   head, i.e. the element stored in the slot itself.  */

static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  /* Mid-chain entry: simple doubly-linked-list unlink.  */
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    /* Chain head: the hash slot itself must be updated to
	       point at the successor, or be cleared if there is
	       none.  */
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}
|
|
1023
|
|
/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs to.

   Returns the number of tasks that became runnable.  Caller holds
   TEAM's task_lock.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  /* Newly-runnable dependent tasks go to the FRONT of the
	     children queue so they run before unrelated siblings.  */
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      /* In the team-wide queue, however, it lines up at the END.  */
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}
|
|
1105
|
|
1106 static inline size_t
|
|
1107 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
|
|
1108 struct gomp_team *team)
|
|
1109 {
|
|
1110 if (child_task->depend_count == 0)
|
|
1111 return 0;
|
|
1112
|
|
1113 /* If parent is gone already, the hash table is freed and nothing
|
|
1114 will use the hash table anymore, no need to remove anything from it. */
|
|
1115 if (child_task->parent != NULL)
|
|
1116 gomp_task_run_post_handle_depend_hash (child_task);
|
|
1117
|
|
1118 if (child_task->dependers == NULL)
|
|
1119 return 0;
|
|
1120
|
|
1121 return gomp_task_run_post_handle_dependers (child_task, team);
|
|
1122 }
|
|
1123
|
|
/* Remove CHILD_TASK from its parent's children_queue after it has
   run, waking up the parent if it was blocked waiting on this child
   (either in taskwait or in a dependency wait).  No-op when the
   parent no longer exists.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  /* The release memory model pairs with the acquire load of
     task->children_queue at the top of GOMP_taskwait, publishing the
     child's work-function writes.  A true return means the queue
     became empty, in which case a parent blocked in taskwait can be
     released.  */
  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
			     child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}
|
|
1154
|
|
/* Remove CHILD_TASK from its taskgroup after it has run, decrementing
   the taskgroup's child count and waking a thread blocked in
   GOMP_taskgroup_end if this was the last queued child.  No-op when
   the task belongs to no taskgroup.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
				      &taskgroup->taskgroup_queue,
				      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so
	 need a release barrier here to ensure memory
	 written by child_task->fn above is flushed
	 before the NULL is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}
|
|
1185
|
0
|
/* Run pending tasks while the team is stopped at a barrier.  Each
   thread arriving at the barrier with tasks outstanding calls this;
   the loop pops tasks off TEAM's queue and executes them until the
   queue is empty, at which point the thread returns (to sleep on the
   barrier).  The thread that runs the team's last task marks the
   barrier done and wakes everyone.  */

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  /* Freeing a finished task is deferred until after the task_lock is
     dropped, so the free happens outside the critical section.  */
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  /* No tasks at all: finish the barrier immediately.  */
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      /* Skip execution; go straight to post-run accounting.  */
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task; run it through the
	     offloading machinery instead of calling fn directly.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  /* Asynchronously running target task: it will be
		     requeued on completion, so do not do post-run
		     processing now.  */
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Queue drained: nothing left for this thread to do here.  */
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      /* Several tasks became runnable; wake up idle threads
		 to help, but no more than there are new tasks.  */
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      /* Last task of the team: release the barrier.  */
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}
|
|
1306
|
|
/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  While waiting, this
   thread helps out by executing its children's WAITING tasks; when
   none are runnable it sleeps on a stack-allocated taskwait
   semaphore until a child completes.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  /* Freeing a finished task is deferred until the task_lock has been
     dropped.  */
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  /* Set by priority_queue_next_task: true when the selected task came
     from the children queue (vs. the team queue).  */
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  /* All children done: tear down and leave.  */
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  /* Asynchronously running target task: requeued on
		     completion, so skip post-run processing here.  */
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: block until a finishing child posts.  */
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake idle threads to help with the newly runnable
		 tasks, at most one per new task.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
|
|
1458
|
111
|
/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task: depend[0] is the number of depend
   clauses, depend[1] the number of out/inout clauses (which come
   first), followed by the addresses themselves.  */

void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  /* Freeing a finished task is deferred until the task_lock has been
     dropped.  */
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  /* Count the still-pending tasks we conflict with and mark them as
     parent_depends_on; two "in" dependences (i >= nout on both sides)
     do not conflict.  */
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (i >= nout && ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If dependency TSK itself has no dependencies and is
		   ready to run, move it up front so that we run it as
		   soon as possible.  */
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  priority_queue_upgrade_task (tsk, task);
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  /* Every awaited dependency has finished.  */
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}

      /* Theoretically when we have multiple priorities, we should
	 choose between the highest priority item in
	 task->children_queue and team->task_queue here, so we should
	 use priority_queue_next_task().  However, since we are
	 running an undeferred task, perhaps that makes all tasks it
	 depends on undeferred, thus a priority of INF?  This would
	 make it unnecessary to take anything into account here,
	 but the dependencies.

	 On the other hand, if we want to use priority_queue_next_task(),
	 care should be taken to only use priority_queue_remove()
	 below if the task was actually removed from the children
	 queue.  */
      bool ignored;
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  /* Asynchronously running target task: requeued on
		     completion, so skip post-run processing here.  */
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: block until a dependency finishes.  */
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				 child_task, MEMMODEL_RELAXED);
	  child_task->pnode[PQ_CHILDREN].next = NULL;
	  child_task->pnode[PQ_CHILDREN].prev = NULL;

	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake idle threads to help with the newly runnable
		 tasks, at most one per new task.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
|
111
|
1648
|
|
/* Called when encountering a taskyield directive.  Task switching at
   a yield point is not currently implemented, so this is a no-op.  */

void
GOMP_taskyield (void)
{
  /* Intentionally empty.  */
}
|
|
1656
|
|
1657 void
|
|
1658 GOMP_taskgroup_start (void)
|
|
1659 {
|
|
1660 struct gomp_thread *thr = gomp_thread ();
|
|
1661 struct gomp_team *team = thr->ts.team;
|
|
1662 struct gomp_task *task = thr->task;
|
|
1663 struct gomp_taskgroup *taskgroup;
|
|
1664
|
|
1665 /* If team is NULL, all tasks are executed as
|
|
1666 GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
|
|
1667 taskgroup and their descendant tasks will be finished
|
|
1668 by the time GOMP_taskgroup_end is called. */
|
|
1669 if (team == NULL)
|
|
1670 return;
|
|
1671 taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
|
|
1672 taskgroup->prev = task->taskgroup;
|
|
1673 priority_queue_init (&taskgroup->taskgroup_queue);
|
|
1674 taskgroup->in_taskgroup_wait = false;
|
|
1675 taskgroup->cancelled = false;
|
|
1676 taskgroup->num_children = 0;
|
|
1677 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
|
|
1678 task->taskgroup = taskgroup;
|
|
1679 }
|
|
1680
|
|
/* Called when leaving a taskgroup construct: wait until every child
   task of the taskgroup (and their descendants) has completed,
   helping out by running queued tasks in the meantime, then pop and
   free the taskgroup.  */

void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  /* Freeing a finished task is deferred until the task_lock has been
     dropped.  */
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
	 thr->ts.team == NULL, but inside of the taskgroup there
	 is #pragma omp target nowait that creates an implicit
	 team with a single thread.  In this case, we want to wait
	 for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
				  MEMMODEL_RELAXED))
	{
	  if (taskgroup->num_children)
	    {
	      /* Children exist but none are queued in the taskgroup
		 queue; try the current task's children queue
		 instead, or wait if that is empty too.  */
	      if (priority_queue_empty_p (&task->children_queue,
					  MEMMODEL_RELAXED))
		goto do_wait;
	      child_task
		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
					    PQ_TEAM, &team->task_queue,
					    &unused);
	    }
	  else
	    {
	      /* No children left: done.  */
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task
	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  /* Asynchronously running target task: requeued on
		     completion, so skip post-run processing here.  */
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: block until a taskgroup child finishes.  */
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake idle threads to help with the newly runnable
		 tasks, at most one per new task.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
|
|
1842
|
|
1843 int
|
|
1844 omp_in_final (void)
|
|
1845 {
|
|
1846 struct gomp_thread *thr = gomp_thread ();
|
|
1847 return thr->task && thr->task->final_task;
|
|
1848 }
|
|
1849
|
|
1850 ialias (omp_in_final)
|