/* Copyright (C) 2005-2017 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"


/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
                enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
            ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (incr > 0, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size)
                                  >= 1UL << (sizeof (long)
                                             * __CHAR_BIT__ / 2 - 1), 0))
              ws->mode = 0;
            else
              ws->mode = ws->end < (LONG_MAX
                                    - (nthreads + 1) * ws->chunk_size);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size)
                                   >= 1UL << (sizeof (long)
                                              * __CHAR_BIT__ / 2 - 1), 0))
          ws->mode = 0;
        else
          ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}
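
/* Illustration (not part of the library): with start = 10, end = 0 and
   incr = 1, the canonicalization above stores ws->end = 10, so
   ws->next == ws->end and the first *_next call immediately reports
   that no iterations remain.  A nonzero ws->mode tells the iterators in
   iter.c that even NTHREADS simultaneous chunk grabs cannot wrap around
   LONG_MAX, so the dynamic fast path may claim a chunk with a bare

     start = __sync_fetch_and_add (&ws->next, ws->chunk_size);

   instead of a compare-and-swap loop with explicit overflow checks.  */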

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see that the construct already exists and will allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

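/* Example (a sketch, not from the compiler sources): for a stand-alone
   "#pragma omp for schedule(dynamic, 4) nowait" over i = 0 .. n-1, the
   compiler emits calls roughly equivalent to:

     long istart, iend;
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart, &iend))
       do
         {
           long i;
           for (i = istart; i < iend; i++)
             body (i);
         }
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end_nowait ();

   All of the entry points used here are exported by this file.  */
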
static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

/* The current dynamic implementation is always monotonic.  The
   entrypoints without nonmonotonic in them have to be always monotonic,
   but the nonmonotonic ones could be changed to use work-stealing for
   improved scalability.  */

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* Similar to dynamic, though the open question is how the chunk sizes
   could be decreased without central locking or atomics.  */

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
                         long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr,
                                     icv->run_sched_chunk_size,
                                     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr,
                                      icv->run_sched_chunk_size,
                                      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr,
                                     icv->run_sched_chunk_size,
                                     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could experiment
         with a feedback-driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

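/* Example (illustrative): with the environment setting
   OMP_SCHEDULE="guided,7", the run-sched ICV holds GFS_GUIDED and a
   chunk size of 7, so

     GOMP_loop_runtime_start (0, n, 1, &istart, &iend);

   behaves like gomp_loop_guided_start (0, n, 1, 7, &istart, &iend).  */
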
/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

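/* Example (a sketch; GOMP_ordered_start and GOMP_ordered_end are
   defined elsewhere in libgomp): a loop with an ORDERED section, say
   "#pragma omp for ordered schedule(dynamic)", might expand to:

     long istart, iend, i;
     if (GOMP_loop_ordered_dynamic_start (0, n, 1, 1, &istart, &iend))
       do
         for (i = istart; i < iend; i++)
           {
             before_ordered (i);
             GOMP_ordered_start ();
             ordered_body (i);      // runs in iteration order
             GOMP_ordered_end ();
           }
       while (GOMP_loop_ordered_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   before_ordered and ordered_body stand in for user code.  */
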
static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
                                 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
                                             icv->run_sched_chunk_size,
                                             istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
                                             icv->run_sched_chunk_size,
                                             istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could experiment
         with a feedback-driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
                                             0, istart, iend);
    default:
      abort ();
    }
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop always iterates from 0 to COUNTS[0] - 1,
   while the remaining COUNTS array elements tell the library the number of
   iterations of the ordered inner loops.  */

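/* Example (illustrative): for a doubly nested loop annotated with
   "#pragma omp for ordered(2)" whose outer loop runs M times and whose
   inner loop runs K times, the compiler passes ncounts = 2 and
   counts[] = { M, K }; the worksharing loop below then distributes
   iterations 0 .. M - 1 of the outer loop, while the cross-iteration
   waits and posts go through the doacross entry points defined
   elsewhere in libgomp.  */
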
static bool
gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_STATIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_DYNAMIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_GUIDED, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
                                  long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_doacross_static_start (ncounts, counts,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_doacross_dynamic_start (ncounts, counts,
                                               icv->run_sched_chunk_size,
                                               istart, iend);
    case GFS_GUIDED:
      return gomp_loop_doacross_guided_start (ncounts, counts,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could experiment
         with a feedback-driven choice.  */
      return gomp_loop_doacross_static_start (ncounts, counts,
                                              0, istart, iend);
    default:
      abort ();
    }
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

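/* Example (a sketch): inside a combined "#pragma omp parallel for
   schedule(runtime)", the outlined function can skip the *_start call
   entirely, because gomp_parallel_loop_start below pre-initializes the
   work share:

     long istart, iend, i;
     while (GOMP_loop_runtime_next (&istart, &iend))
       for (i = istart; i < iend; i++)
         body (i);
     GOMP_loop_end_nowait ();
*/
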
static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */

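/* Example (a sketch): a combined "#pragma omp parallel for
   schedule(static)" can become the single call

     GOMP_parallel_loop_static (fn, &data, 0, 0, n, 1, 0, 0);

   where fn is the outlined loop body that fetches its iteration blocks
   with GOMP_loop_static_next, as sketched above.  */
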
static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
                          unsigned num_threads, long start, long end,
                          long incr, enum gomp_schedule_type sched,
                          long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size, 0);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, 0);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, 0);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var, icv->run_sched_chunk_size, 0);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
                           unsigned num_threads, long start, long end,
                           long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
                            unsigned num_threads, long start, long end,
                            long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
                           unsigned num_threads, long start, long end,
                           long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
        __attribute__((alias ("GOMP_parallel_loop_dynamic")));
extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
        __attribute__((alias ("GOMP_parallel_loop_guided")));
#else
void
GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
                                         unsigned num_threads, long start,
                                         long end, long incr, long chunk_size,
                                         unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
                                        unsigned num_threads, long start,
                                        long end, long incr, long chunk_size,
                                        unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
#endif

void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
                            unsigned num_threads, long start, long end,
                            long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var, icv->run_sched_chunk_size,
                            flags);
  fn (data);
  GOMP_parallel_end ();
}

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}
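
/* Example (a sketch): for a cancellable loop the compiler branches on
   the return value, e.g.

     if (GOMP_loop_end_cancel ())
       goto cancelled;

   so that a "#pragma omp cancel for" executed by any thread makes the
   whole team abandon the construct.  */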

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static functions with strong aliases when possible, or with
   wrapper functions otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
        __attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
        __attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
        __attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
        __attribute__((alias ("gomp_loop_doacross_static_start")));
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
        __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
        __attribute__((alias ("gomp_loop_doacross_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
        __attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
        __attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
        __attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                      long chunk_size, long *istart,
                                      long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                     long chunk_size, long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
                                           istart, iend);
}

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif