/* libgomp/team.c @ 111:04ced10e8804 (CbC_gcc repository, gcc 7)
   author:   kono
   date:     Fri, 27 Oct 2017 22:46:09 +0900
   parents:  58ad6c70ea60
   children: 84e7813d76e9  */

/* Copyright (C) 2005-2017 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};
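
/* Editorial note (not in the original source): gomp_team_start fills in
   one of these records per launched thread; the storage lives on the
   launching thread's stack, so gomp_thread_start must copy every field
   out before the start-up barrier lets the master continue.  */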


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif
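
/* Editorial sketch (not in the original source): the dock handshake in
   the idle loop above pairs with gomp_team_start roughly like this:

     master:  nthr->fn = fn; nthr->data = data;               publish work
              gomp_simple_barrier_wait (&pool->threads_dock);    release
     worker:  gomp_simple_barrier_wait (&pool->threads_dock);    wake up
              local_fn = thr->fn; local_data = thr->data;     consume work

   A thr->fn still NULL at wake-up (a pooled thread the new, smaller
   team does not need) makes the worker fall out of the do/while loop
   and terminate.  */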

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}
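
/* Editorial note: pool->last_team is a one-element cache written by
   gomp_team_end; hitting it here lets a run of same-sized, non-nested
   parallel regions reuse one team allocation instead of paying for a
   gomp_malloc/free_team round trip per region.  */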

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
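
/* Illustrative sketch (editorial addition, simplified): roughly how the
   parallel-region entry point in parallel.c is expected to drive this
   file.  The real GOMP_parallel also resolves the thread count from the
   ICVs before starting the team.

     struct gomp_team *team = gomp_new_team (num_threads);
     gomp_team_start (fn, data, num_threads, flags, team);
     fn (data);             -- the master runs its share of the region
     GOMP_parallel_end ();  -- winds down the team via gomp_team_end
   */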


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}
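
/* Editorial note: this helper is never called directly.  gomp_free_thread
   below publishes it as each pooled thread's next fn and releases the
   dock, so every worker runs it as its final piece of "work" and then
   terminates itself.  */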

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called
             gomp_barrier_wait_last in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
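
/* Editorial note: besides its explicit uses, gomp_free_thread is
   registered in initialize_team below as the destructor of the
   gomp_thread_destructor pthread key, so a user-created thread that
   used libgomp has its pool and task state reclaimed automatically
   when the thread exits.  */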

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

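  /* Worked example (editorial addition, assuming an OMP_PLACES list
     with 8 places): for nthreads = 4 and proc_bind(spread), the
     T <= P branch above computes s = 8 / 4 = 2 and rest = 0, so each
     subpartition gets exactly two places and each thread ends up
     bound to the first place of its own subpartition.  */
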
  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

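  /* Editorial note: START_DATA lives on this stack frame via gomp_alloca,
     one slot per new thread.  This is safe only because the do_release
     barrier below keeps gomp_team_start from returning until every new
     thread has copied its slot out in gomp_thread_start.  */
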
  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT if non-zero will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
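
/* Editorial note: the non-nested path above parks the finished team in
   pool->last_team rather than freeing it; this is the cache that
   get_last_team consults when the next same-sized parallel region
   starts.  */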

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
#endif

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}
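
/* Editorial usage note: gomp_new_icv equips a thread libgomp did not
   create (e.g. a plain pthread entering its first OpenMP construct)
   with an implicit task and ICV set; registering THR with
   gomp_thread_destructor makes gomp_free_thread run at thread exit to
   release what is allocated here.  */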