libgomp/team.c @ 158:494b0b89df80 (default tip), CbC_gcc repository
author:  Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date:    Mon, 25 May 2020 18:13:55 +0900
parent:  1830386684a0
/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif

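/* Editor's note: a hedged sketch, not part of this file, of how the two
   TLS variants above are read back.  The gomp_thread () accessor is
   assumed (paraphrasing libgomp.h) to look roughly like this:

     static inline struct gomp_thread *
     gomp_thread (void)
     {
     #if defined HAVE_TLS || defined USE_EMUTLS
       return &gomp_tls_data;
     #else
       return pthread_getspecific (gomp_tls_key);
     #endif
     }
*/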

/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
  pthread_t handle;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif
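
/* Editor's note: a sketch of the dock handshake implied by the loop above;
   this is a reading of this file, not authoritative documentation.  For a
   pooled (non-nested) worker, each reuse cycle is:

     master:  nthr->fn = fn; nthr->data = data;      (in gomp_team_start)
     both:    gomp_simple_barrier_wait (&pool->threads_dock);
     worker:  local_fn (local_data);
     worker:  gomp_team_barrier_wait_final (&team->barrier);
     both:    gomp_simple_barrier_wait (&pool->threads_dock);

   then the worker reloads thr->fn and either loops again or, if no fn was
   assigned (e.g. the pool shrank), falls out, detaches and exits.  */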

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = team_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}

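/* Editor's note: a hedged sketch of the caller's side, paraphrasing
   GOMP_parallel from parallel.c (not part of this file): the team built
   here is handed to gomp_team_start, the master runs FN itself, and
   gomp_team_end tears everything down.

     struct gomp_team *team = gomp_new_team (num_threads);
     gomp_team_start (fn, data, num_threads, flags, team, NULL);
     fn (data);
     gomp_team_end ();
*/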

/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called
             gomp_simple_barrier_wait_last in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
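
/* Editor's note: gomp_free_thread is installed below (in initialize_team)
   as the destructor of the gomp_thread_destructor pthread key, and
   gomp_new_icv arms it per thread via
   pthread_setspecific (gomp_thread_destructor, thr), so this teardown runs
   automatically when such a thread exits.  */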

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

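  /* Editor's note: a worked instance of the arithmetic above (the numbers
     are illustrative, not from the source).  omp_proc_bind_close with
     T = 10 threads over P = 4 places gives s = 10 / 4 = 2 and
     rest = 10 % 4 = 2, so two places receive s + 1 = 3 threads and the
     other two receive 2.  omp_proc_bind_spread with T = 3 over P = 8
     gives s = 8 / 3 = 2 and rest = 8 % 3 = 2, i.e. subpartitions of 3, 3
     and 2 places with one thread pinned at the start of each.  */
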
  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
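
/* Editor's note: the pool->last_team cache stored above is what
   get_last_team hands back to gomp_new_team, so consecutive parallel
   regions with the same thread count reuse a single team allocation:

     #pragma omp parallel num_threads (4)
     work_a ();
     #pragma omp parallel num_threads (4)
     work_b ();   // the second region recycles the first region's team

   (work_a/work_b are hypothetical placeholders.)  */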

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but doesn't detach itself;
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called
             gomp_simple_barrier_wait_last in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
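
/* Editor's note (assumption, not stated in this file): gomp_pause_host is
   presumably the host-side worker behind the OpenMP 5.0 omp_pause_resource
   and omp_pause_resource_all entry points; the -1 returned above when
   called from within a parallel region would let those API calls report
   failure.  */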
#endif

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}
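
/* Editor's note (assumption): gomp_new_icv appears to be the lazy
   initialization path for threads libgomp did not create itself, e.g. a
   foreign pthread entering its first parallel region.  What is certain
   from this file is that the pthread_setspecific call above arms
   gomp_thread_destructor, so gomp_free_thread runs when such a thread
   exits.  */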