comparison libgomp/team.c @ 111:04ced10e8804

gcc 7
author:   kono
date:     Fri, 27 Oct 2017 22:46:09 +0900
parents:  58ad6c70ea60
children: 84e7813d76e9
comparison
68:561a7518be6b (old, left column)  111:04ced10e8804 (new, right column)
1 /* Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. 1 /* Copyright (C) 2005-2017 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>. 2 Contributed by Richard Henderson <rth@redhat.com>.
3 3
4 This file is part of the GNU OpenMP Library (libgomp). 4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
5 6
6 Libgomp is free software; you can redistribute it and/or modify it 7 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by 8 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option) 9 the Free Software Foundation; either version 3, or (at your option)
9 any later version. 10 any later version.
24 25
25 /* This file handles the maintainence of threads in response to team 26 /* This file handles the maintainence of threads in response to team
26 creation and termination. */ 27 creation and termination. */
27 28
28 #include "libgomp.h" 29 #include "libgomp.h"
30 #include "pool.h"
29 #include <stdlib.h> 31 #include <stdlib.h>
30 #include <string.h> 32 #include <string.h>
31 33
34 #ifdef LIBGOMP_USE_PTHREADS
32 /* This attribute contains PTHREAD_CREATE_DETACHED. */ 35 /* This attribute contains PTHREAD_CREATE_DETACHED. */
33 pthread_attr_t gomp_thread_attr; 36 pthread_attr_t gomp_thread_attr;
34 37
35 /* This key is for the thread destructor. */ 38 /* This key is for the thread destructor. */
36 pthread_key_t gomp_thread_destructor; 39 pthread_key_t gomp_thread_destructor;
37 40
38 41
39 /* This is the libgomp per-thread data structure. */ 42 /* This is the libgomp per-thread data structure. */
40 #ifdef HAVE_TLS 43 #if defined HAVE_TLS || defined USE_EMUTLS
41 __thread struct gomp_thread gomp_tls_data; 44 __thread struct gomp_thread gomp_tls_data;
42 #else 45 #else
43 pthread_key_t gomp_tls_key; 46 pthread_key_t gomp_tls_key;
44 #endif 47 #endif
45 48
51 void (*fn) (void *); 54 void (*fn) (void *);
52 void *fn_data; 55 void *fn_data;
53 struct gomp_team_state ts; 56 struct gomp_team_state ts;
54 struct gomp_task *task; 57 struct gomp_task *task;
55 struct gomp_thread_pool *thread_pool; 58 struct gomp_thread_pool *thread_pool;
59 unsigned int place;
56 bool nested; 60 bool nested;
57 }; 61 };
58 62
59 63
60 /* This function is a pthread_create entry point. This contains the idle 64 /* This function is a pthread_create entry point. This contains the idle
67 struct gomp_thread *thr; 71 struct gomp_thread *thr;
68 struct gomp_thread_pool *pool; 72 struct gomp_thread_pool *pool;
69 void (*local_fn) (void *); 73 void (*local_fn) (void *);
70 void *local_data; 74 void *local_data;
71 75
72 #ifdef HAVE_TLS 76 #if defined HAVE_TLS || defined USE_EMUTLS
73 thr = &gomp_tls_data; 77 thr = &gomp_tls_data;
74 #else 78 #else
75 struct gomp_thread local_thr; 79 struct gomp_thread local_thr;
76 thr = &local_thr; 80 thr = &local_thr;
77 pthread_setspecific (gomp_tls_key, thr); 81 pthread_setspecific (gomp_tls_key, thr);
82 local_fn = data->fn; 86 local_fn = data->fn;
83 local_data = data->fn_data; 87 local_data = data->fn_data;
84 thr->thread_pool = data->thread_pool; 88 thr->thread_pool = data->thread_pool;
85 thr->ts = data->ts; 89 thr->ts = data->ts;
86 thr->task = data->task; 90 thr->task = data->task;
91 thr->place = data->place;
87 92
88 thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release; 93 thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
89 94
90 /* Make thread pool local. */ 95 /* Make thread pool local. */
91 pool = thr->thread_pool; 96 pool = thr->thread_pool;
96 struct gomp_task *task = thr->task; 101 struct gomp_task *task = thr->task;
97 102
98 gomp_barrier_wait (&team->barrier); 103 gomp_barrier_wait (&team->barrier);
99 104
100 local_fn (local_data); 105 local_fn (local_data);
101 gomp_team_barrier_wait (&team->barrier); 106 gomp_team_barrier_wait_final (&team->barrier);
102 gomp_finish_task (task); 107 gomp_finish_task (task);
103 gomp_barrier_wait_last (&team->barrier); 108 gomp_barrier_wait_last (&team->barrier);
104 } 109 }
105 else 110 else
106 { 111 {
107 pool->threads[thr->ts.team_id] = thr; 112 pool->threads[thr->ts.team_id] = thr;
108 113
109 gomp_barrier_wait (&pool->threads_dock); 114 gomp_simple_barrier_wait (&pool->threads_dock);
110 do 115 do
111 { 116 {
112 struct gomp_team *team = thr->ts.team; 117 struct gomp_team *team = thr->ts.team;
113 struct gomp_task *task = thr->task; 118 struct gomp_task *task = thr->task;
114 119
115 local_fn (local_data); 120 local_fn (local_data);
116 gomp_team_barrier_wait (&team->barrier); 121 gomp_team_barrier_wait_final (&team->barrier);
117 gomp_finish_task (task); 122 gomp_finish_task (task);
118 123
119 gomp_barrier_wait (&pool->threads_dock); 124 gomp_simple_barrier_wait (&pool->threads_dock);
120 125
121 local_fn = thr->fn; 126 local_fn = thr->fn;
122 local_data = thr->data; 127 local_data = thr->data;
123 thr->fn = NULL; 128 thr->fn = NULL;
124 } 129 }
125 while (local_fn); 130 while (local_fn);
126 } 131 }
127 132
128 gomp_sem_destroy (&thr->release); 133 gomp_sem_destroy (&thr->release);
134 thr->thread_pool = NULL;
135 thr->task = NULL;
129 return NULL; 136 return NULL;
130 } 137 }
131 138 #endif
139
140 static inline struct gomp_team *
141 get_last_team (unsigned nthreads)
142 {
143 struct gomp_thread *thr = gomp_thread ();
144 if (thr->ts.team == NULL)
145 {
146 struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
147 struct gomp_team *last_team = pool->last_team;
148 if (last_team != NULL && last_team->nthreads == nthreads)
149 {
150 pool->last_team = NULL;
151 return last_team;
152 }
153 }
154 return NULL;
155 }
132 156
133 /* Create a new team data structure. */ 157 /* Create a new team data structure. */
134 158
135 struct gomp_team * 159 struct gomp_team *
136 gomp_new_team (unsigned nthreads) 160 gomp_new_team (unsigned nthreads)
137 { 161 {
138 struct gomp_team *team; 162 struct gomp_team *team;
139 size_t size;
140 int i; 163 int i;
141 164
142 size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0]) 165 team = get_last_team (nthreads);
143 + sizeof (team->implicit_task[0])); 166 if (team == NULL)
144 team = gomp_malloc (size); 167 {
168 size_t extra = sizeof (team->ordered_release[0])
169 + sizeof (team->implicit_task[0]);
170 team = gomp_malloc (sizeof (*team) + nthreads * extra);
171
172 #ifndef HAVE_SYNC_BUILTINS
173 gomp_mutex_init (&team->work_share_list_free_lock);
174 #endif
175 gomp_barrier_init (&team->barrier, nthreads);
176 gomp_mutex_init (&team->task_lock);
177
178 team->nthreads = nthreads;
179 }
145 180
146 team->work_share_chunk = 8; 181 team->work_share_chunk = 8;
147 #ifdef HAVE_SYNC_BUILTINS 182 #ifdef HAVE_SYNC_BUILTINS
148 team->single_count = 0; 183 team->single_count = 0;
149 #else 184 #endif
150 gomp_mutex_init (&team->work_share_list_free_lock); 185 team->work_shares_to_free = &team->work_shares[0];
151 #endif
152 gomp_init_work_share (&team->work_shares[0], false, nthreads); 186 gomp_init_work_share (&team->work_shares[0], false, nthreads);
153 team->work_shares[0].next_alloc = NULL; 187 team->work_shares[0].next_alloc = NULL;
154 team->work_share_list_free = NULL; 188 team->work_share_list_free = NULL;
155 team->work_share_list_alloc = &team->work_shares[1]; 189 team->work_share_list_alloc = &team->work_shares[1];
156 for (i = 1; i < 7; i++) 190 for (i = 1; i < 7; i++)
157 team->work_shares[i].next_free = &team->work_shares[i + 1]; 191 team->work_shares[i].next_free = &team->work_shares[i + 1];
158 team->work_shares[i].next_free = NULL; 192 team->work_shares[i].next_free = NULL;
159 193
160 team->nthreads = nthreads;
161 gomp_barrier_init (&team->barrier, nthreads);
162
163 gomp_sem_init (&team->master_release, 0); 194 gomp_sem_init (&team->master_release, 0);
164 team->ordered_release = (void *) &team->implicit_task[nthreads]; 195 team->ordered_release = (void *) &team->implicit_task[nthreads];
165 team->ordered_release[0] = &team->master_release; 196 team->ordered_release[0] = &team->master_release;
166 197
167 gomp_mutex_init (&team->task_lock); 198 priority_queue_init (&team->task_queue);
168 team->task_queue = NULL;
169 team->task_count = 0; 199 team->task_count = 0;
200 team->task_queued_count = 0;
170 team->task_running_count = 0; 201 team->task_running_count = 0;
202 team->work_share_cancelled = 0;
203 team->team_cancelled = 0;
171 204
172 return team; 205 return team;
173 } 206 }
174 207
175 208
176 /* Free a team data structure. */ 209 /* Free a team data structure. */
177 210
178 static void 211 static void
179 free_team (struct gomp_team *team) 212 free_team (struct gomp_team *team)
180 { 213 {
214 #ifndef HAVE_SYNC_BUILTINS
215 gomp_mutex_destroy (&team->work_share_list_free_lock);
216 #endif
181 gomp_barrier_destroy (&team->barrier); 217 gomp_barrier_destroy (&team->barrier);
182 gomp_mutex_destroy (&team->task_lock); 218 gomp_mutex_destroy (&team->task_lock);
219 priority_queue_free (&team->task_queue);
183 free (team); 220 free (team);
184 }
185
186 /* Allocate and initialize a thread pool. */
187
188 static struct gomp_thread_pool *gomp_new_thread_pool (void)
189 {
190 struct gomp_thread_pool *pool
191 = gomp_malloc (sizeof(struct gomp_thread_pool));
192 pool->threads = NULL;
193 pool->threads_size = 0;
194 pool->threads_used = 0;
195 pool->last_team = NULL;
196 return pool;
197 } 221 }
198 222
199 static void 223 static void
200 gomp_free_pool_helper (void *thread_pool) 224 gomp_free_pool_helper (void *thread_pool)
201 { 225 {
226 struct gomp_thread *thr = gomp_thread ();
202 struct gomp_thread_pool *pool 227 struct gomp_thread_pool *pool
203 = (struct gomp_thread_pool *) thread_pool; 228 = (struct gomp_thread_pool *) thread_pool;
204 gomp_barrier_wait_last (&pool->threads_dock); 229 gomp_simple_barrier_wait_last (&pool->threads_dock);
205 gomp_sem_destroy (&gomp_thread ()->release); 230 gomp_sem_destroy (&thr->release);
231 thr->thread_pool = NULL;
232 thr->task = NULL;
233 #ifdef LIBGOMP_USE_PTHREADS
206 pthread_exit (NULL); 234 pthread_exit (NULL);
235 #elif defined(__nvptx__)
236 asm ("exit;");
237 #else
238 #error gomp_free_pool_helper must terminate the thread
239 #endif
207 } 240 }
208 241
209 /* Free a thread pool and release its threads. */ 242 /* Free a thread pool and release its threads. */
210 243
211 static void 244 void
212 gomp_free_thread (void *arg __attribute__((unused))) 245 gomp_free_thread (void *arg __attribute__((unused)))
213 { 246 {
214 struct gomp_thread *thr = gomp_thread (); 247 struct gomp_thread *thr = gomp_thread ();
215 struct gomp_thread_pool *pool = thr->thread_pool; 248 struct gomp_thread_pool *pool = thr->thread_pool;
216 if (pool) 249 if (pool)
223 struct gomp_thread *nthr = pool->threads[i]; 256 struct gomp_thread *nthr = pool->threads[i];
224 nthr->fn = gomp_free_pool_helper; 257 nthr->fn = gomp_free_pool_helper;
225 nthr->data = pool; 258 nthr->data = pool;
226 } 259 }
227 /* This barrier undocks threads docked on pool->threads_dock. */ 260 /* This barrier undocks threads docked on pool->threads_dock. */
228 gomp_barrier_wait (&pool->threads_dock); 261 gomp_simple_barrier_wait (&pool->threads_dock);
229 /* And this waits till all threads have called gomp_barrier_wait_last 262 /* And this waits till all threads have called gomp_barrier_wait_last
230 in gomp_free_pool_helper. */ 263 in gomp_free_pool_helper. */
231 gomp_barrier_wait (&pool->threads_dock); 264 gomp_simple_barrier_wait (&pool->threads_dock);
232 /* Now it is safe to destroy the barrier and free the pool. */ 265 /* Now it is safe to destroy the barrier and free the pool. */
233 gomp_barrier_destroy (&pool->threads_dock); 266 gomp_simple_barrier_destroy (&pool->threads_dock);
234 } 267
235 free (pool->threads); 268 #ifdef HAVE_SYNC_BUILTINS
269 __sync_fetch_and_add (&gomp_managed_threads,
270 1L - pool->threads_used);
271 #else
272 gomp_mutex_lock (&gomp_managed_threads_lock);
273 gomp_managed_threads -= pool->threads_used - 1L;
274 gomp_mutex_unlock (&gomp_managed_threads_lock);
275 #endif
276 }
236 if (pool->last_team) 277 if (pool->last_team)
237 free_team (pool->last_team); 278 free_team (pool->last_team);
279 #ifndef __nvptx__
280 free (pool->threads);
238 free (pool); 281 free (pool);
282 #endif
239 thr->thread_pool = NULL; 283 thr->thread_pool = NULL;
240 } 284 }
285 if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
286 gomp_team_end ();
241 if (thr->task != NULL) 287 if (thr->task != NULL)
242 { 288 {
243 struct gomp_task *task = thr->task; 289 struct gomp_task *task = thr->task;
244 gomp_end_task (); 290 gomp_end_task ();
245 free (task); 291 free (task);
246 } 292 }
247 } 293 }
248 294
249 /* Launch a team. */ 295 /* Launch a team. */
250 296
297 #ifdef LIBGOMP_USE_PTHREADS
251 void 298 void
252 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, 299 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
253 struct gomp_team *team) 300 unsigned flags, struct gomp_team *team)
254 { 301 {
255 struct gomp_thread_start_data *start_data; 302 struct gomp_thread_start_data *start_data;
256 struct gomp_thread *thr, *nthr; 303 struct gomp_thread *thr, *nthr;
257 struct gomp_task *task; 304 struct gomp_task *task;
258 struct gomp_task_icv *icv; 305 struct gomp_task_icv *icv;
259 bool nested; 306 bool nested;
260 struct gomp_thread_pool *pool; 307 struct gomp_thread_pool *pool;
261 unsigned i, n, old_threads_used = 0; 308 unsigned i, n, old_threads_used = 0;
262 pthread_attr_t thread_attr, *attr; 309 pthread_attr_t thread_attr, *attr;
310 unsigned long nthreads_var;
311 char bind, bind_var;
312 unsigned int s = 0, rest = 0, p = 0, k = 0;
313 unsigned int affinity_count = 0;
314 struct gomp_thread **affinity_thr = NULL;
263 315
264 thr = gomp_thread (); 316 thr = gomp_thread ();
265 nested = thr->ts.team != NULL; 317 nested = thr->ts.level;
266 if (__builtin_expect (thr->thread_pool == NULL, 0))
267 {
268 thr->thread_pool = gomp_new_thread_pool ();
269 pthread_setspecific (gomp_thread_destructor, thr);
270 }
271 pool = thr->thread_pool; 318 pool = thr->thread_pool;
272 task = thr->task; 319 task = thr->task;
273 icv = task ? &task->icv : &gomp_global_icv; 320 icv = task ? &task->icv : &gomp_global_icv;
321 if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
322 gomp_init_affinity ();
274 323
275 /* Always save the previous state, even if this isn't a nested team. 324 /* Always save the previous state, even if this isn't a nested team.
276 In particular, we should save any work share state from an outer 325 In particular, we should save any work share state from an outer
277 orphaned work share construct. */ 326 orphaned work share construct. */
278 team->prev_ts = thr->ts; 327 team->prev_ts = thr->ts;
287 #ifdef HAVE_SYNC_BUILTINS 336 #ifdef HAVE_SYNC_BUILTINS
288 thr->ts.single_count = 0; 337 thr->ts.single_count = 0;
289 #endif 338 #endif
290 thr->ts.static_trip = 0; 339 thr->ts.static_trip = 0;
291 thr->task = &team->implicit_task[0]; 340 thr->task = &team->implicit_task[0];
341 nthreads_var = icv->nthreads_var;
342 if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
343 && thr->ts.level < gomp_nthreads_var_list_len)
344 nthreads_var = gomp_nthreads_var_list[thr->ts.level];
345 bind_var = icv->bind_var;
346 if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
347 bind_var = flags & 7;
348 bind = bind_var;
349 if (__builtin_expect (gomp_bind_var_list != NULL, 0)
350 && thr->ts.level < gomp_bind_var_list_len)
351 bind_var = gomp_bind_var_list[thr->ts.level];
292 gomp_init_task (thr->task, task, icv); 352 gomp_init_task (thr->task, task, icv);
353 team->implicit_task[0].icv.nthreads_var = nthreads_var;
354 team->implicit_task[0].icv.bind_var = bind_var;
293 355
294 if (nthreads == 1) 356 if (nthreads == 1)
295 return; 357 return;
296 358
297 i = 1; 359 i = 1;
360
361 if (__builtin_expect (gomp_places_list != NULL, 0))
362 {
363 /* Depending on chosen proc_bind model, set subpartition
364 for the master thread and initialize helper variables
365 P and optionally S, K and/or REST used by later place
366 computation for each additional thread. */
367 p = thr->place - 1;
368 switch (bind)
369 {
370 case omp_proc_bind_true:
371 case omp_proc_bind_close:
372 if (nthreads > thr->ts.place_partition_len)
373 {
374 /* T > P. S threads will be placed in each place,
375 and the final REM threads placed one by one
376 into the already occupied places. */
377 s = nthreads / thr->ts.place_partition_len;
378 rest = nthreads % thr->ts.place_partition_len;
379 }
380 else
381 s = 1;
382 k = 1;
383 break;
384 case omp_proc_bind_master:
385 /* Each thread will be bound to master's place. */
386 break;
387 case omp_proc_bind_spread:
388 if (nthreads <= thr->ts.place_partition_len)
389 {
390 /* T <= P. Each subpartition will have in between s
391 and s+1 places (subpartitions starting at or
392 after rest will have s places, earlier s+1 places),
393 each thread will be bound to the first place in
394 its subpartition (except for the master thread
395 that can be bound to another place in its
396 subpartition). */
397 s = thr->ts.place_partition_len / nthreads;
398 rest = thr->ts.place_partition_len % nthreads;
399 rest = (s + 1) * rest + thr->ts.place_partition_off;
400 if (p < rest)
401 {
402 p -= (p - thr->ts.place_partition_off) % (s + 1);
403 thr->ts.place_partition_len = s + 1;
404 }
405 else
406 {
407 p -= (p - rest) % s;
408 thr->ts.place_partition_len = s;
409 }
410 thr->ts.place_partition_off = p;
411 }
412 else
413 {
414 /* T > P. Each subpartition will have just a single
415 place and we'll place between s and s+1
416 threads into each subpartition. */
417 s = nthreads / thr->ts.place_partition_len;
418 rest = nthreads % thr->ts.place_partition_len;
419 thr->ts.place_partition_off = p;
420 thr->ts.place_partition_len = 1;
421 k = 1;
422 }
423 break;
424 }
425 }
426 else
427 bind = omp_proc_bind_false;
298 428
299 /* We only allow the reuse of idle threads for non-nested PARALLEL 429 /* We only allow the reuse of idle threads for non-nested PARALLEL
300 regions. This appears to be implied by the semantics of 430 regions. This appears to be implied by the semantics of
301 threadprivate variables, but perhaps that's reading too much into 431 threadprivate variables, but perhaps that's reading too much into
302 things. Certainly it does prevent any locking problems, since 432 things. Certainly it does prevent any locking problems, since
308 if (nthreads <= old_threads_used) 438 if (nthreads <= old_threads_used)
309 n = nthreads; 439 n = nthreads;
310 else if (old_threads_used == 0) 440 else if (old_threads_used == 0)
311 { 441 {
312 n = 0; 442 n = 0;
313 gomp_barrier_init (&pool->threads_dock, nthreads); 443 gomp_simple_barrier_init (&pool->threads_dock, nthreads);
314 } 444 }
315 else 445 else
316 { 446 {
317 n = old_threads_used; 447 n = old_threads_used;
318 448
319 /* Increase the barrier threshold to make sure all new 449 /* Increase the barrier threshold to make sure all new
320 threads arrive before the team is released. */ 450 threads arrive before the team is released. */
321 gomp_barrier_reinit (&pool->threads_dock, nthreads); 451 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
322 } 452 }
323 453
324 /* Not true yet, but soon will be. We're going to release all 454 /* Not true yet, but soon will be. We're going to release all
325 threads from the dock, and those that aren't part of the 455 threads from the dock, and those that aren't part of the
326 team will exit. */ 456 team will exit. */
327 pool->threads_used = nthreads; 457 pool->threads_used = nthreads;
328 458
459 /* If necessary, expand the size of the gomp_threads array. It is
460 expected that changes in the number of threads are rare, thus we
461 make no effort to expand gomp_threads_size geometrically. */
462 if (nthreads >= pool->threads_size)
463 {
464 pool->threads_size = nthreads + 1;
465 pool->threads
466 = gomp_realloc (pool->threads,
467 pool->threads_size
468 * sizeof (struct gomp_thread_data *));
469 }
470
329 /* Release existing idle threads. */ 471 /* Release existing idle threads. */
330 for (; i < n; ++i) 472 for (; i < n; ++i)
331 { 473 {
332 nthr = pool->threads[i]; 474 unsigned int place_partition_off = thr->ts.place_partition_off;
475 unsigned int place_partition_len = thr->ts.place_partition_len;
476 unsigned int place = 0;
477 if (__builtin_expect (gomp_places_list != NULL, 0))
478 {
479 switch (bind)
480 {
481 case omp_proc_bind_true:
482 case omp_proc_bind_close:
483 if (k == s)
484 {
485 ++p;
486 if (p == (team->prev_ts.place_partition_off
487 + team->prev_ts.place_partition_len))
488 p = team->prev_ts.place_partition_off;
489 k = 1;
490 if (i == nthreads - rest)
491 s = 1;
492 }
493 else
494 ++k;
495 break;
496 case omp_proc_bind_master:
497 break;
498 case omp_proc_bind_spread:
499 if (k == 0)
500 {
501 /* T <= P. */
502 if (p < rest)
503 p += s + 1;
504 else
505 p += s;
506 if (p == (team->prev_ts.place_partition_off
507 + team->prev_ts.place_partition_len))
508 p = team->prev_ts.place_partition_off;
509 place_partition_off = p;
510 if (p < rest)
511 place_partition_len = s + 1;
512 else
513 place_partition_len = s;
514 }
515 else
516 {
517 /* T > P. */
518 if (k == s)
519 {
520 ++p;
521 if (p == (team->prev_ts.place_partition_off
522 + team->prev_ts.place_partition_len))
523 p = team->prev_ts.place_partition_off;
524 k = 1;
525 if (i == nthreads - rest)
526 s = 1;
527 }
528 else
529 ++k;
530 place_partition_off = p;
531 place_partition_len = 1;
532 }
533 break;
534 }
535 if (affinity_thr != NULL
536 || (bind != omp_proc_bind_true
537 && pool->threads[i]->place != p + 1)
538 || pool->threads[i]->place <= place_partition_off
539 || pool->threads[i]->place > (place_partition_off
540 + place_partition_len))
541 {
542 unsigned int l;
543 if (affinity_thr == NULL)
544 {
545 unsigned int j;
546
547 if (team->prev_ts.place_partition_len > 64)
548 affinity_thr
549 = gomp_malloc (team->prev_ts.place_partition_len
550 * sizeof (struct gomp_thread *));
551 else
552 affinity_thr
553 = gomp_alloca (team->prev_ts.place_partition_len
554 * sizeof (struct gomp_thread *));
555 memset (affinity_thr, '\0',
556 team->prev_ts.place_partition_len
557 * sizeof (struct gomp_thread *));
558 for (j = i; j < old_threads_used; j++)
559 {
560 if (pool->threads[j]->place
561 > team->prev_ts.place_partition_off
562 && (pool->threads[j]->place
563 <= (team->prev_ts.place_partition_off
564 + team->prev_ts.place_partition_len)))
565 {
566 l = pool->threads[j]->place - 1
567 - team->prev_ts.place_partition_off;
568 pool->threads[j]->data = affinity_thr[l];
569 affinity_thr[l] = pool->threads[j];
570 }
571 pool->threads[j] = NULL;
572 }
573 if (nthreads > old_threads_used)
574 memset (&pool->threads[old_threads_used],
575 '\0', ((nthreads - old_threads_used)
576 * sizeof (struct gomp_thread *)));
577 n = nthreads;
578 affinity_count = old_threads_used - i;
579 }
580 if (affinity_count == 0)
581 break;
582 l = p;
583 if (affinity_thr[l - team->prev_ts.place_partition_off]
584 == NULL)
585 {
586 if (bind != omp_proc_bind_true)
587 continue;
588 for (l = place_partition_off;
589 l < place_partition_off + place_partition_len;
590 l++)
591 if (affinity_thr[l - team->prev_ts.place_partition_off]
592 != NULL)
593 break;
594 if (l == place_partition_off + place_partition_len)
595 continue;
596 }
597 nthr = affinity_thr[l - team->prev_ts.place_partition_off];
598 affinity_thr[l - team->prev_ts.place_partition_off]
599 = (struct gomp_thread *) nthr->data;
600 affinity_count--;
601 pool->threads[i] = nthr;
602 }
603 else
604 nthr = pool->threads[i];
605 place = p + 1;
606 }
607 else
608 nthr = pool->threads[i];
333 nthr->ts.team = team; 609 nthr->ts.team = team;
334 nthr->ts.work_share = &team->work_shares[0]; 610 nthr->ts.work_share = &team->work_shares[0];
335 nthr->ts.last_work_share = NULL; 611 nthr->ts.last_work_share = NULL;
336 nthr->ts.team_id = i; 612 nthr->ts.team_id = i;
337 nthr->ts.level = team->prev_ts.level + 1; 613 nthr->ts.level = team->prev_ts.level + 1;
338 nthr->ts.active_level = thr->ts.active_level; 614 nthr->ts.active_level = thr->ts.active_level;
615 nthr->ts.place_partition_off = place_partition_off;
616 nthr->ts.place_partition_len = place_partition_len;
339 #ifdef HAVE_SYNC_BUILTINS 617 #ifdef HAVE_SYNC_BUILTINS
340 nthr->ts.single_count = 0; 618 nthr->ts.single_count = 0;
341 #endif 619 #endif
342 nthr->ts.static_trip = 0; 620 nthr->ts.static_trip = 0;
343 nthr->task = &team->implicit_task[i]; 621 nthr->task = &team->implicit_task[i];
622 nthr->place = place;
344 gomp_init_task (nthr->task, task, icv); 623 gomp_init_task (nthr->task, task, icv);
624 team->implicit_task[i].icv.nthreads_var = nthreads_var;
625 team->implicit_task[i].icv.bind_var = bind_var;
345 nthr->fn = fn; 626 nthr->fn = fn;
346 nthr->data = data; 627 nthr->data = data;
347 team->ordered_release[i] = &nthr->release; 628 team->ordered_release[i] = &nthr->release;
348 } 629 }
349 630
631 if (__builtin_expect (affinity_thr != NULL, 0))
632 {
633 /* If AFFINITY_THR is non-NULL just because we had to
634 permute some threads in the pool, but we've managed
635 to find exactly as many old threads as we'd find
636 without affinity, we don't need to handle this
637 specially anymore. */
638 if (nthreads <= old_threads_used
639 ? (affinity_count == old_threads_used - nthreads)
640 : (i == old_threads_used))
641 {
642 if (team->prev_ts.place_partition_len > 64)
643 free (affinity_thr);
644 affinity_thr = NULL;
645 affinity_count = 0;
646 }
647 else
648 {
649 i = 1;
650 /* We are going to compute the places/subpartitions
651 again from the beginning. So, we need to reinitialize
652 vars modified by the switch (bind) above inside
653 of the loop, to the state they had after the initial
654 switch (bind). */
655 switch (bind)
656 {
657 case omp_proc_bind_true:
658 case omp_proc_bind_close:
659 if (nthreads > thr->ts.place_partition_len)
660 /* T > P. S has been changed, so needs
661 to be recomputed. */
662 s = nthreads / thr->ts.place_partition_len;
663 k = 1;
664 p = thr->place - 1;
665 break;
666 case omp_proc_bind_master:
667 /* No vars have been changed. */
668 break;
669 case omp_proc_bind_spread:
670 p = thr->ts.place_partition_off;
671 if (k != 0)
672 {
673 /* T > P. */
674 s = nthreads / team->prev_ts.place_partition_len;
675 k = 1;
676 }
677 break;
678 }
679
680 /* Increase the barrier threshold to make sure all new
681 threads and all the threads we're going to let die
682 arrive before the team is released. */
683 if (affinity_count)
684 gomp_simple_barrier_reinit (&pool->threads_dock,
685 nthreads + affinity_count);
686 }
687 }
688
350 if (i == nthreads) 689 if (i == nthreads)
351 goto do_release; 690 goto do_release;
352 691
353 /* If necessary, expand the size of the gomp_threads array. It is 692 }
354 expected that changes in the number of threads are rare, thus we 693
355 make no effort to expand gomp_threads_size geometrically. */ 694 if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
356 if (nthreads >= pool->threads_size) 695 {
357 { 696 long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
358 pool->threads_size = nthreads + 1;
359 pool->threads
360 = gomp_realloc (pool->threads,
361 pool->threads_size
362 * sizeof (struct gomp_thread_data *));
363 }
364 }
365
366 if (__builtin_expect (nthreads > old_threads_used, 0))
367 {
368 long diff = (long) nthreads - (long) old_threads_used;
369 697
370 if (old_threads_used == 0) 698 if (old_threads_used == 0)
371 --diff; 699 --diff;
372 700
373 #ifdef HAVE_SYNC_BUILTINS 701 #ifdef HAVE_SYNC_BUILTINS
374 __sync_fetch_and_add (&gomp_managed_threads, diff); 702 __sync_fetch_and_add (&gomp_managed_threads, diff);
375 #else 703 #else
376 gomp_mutex_lock (&gomp_remaining_threads_lock); 704 gomp_mutex_lock (&gomp_managed_threads_lock);
377 gomp_managed_threads += diff; 705 gomp_managed_threads += diff;
378 gomp_mutex_unlock (&gomp_remaining_threads_lock); 706 gomp_mutex_unlock (&gomp_managed_threads_lock);
379 #endif 707 #endif
380 } 708 }
381 709
382 attr = &gomp_thread_attr; 710 attr = &gomp_thread_attr;
383 if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) 711 if (__builtin_expect (gomp_places_list != NULL, 0))
384 { 712 {
385 size_t stacksize; 713 size_t stacksize;
386 pthread_attr_init (&thread_attr); 714 pthread_attr_init (&thread_attr);
387 pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED); 715 pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
388 if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize)) 716 if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
392 720
393 start_data = gomp_alloca (sizeof (struct gomp_thread_start_data) 721 start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
394 * (nthreads-i)); 722 * (nthreads-i));
395 723
396 /* Launch new threads. */ 724 /* Launch new threads. */
397 for (; i < nthreads; ++i, ++start_data) 725 for (; i < nthreads; ++i)
398 { 726 {
399 pthread_t pt; 727 pthread_t pt;
400 int err; 728 int err;
729
730 start_data->ts.place_partition_off = thr->ts.place_partition_off;
731 start_data->ts.place_partition_len = thr->ts.place_partition_len;
732 start_data->place = 0;
733 if (__builtin_expect (gomp_places_list != NULL, 0))
734 {
735 switch (bind)
736 {
737 case omp_proc_bind_true:
738 case omp_proc_bind_close:
739 if (k == s)
740 {
741 ++p;
742 if (p == (team->prev_ts.place_partition_off
743 + team->prev_ts.place_partition_len))
744 p = team->prev_ts.place_partition_off;
745 k = 1;
746 if (i == nthreads - rest)
747 s = 1;
748 }
749 else
750 ++k;
751 break;
752 case omp_proc_bind_master:
753 break;
754 case omp_proc_bind_spread:
755 if (k == 0)
756 {
757 /* T <= P. */
758 if (p < rest)
759 p += s + 1;
760 else
761 p += s;
762 if (p == (team->prev_ts.place_partition_off
763 + team->prev_ts.place_partition_len))
764 p = team->prev_ts.place_partition_off;
765 start_data->ts.place_partition_off = p;
766 if (p < rest)
767 start_data->ts.place_partition_len = s + 1;
768 else
769 start_data->ts.place_partition_len = s;
770 }
771 else
772 {
773 /* T > P. */
774 if (k == s)
775 {
776 ++p;
777 if (p == (team->prev_ts.place_partition_off
778 + team->prev_ts.place_partition_len))
779 p = team->prev_ts.place_partition_off;
780 k = 1;
781 if (i == nthreads - rest)
782 s = 1;
783 }
784 else
785 ++k;
786 start_data->ts.place_partition_off = p;
787 start_data->ts.place_partition_len = 1;
788 }
789 break;
790 }
791 start_data->place = p + 1;
792 if (affinity_thr != NULL && pool->threads[i] != NULL)
793 continue;
794 gomp_init_thread_affinity (attr, p);
795 }
401 796
402 start_data->fn = fn; 797 start_data->fn = fn;
403 start_data->fn_data = data; 798 start_data->fn_data = data;
404 start_data->ts.team = team; 799 start_data->ts.team = team;
405 start_data->ts.work_share = &team->work_shares[0]; 800 start_data->ts.work_share = &team->work_shares[0];
411 start_data->ts.single_count = 0; 806 start_data->ts.single_count = 0;
412 #endif 807 #endif
413 start_data->ts.static_trip = 0; 808 start_data->ts.static_trip = 0;
414 start_data->task = &team->implicit_task[i]; 809 start_data->task = &team->implicit_task[i];
415 gomp_init_task (start_data->task, task, icv); 810 gomp_init_task (start_data->task, task, icv);
811 team->implicit_task[i].icv.nthreads_var = nthreads_var;
812 team->implicit_task[i].icv.bind_var = bind_var;
416 start_data->thread_pool = pool; 813 start_data->thread_pool = pool;
417 start_data->nested = nested; 814 start_data->nested = nested;
418 815
419 if (gomp_cpu_affinity != NULL) 816 attr = gomp_adjust_thread_attr (attr, &thread_attr);
420 gomp_init_thread_affinity (attr); 817 err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
421
422 err = pthread_create (&pt, attr, gomp_thread_start, start_data);
423 if (err != 0) 818 if (err != 0)
424 gomp_fatal ("Thread creation failed: %s", strerror (err)); 819 gomp_fatal ("Thread creation failed: %s", strerror (err));
425 } 820 }
426 821
427 if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) 822 if (__builtin_expect (attr == &thread_attr, 0))
428 pthread_attr_destroy (&thread_attr); 823 pthread_attr_destroy (&thread_attr);
429 824
430 do_release: 825 do_release:
431 gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock); 826 if (nested)
827 gomp_barrier_wait (&team->barrier);
828 else
829 gomp_simple_barrier_wait (&pool->threads_dock);
432 830
433 /* Decrease the barrier threshold to match the number of threads 831 /* Decrease the barrier threshold to match the number of threads
434 that should arrive back at the end of this team. The extra 832 that should arrive back at the end of this team. The extra
435 threads should be exiting. Note that we arrange for this test 833 threads should be exiting. Note that we arrange for this test
436 to never be true for nested teams. */ 834 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
437 if (__builtin_expect (nthreads < old_threads_used, 0)) 835 the barrier as well as gomp_managed_threads was temporarily
836 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_COUNT,
837 AFFINITY_COUNT if non-zero will be always at least
838 OLD_THREADS_COUNT - NTHREADS. */
839 if (__builtin_expect (nthreads < old_threads_used, 0)
840 || __builtin_expect (affinity_count, 0))
438 { 841 {
439 long diff = (long) nthreads - (long) old_threads_used; 842 long diff = (long) nthreads - (long) old_threads_used;
440 843
441 gomp_barrier_reinit (&pool->threads_dock, nthreads); 844 if (affinity_count)
845 diff = -affinity_count;
846
847 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
442 848
443 #ifdef HAVE_SYNC_BUILTINS 849 #ifdef HAVE_SYNC_BUILTINS
444 __sync_fetch_and_add (&gomp_managed_threads, diff); 850 __sync_fetch_and_add (&gomp_managed_threads, diff);
445 #else 851 #else
446 gomp_mutex_lock (&gomp_remaining_threads_lock); 852 gomp_mutex_lock (&gomp_managed_threads_lock);
447 gomp_managed_threads += diff; 853 gomp_managed_threads += diff;
448 gomp_mutex_unlock (&gomp_remaining_threads_lock); 854 gomp_mutex_unlock (&gomp_managed_threads_lock);
449 #endif 855 #endif
450 } 856 }
451 } 857 if (__builtin_expect (affinity_thr != NULL, 0)
858 && team->prev_ts.place_partition_len > 64)
859 free (affinity_thr);
860 }
861 #endif
452 862
453 863
454 /* Terminate the current team. This is only to be called by the master 864 /* Terminate the current team. This is only to be called by the master
455 thread. We assume that we must wait for the other threads. */ 865 thread. We assume that we must wait for the other threads. */
456 866
458 gomp_team_end (void) 868 gomp_team_end (void)
459 { 869 {
460 struct gomp_thread *thr = gomp_thread (); 870 struct gomp_thread *thr = gomp_thread ();
461 struct gomp_team *team = thr->ts.team; 871 struct gomp_team *team = thr->ts.team;
462 872
463 /* This barrier handles all pending explicit threads. */ 873 /* This barrier handles all pending explicit threads.
464 gomp_team_barrier_wait (&team->barrier); 874 As #pragma omp cancel parallel might get awaited count in
465 gomp_fini_work_share (thr->ts.work_share); 875 team->barrier in a inconsistent state, we need to use a different
876 counter here. */
877 gomp_team_barrier_wait_final (&team->barrier);
878 if (__builtin_expect (team->team_cancelled, 0))
879 {
880 struct gomp_work_share *ws = team->work_shares_to_free;
881 do
882 {
883 struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
884 if (next_ws == NULL)
885 gomp_ptrlock_set (&ws->next_ws, ws);
886 gomp_fini_work_share (ws);
887 ws = next_ws;
888 }
889 while (ws != NULL);
890 }
891 else
892 gomp_fini_work_share (thr->ts.work_share);
466 893
467 gomp_end_task (); 894 gomp_end_task ();
468 thr->ts = team->prev_ts; 895 thr->ts = team->prev_ts;
469 896
470 if (__builtin_expect (thr->ts.team != NULL, 0)) 897 if (__builtin_expect (thr->ts.team != NULL, 0))
471 { 898 {
472 #ifdef HAVE_SYNC_BUILTINS 899 #ifdef HAVE_SYNC_BUILTINS
473 __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads); 900 __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
474 #else 901 #else
475 gomp_mutex_lock (&gomp_remaining_threads_lock); 902 gomp_mutex_lock (&gomp_managed_threads_lock);
476 gomp_managed_threads -= team->nthreads - 1L; 903 gomp_managed_threads -= team->nthreads - 1L;
477 gomp_mutex_unlock (&gomp_remaining_threads_lock); 904 gomp_mutex_unlock (&gomp_managed_threads_lock);
478 #endif 905 #endif
479 /* This barrier has gomp_barrier_wait_last counterparts 906 /* This barrier has gomp_barrier_wait_last counterparts
480 and ensures the team can be safely destroyed. */ 907 and ensures the team can be safely destroyed. */
481 gomp_barrier_wait (&team->barrier); 908 gomp_barrier_wait (&team->barrier);
482 } 909 }
491 ws = next_ws; 918 ws = next_ws;
492 } 919 }
493 while (ws != NULL); 920 while (ws != NULL);
494 } 921 }
495 gomp_sem_destroy (&team->master_release); 922 gomp_sem_destroy (&team->master_release);
496 #ifndef HAVE_SYNC_BUILTINS
497 gomp_mutex_destroy (&team->work_share_list_free_lock);
498 #endif
499 923
500 if (__builtin_expect (thr->ts.team != NULL, 0) 924 if (__builtin_expect (thr->ts.team != NULL, 0)
501 || __builtin_expect (team->nthreads == 1, 0)) 925 || __builtin_expect (team->nthreads == 1, 0))
502 free_team (team); 926 free_team (team);
503 else 927 else
504 { 928 {
505 struct gomp_thread_pool *pool = thr->thread_pool; 929 struct gomp_thread_pool *pool = thr->thread_pool;
506 if (pool->last_team) 930 if (pool->last_team)
507 free_team (pool->last_team); 931 free_team (pool->last_team);
508 pool->last_team = team; 932 pool->last_team = team;
509 } 933 gomp_release_thread_pool (pool);
510 } 934 }
511 935 }
936
937 #ifdef LIBGOMP_USE_PTHREADS
512 938
513 /* Constructors for this file. */ 939 /* Constructors for this file. */
514 940
515 static void __attribute__((constructor)) 941 static void __attribute__((constructor))
516 initialize_team (void) 942 initialize_team (void)
517 { 943 {
518 struct gomp_thread *thr; 944 #if !defined HAVE_TLS && !defined USE_EMUTLS
519
520 #ifndef HAVE_TLS
521 static struct gomp_thread initial_thread_tls_data; 945 static struct gomp_thread initial_thread_tls_data;
522 946
523 pthread_key_create (&gomp_tls_key, NULL); 947 pthread_key_create (&gomp_tls_key, NULL);
524 pthread_setspecific (gomp_tls_key, &initial_thread_tls_data); 948 pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
525 #endif 949 #endif
526 950
527 if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0) 951 if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
528 gomp_fatal ("could not create thread pool destructor."); 952 gomp_fatal ("could not create thread pool destructor.");
529
530 #ifdef HAVE_TLS
531 thr = &gomp_tls_data;
532 #else
533 thr = &initial_thread_tls_data;
534 #endif
535 gomp_sem_init (&thr->release, 0);
536 } 953 }
537 954
538 static void __attribute__((destructor)) 955 static void __attribute__((destructor))
539 team_destructor (void) 956 team_destructor (void)
540 { 957 {
541 /* Without this dlclose on libgomp could lead to subsequent 958 /* Without this dlclose on libgomp could lead to subsequent
542 crashes. */ 959 crashes. */
543 pthread_key_delete (gomp_thread_destructor); 960 pthread_key_delete (gomp_thread_destructor);
544 } 961 }
962 #endif
545 963
546 struct gomp_task_icv * 964 struct gomp_task_icv *
547 gomp_new_icv (void) 965 gomp_new_icv (void)
548 { 966 {
549 struct gomp_thread *thr = gomp_thread (); 967 struct gomp_thread *thr = gomp_thread ();
550 struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task)); 968 struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
551 gomp_init_task (task, NULL, &gomp_global_icv); 969 gomp_init_task (task, NULL, &gomp_global_icv);
552 thr->task = task; 970 thr->task = task;
971 #ifdef LIBGOMP_USE_PTHREADS
553 pthread_setspecific (gomp_thread_destructor, thr); 972 pthread_setspecific (gomp_thread_destructor, thr);
973 #endif
554 return &task->icv; 974 return &task->icv;
555 } 975 }