comparison libgomp/iter_ull.c @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 04ced10e8804
comparing -1:000000000000 with 0:a06113de4d67
1 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU OpenMP Library (libgomp).
5
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 more details.
15
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
19
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
24
25 /* This file contains routines for managing work-share iteration, both
26 for loops and sections. */
27
28 #include "libgomp.h"
29 #include <stdlib.h>
30
31 typedef unsigned long long gomp_ull;
32
33 /* This function implements the STATIC scheduling method. The caller should
34 iterate *pstart <= x < *pend. Return zero if there are more iterations
35 to perform; nonzero if not. Return a value less than zero if this thread
36 already received the very last iteration. */
37
38 int
39 gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
40 {
41 struct gomp_thread *thr = gomp_thread ();
42 struct gomp_team *team = thr->ts.team;
43 struct gomp_work_share *ws = thr->ts.work_share;
44 unsigned long nthreads = team ? team->nthreads : 1;
45
46 if (thr->ts.static_trip == -1)
47 return -1;
48
49 /* Quick test for degenerate teams and orphaned constructs. */
50 if (nthreads == 1)
51 {
52 *pstart = ws->next_ull;
53 *pend = ws->end_ull;
54 thr->ts.static_trip = -1;
55 return ws->next_ull == ws->end_ull;
56 }
57
58 /* We interpret chunk_size zero as "unspecified", which means that we
59 should break up the iterations such that each thread makes only one
60 trip through the outer loop. */
61 if (ws->chunk_size_ull == 0)
62 {
63 gomp_ull n, q, i, s0, e0, s, e;
64
65 if (thr->ts.static_trip > 0)
66 return 1;
67
68 /* Compute the total number of iterations. */
69 if (__builtin_expect (ws->mode, 0) == 0)
70 n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
71 else
72 n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
73 i = thr->ts.team_id;
74
75 /* Compute the "zero-based" start and end points. That is, as
76 if the loop began at zero and incremented by one. */
77 q = n / nthreads;
78 q += (q * nthreads != n);
79 s0 = q * i;
80 e0 = s0 + q;
81 if (e0 > n)
82 e0 = n;
83
85 /* Notice when no iterations are allocated for this thread. */
85 if (s0 >= e0)
86 {
87 thr->ts.static_trip = 1;
88 return 1;
89 }
90
91 /* Transform these to the actual start and end numbers. */
92 s = s0 * ws->incr_ull + ws->next_ull;
93 e = e0 * ws->incr_ull + ws->next_ull;
94
95 *pstart = s;
96 *pend = e;
97 thr->ts.static_trip = (e0 == n ? -1 : 1);
98 return 0;
99 }
100 else
101 {
102 gomp_ull n, s0, e0, i, c, s, e;
103
104 /* Otherwise, each thread gets exactly chunk_size iterations
105 (if available) each time through the loop. */
106
107 if (__builtin_expect (ws->mode, 0) == 0)
108 n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
109 else
110 n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
111 i = thr->ts.team_id;
112 c = ws->chunk_size_ull;
113
114 /* Initial guess is a C sized chunk positioned static_trip * nthreads
115 chunks in, offset by our thread number. */
116 s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
117 e0 = s0 + c;
118
119 /* Detect overflow. */
120 if (s0 >= n)
121 return 1;
122 if (e0 > n)
123 e0 = n;
124
125 /* Transform these to the actual start and end numbers. */
126 s = s0 * ws->incr_ull + ws->next_ull;
127 e = e0 * ws->incr_ull + ws->next_ull;
128
129 *pstart = s;
130 *pend = e;
131
132 if (e0 == n)
133 thr->ts.static_trip = -1;
134 else
135 thr->ts.static_trip++;
136 return 0;
137 }
138 }
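
/* A minimal caller sketch for the contract documented above: repeatedly
   request a chunk and iterate the half-open range *pstart <= x < *pend.
   With chunk_size 0, 100 iterations and 4 threads, each thread would get
   one block of 100/4 = 25 iterations.  This sketch assumes a current work
   share with an increment of 1; the name "example_static_sum" is
   hypothetical and not part of libgomp.  */

static gomp_ull __attribute__ ((unused))
example_static_sum (void)
{
  gomp_ull start, end, i, result = 0;

  /* A zero return means another chunk was assigned to this thread.  */
  while (gomp_iter_ull_static_next (&start, &end) == 0)
    for (i = start; i < end; i++)
      result += i;

  return result;
}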
139
140
141 /* This function implements the DYNAMIC scheduling method. Arguments are
142 as for gomp_iter_ull_static_next. This function must be called with
143 ws->lock held. */
144
145 bool
146 gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
147 {
148 struct gomp_thread *thr = gomp_thread ();
149 struct gomp_work_share *ws = thr->ts.work_share;
150 gomp_ull start, end, chunk, left;
151
152 start = ws->next_ull;
153 if (start == ws->end_ull)
154 return false;
155
156 chunk = ws->chunk_size_ull;
157 left = ws->end_ull - start;
158 if (__builtin_expect (ws->mode & 2, 0))
159 {
160 if (chunk < left)
161 chunk = left;
162 }
163 else
164 {
165 if (chunk > left)
166 chunk = left;
167 }
168 end = start + chunk;
169
170 ws->next_ull = end;
171 *pstart = start;
172 *pend = end;
173 return true;
174 }
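
/* A minimal sketch of the locking discipline stated above: take the work
   share lock, grab one chunk, then release the lock before executing the
   iterations.  Only ws->lock, gomp_mutex_lock and gomp_mutex_unlock come
   from libgomp; the helper name "example_dynamic_locked_chunk" is
   hypothetical.  */

static bool __attribute__ ((unused))
example_dynamic_locked_chunk (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_work_share *ws = gomp_thread ()->ts.work_share;
  bool ret;

  gomp_mutex_lock (&ws->lock);
  ret = gomp_iter_ull_dynamic_next_locked (pstart, pend);
  gomp_mutex_unlock (&ws->lock);

  return ret;
}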
175
176
177 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
178 /* Similar, but doesn't require the lock held, and uses compare-and-swap
179 instead. Note that the only memory value that changes is ws->next_ull. */
180
181 bool
182 gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
183 {
184 struct gomp_thread *thr = gomp_thread ();
185 struct gomp_work_share *ws = thr->ts.work_share;
186 gomp_ull start, end, nend, chunk;
187
188 end = ws->end_ull;
189 chunk = ws->chunk_size_ull;
190
191 if (__builtin_expect (ws->mode & 1, 1))
192 {
193 gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
194 if (__builtin_expect (ws->mode & 2, 0) == 0)
195 {
196 if (tmp >= end)
197 return false;
198 nend = tmp + chunk;
199 if (nend > end)
200 nend = end;
201 *pstart = tmp;
202 *pend = nend;
203 return true;
204 }
205 else
206 {
207 if (tmp <= end)
208 return false;
209 nend = tmp + chunk;
210 if (nend < end)
211 nend = end;
212 *pstart = tmp;
213 *pend = nend;
214 return true;
215 }
216 }
217
218 start = ws->next_ull;
219 while (1)
220 {
221 gomp_ull left = end - start;
222 gomp_ull tmp;
223
224 if (start == end)
225 return false;
226
227 if (__builtin_expect (ws->mode & 2, 0))
228 {
229 if (chunk < left)
230 chunk = left;
231 }
232 else
233 {
234 if (chunk > left)
235 chunk = left;
236 }
237 nend = start + chunk;
238
239 tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
240 if (__builtin_expect (tmp == start, 1))
241 break;
242
243 start = tmp;
244 }
245
246 *pstart = start;
247 *pend = nend;
248 return true;
249 }
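
/* An illustrative reduction of the fetch-and-add fast path above to a plain
   counter, ignoring the overflow concerns that ws->mode encodes.  With
   end = 10 and chunk = 4, successive calls hand out [0,4), [4,8), [8,10)
   and then return false.  All names below are hypothetical.  */

static bool __attribute__ ((unused))
example_fetch_add_chunk (gomp_ull *counter, gomp_ull end, gomp_ull chunk,
                         gomp_ull *pstart, gomp_ull *pend)
{
  gomp_ull start = __sync_fetch_and_add (counter, chunk);

  if (start >= end)
    return false;

  *pstart = start;
  *pend = start + chunk > end ? end : start + chunk;
  return true;
}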
250 #endif /* HAVE_SYNC_BUILTINS */
251
252
253 /* This function implements the GUIDED scheduling method. Arguments are
254 as for gomp_iter_ull_static_next. This function must be called with the
255 work share lock held. */
256
257 bool
258 gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
259 {
260 struct gomp_thread *thr = gomp_thread ();
261 struct gomp_work_share *ws = thr->ts.work_share;
262 struct gomp_team *team = thr->ts.team;
263 gomp_ull nthreads = team ? team->nthreads : 1;
264 gomp_ull n, q;
265 gomp_ull start, end;
266
267 if (ws->next_ull == ws->end_ull)
268 return false;
269
270 start = ws->next_ull;
271 if (__builtin_expect (ws->mode, 0) == 0)
272 n = (ws->end_ull - start) / ws->incr_ull;
273 else
274 n = (start - ws->end_ull) / -ws->incr_ull;
275 q = (n + nthreads - 1) / nthreads;
276
277 if (q < ws->chunk_size_ull)
278 q = ws->chunk_size_ull;
279 if (q <= n)
280 end = start + q * ws->incr_ull;
281 else
282 end = ws->end_ull;
283
284 ws->next_ull = end;
285 *pstart = start;
286 *pend = end;
287 return true;
288 }
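
/* An illustrative trace of the guided rule above, assuming 4 threads,
   1000 remaining iterations, an increment of 1 and chunk_size 1: the
   chunks handed out are ceil(1000/4) = 250, ceil(750/4) = 188,
   ceil(562/4) = 141, ... shrinking toward chunk_size.  The helper below
   is hypothetical and only reproduces that arithmetic.  */

static gomp_ull __attribute__ ((unused))
example_guided_chunk (gomp_ull remaining, gomp_ull nthreads,
                      gomp_ull chunk_size)
{
  gomp_ull q = (remaining + nthreads - 1) / nthreads;

  return q < chunk_size ? chunk_size : q;
}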
289
290 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
291 /* Similar, but doesn't require the lock held, and uses compare-and-swap
292 instead. Note that the only memory value that changes is ws->next_ull. */
293
294 bool
295 gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
296 {
297 struct gomp_thread *thr = gomp_thread ();
298 struct gomp_work_share *ws = thr->ts.work_share;
299 struct gomp_team *team = thr->ts.team;
300 gomp_ull nthreads = team ? team->nthreads : 1;
301 gomp_ull start, end, nend, incr;
302 gomp_ull chunk_size;
303
304 start = ws->next_ull;
305 end = ws->end_ull;
306 incr = ws->incr_ull;
307 chunk_size = ws->chunk_size_ull;
308
309 while (1)
310 {
311 gomp_ull n, q;
312 gomp_ull tmp;
313
314 if (start == end)
315 return false;
316
317 if (__builtin_expect (ws->mode, 0) == 0)
318 n = (end - start) / incr;
319 else
320 n = (start - end) / -incr;
321 q = (n + nthreads - 1) / nthreads;
322
323 if (q < chunk_size)
324 q = chunk_size;
325 if (__builtin_expect (q <= n, 1))
326 nend = start + q * incr;
327 else
328 nend = end;
329
330 tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
331 if (__builtin_expect (tmp == start, 1))
332 break;
333
334 start = tmp;
335 }
336
337 *pstart = start;
338 *pend = nend;
339 return true;
340 }
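
/* A stripped-down sketch of the compare-and-swap retry idiom shared by the
   two lock-free functions above: recompute the desired new value from the
   freshly observed old value until the swap succeeds.  The plain counter
   and the fixed "step" stand in for ws->next_ull and the chunk computation;
   the helper name is hypothetical.  */

static gomp_ull __attribute__ ((unused))
example_cas_advance (gomp_ull *counter, gomp_ull step)
{
  gomp_ull old = *counter, seen;

  while (1)
    {
      seen = __sync_val_compare_and_swap (counter, old, old + step);
      if (seen == old)
        break;
      /* Another thread advanced the counter; retry from its value.  */
      old = seen;
    }

  return old;
}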
341 #endif /* HAVE_SYNC_BUILTINS */