Mercurial > hg > CbC > CbC_gcc
comparison libgomp/iter_ull.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc. | |
2 Contributed by Richard Henderson <rth@redhat.com>. | |
3 | |
4 This file is part of the GNU OpenMP Library (libgomp). | |
5 | |
6 Libgomp is free software; you can redistribute it and/or modify it | |
7 under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation; either version 3, or (at your option) | |
9 any later version. | |
10 | |
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
14 more details. | |
15 | |
16 Under Section 7 of GPL version 3, you are granted additional | |
17 permissions described in the GCC Runtime Library Exception, version | |
18 3.1, as published by the Free Software Foundation. | |
19 | |
20 You should have received a copy of the GNU General Public License and | |
21 a copy of the GCC Runtime Library Exception along with this program; | |
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 <http://www.gnu.org/licenses/>. */ | |
24 | |
25 /* This file contains routines for managing work-share iteration, both | |
26 for loops and sections. */ | |
27 | |
28 #include "libgomp.h" | |
29 #include <stdlib.h> | |
30 | |
31 typedef unsigned long long gomp_ull; | |
32 | |
33 /* This function implements the STATIC scheduling method. The caller should | |
34 iterate *pstart <= x < *pend. Return zero if there are more iterations | |
35 to perform; nonzero if not. Return less than 0 if this thread had | |
36 received the absolutely last iteration. */ | |
37 | |
int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 records that this thread already handed out the
     chunk containing the very last iteration; nothing more to do.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      /* Nonzero (no more work) exactly when the range is empty.  */
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, s0, e0, s, e;

      /* Block scheduling hands out exactly one range per thread.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  NOTE(review): a nonzero
	 ws->mode is taken here to mean a downward loop (negative
	 increment, reversed bounds) — confirm against the work-share
	 initialization code in loop_ull.c.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      /* Round the per-thread quantum up so all n iterations are covered.  */
      q += (q * nthreads != n);
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
	e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      /* The thread whose block ends at n received the final iteration.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      /* Total iteration count; see the NOTE on ws->mode above.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  static_trip counts how many
	 rounds this thread has already taken.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      /* The chunk that reaches n is the last one overall.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
139 | |
140 | |
141 /* This function implements the DYNAMIC scheduling method. Arguments are | |
142 as for gomp_iter_ull_static_next. This function must be called with | |
143 ws->lock held. */ | |
144 | |
145 bool | |
146 gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend) | |
147 { | |
148 struct gomp_thread *thr = gomp_thread (); | |
149 struct gomp_work_share *ws = thr->ts.work_share; | |
150 gomp_ull start, end, chunk, left; | |
151 | |
152 start = ws->next_ull; | |
153 if (start == ws->end_ull) | |
154 return false; | |
155 | |
156 chunk = ws->chunk_size_ull; | |
157 left = ws->end_ull - start; | |
158 if (__builtin_expect (ws->mode & 2, 0)) | |
159 { | |
160 if (chunk < left) | |
161 chunk = left; | |
162 } | |
163 else | |
164 { | |
165 if (chunk > left) | |
166 chunk = left; | |
167 } | |
168 end = start + chunk; | |
169 | |
170 ws->next_ull = end; | |
171 *pstart = start; | |
172 *pend = end; | |
173 return true; | |
174 } | |
175 | |
176 | |
177 #if defined HAVE_SYNC_BUILTINS && defined __LP64__ | |
178 /* Similar, but doesn't require the lock held, and uses compare-and-swap | |
179 instead. Note that the only memory value that changes is ws->next_ull. */ | |
180 | |
bool
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  /* end and chunk are constant after work-share init; only next_ull is
     mutated concurrently.  */
  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

  /* Mode bit 0 set means an unconditional fetch-and-add is safe:
     next_ull can run past end_ull without wrapping.  NOTE(review): that
     overflow guarantee is established by the work-share initialization
     code elsewhere — confirm against loop_ull.c.  */
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      /* Mode bit 1 distinguishes downward loops; chunk is a "negative"
	 value wrapped into unsigned range there, so all comparisons
	 are reversed.  */
      if (__builtin_expect (ws->mode & 2, 0) == 0)
	{
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: claim [start, start+chunk) via compare-and-swap,
     clamping the chunk to the iterations actually left.  */
  start = ws->next_ull;
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
	return false;

      if (__builtin_expect (ws->mode & 2, 0))
	{
	  /* Downward loop: inverted clamp, see above.  */
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race; retry from the value another thread installed.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
250 #endif /* HAVE_SYNC_BUILTINS */ | |
251 | |
252 | |
253 /* This function implements the GUIDED scheduling method. Arguments are | |
254 as for gomp_iter_ull_static_next. This function must be called with the | |
255 work share lock held. */ | |
256 | |
257 bool | |
258 gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend) | |
259 { | |
260 struct gomp_thread *thr = gomp_thread (); | |
261 struct gomp_work_share *ws = thr->ts.work_share; | |
262 struct gomp_team *team = thr->ts.team; | |
263 gomp_ull nthreads = team ? team->nthreads : 1; | |
264 gomp_ull n, q; | |
265 gomp_ull start, end; | |
266 | |
267 if (ws->next_ull == ws->end_ull) | |
268 return false; | |
269 | |
270 start = ws->next_ull; | |
271 if (__builtin_expect (ws->mode, 0) == 0) | |
272 n = (ws->end_ull - start) / ws->incr_ull; | |
273 else | |
274 n = (start - ws->end_ull) / -ws->incr_ull; | |
275 q = (n + nthreads - 1) / nthreads; | |
276 | |
277 if (q < ws->chunk_size_ull) | |
278 q = ws->chunk_size_ull; | |
279 if (q <= n) | |
280 end = start + q * ws->incr_ull; | |
281 else | |
282 end = ws->end_ull; | |
283 | |
284 ws->next_ull = end; | |
285 *pstart = start; | |
286 *pend = end; | |
287 return true; | |
288 } | |
289 | |
290 #if defined HAVE_SYNC_BUILTINS && defined __LP64__ | |
291 /* Similar, but doesn't require the lock held, and uses compare-and-swap | |
292 instead. Note that the only memory value that changes is ws->next_ull. */ | |
293 | |
bool
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  /* Snapshot the fields that are constant after initialization; only
     ws->next_ull is mutated concurrently, via the CAS below.  */
  start = ws->next_ull;
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Remaining iterations in zero-based units; nonzero mode means a
	 downward loop (negative increment).  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (end - start) / incr;
      else
	n = (start - end) / -incr;
      /* Guided schedule: take ceil(n / nthreads), but never less than
	 the requested chunk size.  */
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* CAS failed: another thread advanced next_ull — retry from the
	 value it installed.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
341 #endif /* HAVE_SYNC_BUILTINS */ |