Mercurial > hg > CbC > CbC_gcc
annotate libgomp/sections.c @ 158:494b0b89df80 default tip
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 18:13:55 +0900 |
parents | 1830386684a0 |
children |
rev | line source |
---|---|
145 | 1 /* Copyright (C) 2005-2020 Free Software Foundation, Inc. |
0 | 2 Contributed by Richard Henderson <rth@redhat.com>. |
3 | |
111 | 4 This file is part of the GNU Offloading and Multi Processing Library |
5 (libgomp). | |
0 | 6 |
7 Libgomp is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 3, or (at your option) | |
10 any later version. | |
11 | |
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 more details. | |
16 | |
17 Under Section 7 of GPL version 3, you are granted additional | |
18 permissions described in the GCC Runtime Library Exception, version | |
19 3.1, as published by the Free Software Foundation. | |
20 | |
21 You should have received a copy of the GNU General Public License and | |
22 a copy of the GCC Runtime Library Exception along with this program; | |
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 <http://www.gnu.org/licenses/>. */ | |
25 | |
26 /* This file handles the SECTIONS construct. */ | |
27 | |
28 #include "libgomp.h" | |
145 | 29 #include <string.h> |
0 | 30 |
31 | |
145 | 32 ialias_redirect (GOMP_taskgroup_reduction_register) |
33 | |
0 | 34 /* Initialize the given work share construct from the given arguments. */ |
35 | |
36 static inline void | |
37 gomp_sections_init (struct gomp_work_share *ws, unsigned count) | |
38 { | |
39 ws->sched = GFS_DYNAMIC; | |
40 ws->chunk_size = 1; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
41 ws->end = count + 1L; |
0 | 42 ws->incr = 1; |
43 ws->next = 1; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
44 #ifdef HAVE_SYNC_BUILTINS |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
45 /* Prepare things to make each iteration faster. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
46 if (sizeof (long) > sizeof (unsigned)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
47 ws->mode = 1; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
48 else |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
49 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
50 struct gomp_thread *thr = gomp_thread (); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
51 struct gomp_team *team = thr->ts.team; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
52 long nthreads = team ? team->nthreads : 1; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
53 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
54 ws->mode = ((nthreads | ws->end) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
55 < 1UL << (sizeof (long) * __CHAR_BIT__ / 2 - 1)); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
56 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
57 #else |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
58 ws->mode = 0; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
59 #endif |
0 | 60 } |
61 | |
62 /* This routine is called when first encountering a sections construct | |
63 that is not bound directly to a parallel construct. The first thread | |
64 that arrives will create the work-share construct; subsequent threads | |
65 will see the construct exists and allocate work from it. | |
66 | |
67 COUNT is the number of sections in this construct. | |
68 | |
69 Returns the 1-based section number for this thread to perform, or 0 if | |
70 all work was assigned to other threads prior to this thread's arrival. */ | |
71 | |
72 unsigned | |
73 GOMP_sections_start (unsigned count) | |
74 { | |
75 struct gomp_thread *thr = gomp_thread (); | |
76 long s, e, ret; | |
77 | |
145 | 78 if (gomp_work_share_start (0)) |
0 | 79 { |
80 gomp_sections_init (thr->ts.work_share, count); | |
81 gomp_work_share_init_done (); | |
82 } | |
83 | |
84 #ifdef HAVE_SYNC_BUILTINS | |
85 if (gomp_iter_dynamic_next (&s, &e)) | |
86 ret = s; | |
87 else | |
88 ret = 0; | |
89 #else | |
90 gomp_mutex_lock (&thr->ts.work_share->lock); | |
91 if (gomp_iter_dynamic_next_locked (&s, &e)) | |
92 ret = s; | |
93 else | |
94 ret = 0; | |
95 gomp_mutex_unlock (&thr->ts.work_share->lock); | |
96 #endif | |
97 | |
98 return ret; | |
99 } | |
100 | |
145 | 101 unsigned |
102 GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem) | |
103 { | |
104 struct gomp_thread *thr = gomp_thread (); | |
105 long s, e, ret; | |
106 | |
107 if (reductions) | |
108 gomp_workshare_taskgroup_start (); | |
109 if (gomp_work_share_start (0)) | |
110 { | |
111 gomp_sections_init (thr->ts.work_share, count); | |
112 if (reductions) | |
113 { | |
114 GOMP_taskgroup_reduction_register (reductions); | |
115 thr->task->taskgroup->workshare = true; | |
116 thr->ts.work_share->task_reductions = reductions; | |
117 } | |
118 if (mem) | |
119 { | |
120 uintptr_t size = (uintptr_t) *mem; | |
121 #define INLINE_ORDERED_TEAM_IDS_OFF \ | |
122 ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \ | |
123 + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1)) | |
124 if (size > (sizeof (struct gomp_work_share) | |
125 - INLINE_ORDERED_TEAM_IDS_OFF)) | |
126 *mem | |
127 = (void *) (thr->ts.work_share->ordered_team_ids | |
128 = gomp_malloc_cleared (size)); | |
129 else | |
130 *mem = memset (((char *) thr->ts.work_share) | |
131 + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size); | |
132 } | |
133 gomp_work_share_init_done (); | |
134 } | |
135 else | |
136 { | |
137 if (reductions) | |
138 { | |
139 uintptr_t *first_reductions = thr->ts.work_share->task_reductions; | |
140 gomp_workshare_task_reduction_register (reductions, | |
141 first_reductions); | |
142 } | |
143 if (mem) | |
144 { | |
145 if ((offsetof (struct gomp_work_share, inline_ordered_team_ids) | |
146 & (__alignof__ (long long) - 1)) == 0) | |
147 *mem = (void *) thr->ts.work_share->ordered_team_ids; | |
148 else | |
149 { | |
150 uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids; | |
151 p += __alignof__ (long long) - 1; | |
152 p &= ~(__alignof__ (long long) - 1); | |
153 *mem = (void *) p; | |
154 } | |
155 } | |
156 } | |
157 | |
158 #ifdef HAVE_SYNC_BUILTINS | |
159 if (gomp_iter_dynamic_next (&s, &e)) | |
160 ret = s; | |
161 else | |
162 ret = 0; | |
163 #else | |
164 gomp_mutex_lock (&thr->ts.work_share->lock); | |
165 if (gomp_iter_dynamic_next_locked (&s, &e)) | |
166 ret = s; | |
167 else | |
168 ret = 0; | |
169 gomp_mutex_unlock (&thr->ts.work_share->lock); | |
170 #endif | |
171 | |
172 return ret; | |
173 } | |
174 | |
0 | 175 /* This routine is called when the thread completes processing of the |
176 section currently assigned to it. If the work-share construct is | |
177 bound directly to a parallel construct, then the construct may have | |
178 been set up before the parallel. In which case, this may be the | |
179 first iteration for the thread. | |
180 | |
181 Returns the 1-based section number for this thread to perform, or 0 if | |
182 all work was assigned to other threads prior to this thread's arrival. */ | |
183 | |
184 unsigned | |
185 GOMP_sections_next (void) | |
186 { | |
187 long s, e, ret; | |
188 | |
189 #ifdef HAVE_SYNC_BUILTINS | |
190 if (gomp_iter_dynamic_next (&s, &e)) | |
191 ret = s; | |
192 else | |
193 ret = 0; | |
194 #else | |
195 struct gomp_thread *thr = gomp_thread (); | |
196 | |
197 gomp_mutex_lock (&thr->ts.work_share->lock); | |
198 if (gomp_iter_dynamic_next_locked (&s, &e)) | |
199 ret = s; | |
200 else | |
201 ret = 0; | |
202 gomp_mutex_unlock (&thr->ts.work_share->lock); | |
203 #endif | |
204 | |
205 return ret; | |
206 } | |
207 | |
208 /* This routine pre-initializes a work-share construct to avoid one | |
209 synchronization once we get into the loop. */ | |
210 | |
211 void | |
212 GOMP_parallel_sections_start (void (*fn) (void *), void *data, | |
213 unsigned num_threads, unsigned count) | |
214 { | |
215 struct gomp_team *team; | |
216 | |
217 num_threads = gomp_resolve_num_threads (num_threads, count); | |
218 team = gomp_new_team (num_threads); | |
219 gomp_sections_init (&team->work_shares[0], count); | |
145 | 220 gomp_team_start (fn, data, num_threads, 0, team, NULL); |
111 | 221 } |
222 | |
223 ialias_redirect (GOMP_parallel_end) | |
224 | |
225 void | |
226 GOMP_parallel_sections (void (*fn) (void *), void *data, | |
227 unsigned num_threads, unsigned count, unsigned flags) | |
228 { | |
229 struct gomp_team *team; | |
230 | |
231 num_threads = gomp_resolve_num_threads (num_threads, count); | |
232 team = gomp_new_team (num_threads); | |
233 gomp_sections_init (&team->work_shares[0], count); | |
145 | 234 gomp_team_start (fn, data, num_threads, flags, team, NULL); |
111 | 235 fn (data); |
236 GOMP_parallel_end (); | |
0 | 237 } |
238 | |
/* The GOMP_sections_end* routines are called after the thread is told
   that all sections are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */
242 | |
/* End the current sections construct by delegating to
   gomp_work_share_end.  */

void
GOMP_sections_end (void)
{
  gomp_work_share_end ();
}
248 | |
/* End the current sections construct by delegating to
   gomp_work_share_end_cancel, and pass its result back to the caller.
   NOTE(review): presumably the result reports whether the region was
   cancelled -- confirm against gomp_work_share_end_cancel.  */

bool
GOMP_sections_end_cancel (void)
{
  const bool result = gomp_work_share_end_cancel ();

  return result;
}
254 | |
/* End the current sections construct by delegating to
   gomp_work_share_end_nowait.  */

void
GOMP_sections_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}