Mercurial > hg > CbC > CbC_gcc
comparison libgomp/oacc-async.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* OpenACC Runtime Library Definitions. | 1 /* OpenACC Runtime Library Definitions. |
2 | 2 |
3 Copyright (C) 2013-2018 Free Software Foundation, Inc. | 3 Copyright (C) 2013-2020 Free Software Foundation, Inc. |
4 | 4 |
5 Contributed by Mentor Embedded. | 5 Contributed by Mentor Embedded. |
6 | 6 |
7 This file is part of the GNU Offloading and Multi Processing Library | 7 This file is part of the GNU Offloading and Multi Processing Library |
8 (libgomp). | 8 (libgomp). |
25 a copy of the GCC Runtime Library Exception along with this program; | 25 a copy of the GCC Runtime Library Exception along with this program; |
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | 26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
27 <http://www.gnu.org/licenses/>. */ | 27 <http://www.gnu.org/licenses/>. */ |
28 | 28 |
29 #include <assert.h> | 29 #include <assert.h> |
30 #include <string.h> | |
30 #include "openacc.h" | 31 #include "openacc.h" |
31 #include "libgomp.h" | 32 #include "libgomp.h" |
32 #include "oacc-int.h" | 33 #include "oacc-int.h" |
33 | 34 |
35 static struct goacc_thread * | |
36 get_goacc_thread (void) | |
37 { | |
38 struct goacc_thread *thr = goacc_thread (); | |
39 | |
40 if (!thr || !thr->dev) | |
41 gomp_fatal ("no device active"); | |
42 | |
43 return thr; | |
44 } | |
45 | |
46 static int | |
47 validate_async_val (int async) | |
48 { | |
49 if (!async_valid_p (async)) | |
50 gomp_fatal ("invalid async-argument: %d", async); | |
51 | |
52 if (async == acc_async_sync) | |
53 return -1; | |
54 | |
55 if (async == acc_async_noval) | |
56 return 0; | |
57 | |
58 if (async >= 0) | |
59 /* TODO: We reserve 0 for acc_async_noval until we can clarify the | |
60 semantics of "default_async". */ | |
61 return 1 + async; | |
62 else | |
63 __builtin_unreachable (); | |
64 } | |
65 | |
66 /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This | |
67 returns NULL if no asyncqueue is to be used, or if the asyncqueue doesn't | |
68 exist yet and CREATE is false; if CREATE, a missing asyncqueue is created. | |
69 | |
70 Unless CREATE, this will not generate any OpenACC Profiling Interface | |
71 events. */ | |
72 | |
73 attribute_hidden struct goacc_asyncqueue * | |
74 lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async) | |
75 { | |
76 async = validate_async_val (async); | |
77 if (async < 0) | |
78 return NULL; | |
79 | |
80 struct goacc_asyncqueue *ret_aq = NULL; | |
81 struct gomp_device_descr *dev = thr->dev; | |
82 | |
83 gomp_mutex_lock (&dev->openacc.async.lock); | |
84 | |
85 if (!create | |
86 && (async >= dev->openacc.async.nasyncqueue | |
87 || !dev->openacc.async.asyncqueue[async])) | |
88 goto end; | |
89 | |
90 if (async >= dev->openacc.async.nasyncqueue) | |
91 { | |
92 int diff = async + 1 - dev->openacc.async.nasyncqueue; | |
93 dev->openacc.async.asyncqueue | |
94 = gomp_realloc (dev->openacc.async.asyncqueue, | |
95 sizeof (goacc_aq) * (async + 1)); | |
96 memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue, | |
97 0, sizeof (goacc_aq) * diff); | |
98 dev->openacc.async.nasyncqueue = async + 1; | |
99 } | |
100 | |
101 if (!dev->openacc.async.asyncqueue[async]) | |
102 { | |
103 dev->openacc.async.asyncqueue[async] | |
104 = dev->openacc.async.construct_func (dev->target_id); | |
105 | |
106 if (!dev->openacc.async.asyncqueue[async]) | |
107 { | |
108 gomp_mutex_unlock (&dev->openacc.async.lock); | |
109 gomp_fatal ("async %d creation failed", async); | |
110 } | |
111 | |
112 /* Link new async queue into active list. */ | |
113 goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list)); | |
114 n->aq = dev->openacc.async.asyncqueue[async]; | |
115 n->next = dev->openacc.async.active; | |
116 dev->openacc.async.active = n; | |
117 } | |
118 | |
119 ret_aq = dev->openacc.async.asyncqueue[async]; | |
120 | |
121 end: | |
122 gomp_mutex_unlock (&dev->openacc.async.lock); | |
123 return ret_aq; | |
124 } | |
125 | |
126 /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This | |
127 might return NULL if no asyncqueue is to be used. Otherwise, create the | |
128 asyncqueue if it doesn't exist yet. */ | |
129 | |
130 attribute_hidden struct goacc_asyncqueue * | |
131 get_goacc_asyncqueue (int async) | |
132 { | |
133 struct goacc_thread *thr = get_goacc_thread (); | |
134 return lookup_goacc_asyncqueue (thr, true, async); | |
135 } | |
136 | |
34 int | 137 int |
35 acc_async_test (int async) | 138 acc_async_test (int async) |
36 { | 139 { |
37 if (!async_valid_p (async)) | |
38 gomp_fatal ("invalid async argument: %d", async); | |
39 | |
40 struct goacc_thread *thr = goacc_thread (); | 140 struct goacc_thread *thr = goacc_thread (); |
41 | 141 |
42 if (!thr || !thr->dev) | 142 if (!thr || !thr->dev) |
43 gomp_fatal ("no device active"); | 143 gomp_fatal ("no device active"); |
44 | 144 |
45 return thr->dev->openacc.async_test_func (async); | 145 goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); |
146 if (!aq) | |
147 return 1; | |
148 | |
149 acc_prof_info prof_info; | |
150 acc_api_info api_info; | |
151 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
152 if (profiling_p) | |
153 { | |
154 prof_info.async = async; | |
155 prof_info.async_queue = prof_info.async; | |
156 } | |
157 | |
158 int res = thr->dev->openacc.async.test_func (aq); | |
159 | |
160 if (profiling_p) | |
161 { | |
162 thr->prof_info = NULL; | |
163 thr->api_info = NULL; | |
164 } | |
165 | |
166 return res; | |
46 } | 167 } |
47 | 168 |
48 int | 169 int |
49 acc_async_test_all (void) | 170 acc_async_test_all (void) |
50 { | 171 { |
51 struct goacc_thread *thr = goacc_thread (); | 172 struct goacc_thread *thr = get_goacc_thread (); |
52 | 173 |
53 if (!thr || !thr->dev) | 174 acc_prof_info prof_info; |
54 gomp_fatal ("no device active"); | 175 acc_api_info api_info; |
55 | 176 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
56 return thr->dev->openacc.async_test_all_func (); | 177 |
178 int ret = 1; | |
179 gomp_mutex_lock (&thr->dev->openacc.async.lock); | |
180 for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) | |
181 if (!thr->dev->openacc.async.test_func (l->aq)) | |
182 { | |
183 ret = 0; | |
184 break; | |
185 } | |
186 gomp_mutex_unlock (&thr->dev->openacc.async.lock); | |
187 | |
188 if (profiling_p) | |
189 { | |
190 thr->prof_info = NULL; | |
191 thr->api_info = NULL; | |
192 } | |
193 | |
194 return ret; | |
57 } | 195 } |
58 | 196 |
59 void | 197 void |
60 acc_wait (int async) | 198 acc_wait (int async) |
61 { | 199 { |
62 if (!async_valid_p (async)) | 200 struct goacc_thread *thr = get_goacc_thread (); |
63 gomp_fatal ("invalid async argument: %d", async); | 201 |
64 | 202 goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); |
65 struct goacc_thread *thr = goacc_thread (); | 203 if (!aq) |
66 | 204 return; |
67 if (!thr || !thr->dev) | 205 |
68 gomp_fatal ("no device active"); | 206 acc_prof_info prof_info; |
69 | 207 acc_api_info api_info; |
70 thr->dev->openacc.async_wait_func (async); | 208 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
209 if (profiling_p) | |
210 { | |
211 prof_info.async = async; | |
212 prof_info.async_queue = prof_info.async; | |
213 } | |
214 | |
215 if (!thr->dev->openacc.async.synchronize_func (aq)) | |
216 gomp_fatal ("wait on %d failed", async); | |
217 | |
218 if (profiling_p) | |
219 { | |
220 thr->prof_info = NULL; | |
221 thr->api_info = NULL; | |
222 } | |
71 } | 223 } |
72 | 224 |
73 /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */ | 225 /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */ |
74 #ifdef HAVE_ATTRIBUTE_ALIAS | 226 #ifdef HAVE_ATTRIBUTE_ALIAS |
75 strong_alias (acc_wait, acc_async_wait) | 227 strong_alias (acc_wait, acc_async_wait) |
82 #endif | 234 #endif |
83 | 235 |
84 void | 236 void |
85 acc_wait_async (int async1, int async2) | 237 acc_wait_async (int async1, int async2) |
86 { | 238 { |
239 struct goacc_thread *thr = get_goacc_thread (); | |
240 | |
241 goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1); | |
242 /* TODO: Is returning early also correct for async1 == acc_async_sync, | |
243 assuming that in this case we'll always be synchronous anyway? */ | |
244 if (!aq1) | |
245 return; | |
246 | |
247 acc_prof_info prof_info; | |
248 acc_api_info api_info; | |
249 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
250 if (profiling_p) | |
251 { | |
252 prof_info.async = async2; | |
253 prof_info.async_queue = prof_info.async; | |
254 } | |
255 | |
256 goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2); | |
257 /* An async queue is always synchronized with itself. */ | |
258 if (aq1 == aq2) | |
259 goto out_prof; | |
260 | |
261 if (aq2) | |
262 { | |
263 if (!thr->dev->openacc.async.serialize_func (aq1, aq2)) | |
264 gomp_fatal ("ordering of async ids %d and %d failed", async1, async2); | |
265 } | |
266 else | |
267 { | |
268 /* TODO: Local thread synchronization. | |
269 Necessary for the "async2 == acc_async_sync" case, or can just skip? */ | |
270 if (!thr->dev->openacc.async.synchronize_func (aq1)) | |
271 gomp_fatal ("wait on %d failed", async1); | |
272 } | |
273 | |
274 out_prof: | |
275 if (profiling_p) | |
276 { | |
277 thr->prof_info = NULL; | |
278 thr->api_info = NULL; | |
279 } | |
280 } | |
281 | |
282 void | |
283 acc_wait_all (void) | |
284 { | |
87 struct goacc_thread *thr = goacc_thread (); | 285 struct goacc_thread *thr = goacc_thread (); |
88 | 286 |
89 if (!thr || !thr->dev) | 287 acc_prof_info prof_info; |
90 gomp_fatal ("no device active"); | 288 acc_api_info api_info; |
91 | 289 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
92 thr->dev->openacc.async_wait_async_func (async1, async2); | 290 |
93 } | 291 bool ret = true; |
94 | 292 gomp_mutex_lock (&thr->dev->openacc.async.lock); |
95 void | 293 for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) |
96 acc_wait_all (void) | 294 ret &= thr->dev->openacc.async.synchronize_func (l->aq); |
97 { | 295 gomp_mutex_unlock (&thr->dev->openacc.async.lock); |
98 struct goacc_thread *thr = goacc_thread (); | 296 |
99 | 297 if (profiling_p) |
100 if (!thr || !thr->dev) | 298 { |
101 gomp_fatal ("no device active"); | 299 thr->prof_info = NULL; |
102 | 300 thr->api_info = NULL; |
103 thr->dev->openacc.async_wait_all_func (); | 301 } |
302 | |
303 if (!ret) | |
304 gomp_fatal ("wait all failed"); | |
104 } | 305 } |
105 | 306 |
106 /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */ | 307 /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */ |
107 #ifdef HAVE_ATTRIBUTE_ALIAS | 308 #ifdef HAVE_ATTRIBUTE_ALIAS |
108 strong_alias (acc_wait_all, acc_async_wait_all) | 309 strong_alias (acc_wait_all, acc_async_wait_all) |
115 #endif | 316 #endif |
116 | 317 |
117 void | 318 void |
118 acc_wait_all_async (int async) | 319 acc_wait_all_async (int async) |
119 { | 320 { |
120 if (!async_valid_p (async)) | 321 struct goacc_thread *thr = get_goacc_thread (); |
121 gomp_fatal ("invalid async argument: %d", async); | 322 |
323 acc_prof_info prof_info; | |
324 acc_api_info api_info; | |
325 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
326 if (profiling_p) | |
327 { | |
328 prof_info.async = async; | |
329 prof_info.async_queue = prof_info.async; | |
330 } | |
331 | |
332 goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async); | |
333 | |
334 bool ret = true; | |
335 gomp_mutex_lock (&thr->dev->openacc.async.lock); | |
336 for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) | |
337 { | |
338 if (waiting_queue) | |
339 ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue); | |
340 else | |
341 /* TODO: Local thread synchronization. | |
342 Necessary for the "async == acc_async_sync" case, or can just skip? */ | |
343 ret &= thr->dev->openacc.async.synchronize_func (l->aq); | |
344 } | |
345 gomp_mutex_unlock (&thr->dev->openacc.async.lock); | |
346 | |
347 if (profiling_p) | |
348 { | |
349 thr->prof_info = NULL; | |
350 thr->api_info = NULL; | |
351 } | |
352 | |
353 if (!ret) | |
354 gomp_fatal ("wait all async(%d) failed", async); | |
355 } | |
356 | |
357 void | |
358 GOACC_wait (int async, int num_waits, ...) | |
359 { | |
360 goacc_lazy_initialize (); | |
122 | 361 |
123 struct goacc_thread *thr = goacc_thread (); | 362 struct goacc_thread *thr = goacc_thread (); |
124 | 363 |
125 if (!thr || !thr->dev) | 364 /* No nesting. */ |
126 gomp_fatal ("no device active"); | 365 assert (thr->prof_info == NULL); |
127 | 366 assert (thr->api_info == NULL); |
128 thr->dev->openacc.async_wait_all_async_func (async); | 367 acc_prof_info prof_info; |
129 } | 368 acc_api_info api_info; |
369 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
370 if (profiling_p) | |
371 { | |
372 prof_info.async = async; | |
373 prof_info.async_queue = prof_info.async; | |
374 } | |
375 | |
376 if (num_waits) | |
377 { | |
378 va_list ap; | |
379 | |
380 va_start (ap, num_waits); | |
381 goacc_wait (async, num_waits, &ap); | |
382 va_end (ap); | |
383 } | |
384 else if (async == acc_async_sync) | |
385 acc_wait_all (); | |
386 else | |
387 acc_wait_all_async (async); | |
388 | |
389 if (profiling_p) | |
390 { | |
391 thr->prof_info = NULL; | |
392 thr->api_info = NULL; | |
393 } | |
394 } | |
395 | |
396 attribute_hidden void | |
397 goacc_wait (int async, int num_waits, va_list *ap) | |
398 { | |
399 while (num_waits--) | |
400 { | |
401 int qid = va_arg (*ap, int); | |
402 | |
403 /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */ | |
404 if (qid == acc_async_noval) | |
405 { | |
406 if (async == acc_async_sync) | |
407 acc_wait_all (); | |
408 else | |
409 acc_wait_all_async (async); | |
410 break; | |
411 } | |
412 | |
413 if (acc_async_test (qid)) | |
414 continue; | |
415 | |
416 if (async == acc_async_sync) | |
417 acc_wait (qid); | |
418 else if (qid == async) | |
419 /* If we're waiting on the same asynchronous queue as we're | |
420 launching on, the queue itself will order work as | |
421 required, so there's no need to wait explicitly. */ | |
422 ; | |
423 else | |
424 acc_wait_async (qid, async); | |
425 } | |
426 } | |
427 | |
428 attribute_hidden void | |
429 goacc_async_free (struct gomp_device_descr *devicep, | |
430 struct goacc_asyncqueue *aq, void *ptr) | |
431 { | |
432 if (!aq) | |
433 free (ptr); | |
434 else | |
435 devicep->openacc.async.queue_callback_func (aq, free, ptr); | |
436 } | |
437 | |
438 /* This function initializes the asyncqueues for the device specified by | |
439 DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on | |
440 return. */ | |
441 | |
442 attribute_hidden void | |
443 goacc_init_asyncqueues (struct gomp_device_descr *devicep) | |
444 { | |
445 devicep->openacc.async.nasyncqueue = 0; | |
446 devicep->openacc.async.asyncqueue = NULL; | |
447 devicep->openacc.async.active = NULL; | |
448 gomp_mutex_init (&devicep->openacc.async.lock); | |
449 } | |
450 | |
451 /* This function finalizes the asyncqueues for the device specified by DEVICEP. | |
452 TODO DEVICEP must be locked on entry, and remains locked on return. */ | |
453 | |
454 attribute_hidden bool | |
455 goacc_fini_asyncqueues (struct gomp_device_descr *devicep) | |
456 { | |
457 bool ret = true; | |
458 gomp_mutex_lock (&devicep->openacc.async.lock); | |
459 if (devicep->openacc.async.nasyncqueue > 0) | |
460 { | |
461 goacc_aq_list next; | |
462 for (goacc_aq_list l = devicep->openacc.async.active; l; l = next) | |
463 { | |
464 ret &= devicep->openacc.async.destruct_func (l->aq); | |
465 next = l->next; | |
466 free (l); | |
467 } | |
468 free (devicep->openacc.async.asyncqueue); | |
469 devicep->openacc.async.nasyncqueue = 0; | |
470 devicep->openacc.async.asyncqueue = NULL; | |
471 devicep->openacc.async.active = NULL; | |
472 } | |
473 gomp_mutex_unlock (&devicep->openacc.async.lock); | |
474 gomp_mutex_destroy (&devicep->openacc.async.lock); | |
475 return ret; | |
476 } |