Mercurial > hg > CbC > CbC_gcc
comparison libgomp/oacc-parallel.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc. | |
2 | |
3 Contributed by Mentor Embedded. | |
4 | |
5 This file is part of the GNU Offloading and Multi Processing Library | |
6 (libgomp). | |
7 | |
8 Libgomp is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by | |
10 the Free Software Foundation; either version 3, or (at your option) | |
11 any later version. | |
12 | |
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 Under Section 7 of GPL version 3, you are granted additional | |
19 permissions described in the GCC Runtime Library Exception, version | |
20 3.1, as published by the Free Software Foundation. | |
21 | |
22 You should have received a copy of the GNU General Public License and | |
23 a copy of the GCC Runtime Library Exception along with this program; | |
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 <http://www.gnu.org/licenses/>. */ | |
26 | |
27 /* This file handles OpenACC constructs. */ | |
28 | |
29 #include "openacc.h" | |
30 #include "libgomp.h" | |
31 #include "libgomp_g.h" | |
32 #include "gomp-constants.h" | |
33 #include "oacc-int.h" | |
34 #ifdef HAVE_INTTYPES_H | |
35 # include <inttypes.h> /* For PRIu64. */ | |
36 #endif | |
37 #include <string.h> | |
38 #include <stdarg.h> | |
39 #include <assert.h> | |
40 | |
41 static int | |
42 find_pset (int pos, size_t mapnum, unsigned short *kinds) | |
43 { | |
44 if (pos + 1 >= mapnum) | |
45 return 0; | |
46 | |
47 unsigned char kind = kinds[pos+1] & 0xff; | |
48 | |
49 return kind == GOMP_MAP_TO_PSET; | |
50 } | |
51 | |
52 static void goacc_wait (int async, int num_waits, va_list *ap); | |
53 | |
54 | |
55 /* Launch a possibly offloaded function on DEVICE. FN is the host fn | |
56 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory | |
57 blocks to be copied to/from the device. Varadic arguments are | |
58 keyed optional parameters terminated with a zero. */ | |
59 | |
60 void | |
61 GOACC_parallel_keyed (int device, void (*fn) (void *), | |
62 size_t mapnum, void **hostaddrs, size_t *sizes, | |
63 unsigned short *kinds, ...) | |
64 { | |
65 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
66 va_list ap; | |
67 struct goacc_thread *thr; | |
68 struct gomp_device_descr *acc_dev; | |
69 struct target_mem_desc *tgt; | |
70 void **devaddrs; | |
71 unsigned int i; | |
72 struct splay_tree_key_s k; | |
73 splay_tree_key tgt_fn_key; | |
74 void (*tgt_fn); | |
75 int async = GOMP_ASYNC_SYNC; | |
76 unsigned dims[GOMP_DIM_MAX]; | |
77 unsigned tag; | |
78 | |
79 #ifdef HAVE_INTTYPES_H | |
80 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", | |
81 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
82 #else | |
83 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", | |
84 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
85 #endif | |
86 goacc_lazy_initialize (); | |
87 | |
88 thr = goacc_thread (); | |
89 acc_dev = thr->dev; | |
90 | |
91 /* Host fallback if "if" clause is false or if the current device is set to | |
92 the host. */ | |
93 if (host_fallback) | |
94 { | |
95 goacc_save_and_set_bind (acc_device_host); | |
96 fn (hostaddrs); | |
97 goacc_restore_bind (); | |
98 return; | |
99 } | |
100 else if (acc_device_type (acc_dev->type) == acc_device_host) | |
101 { | |
102 fn (hostaddrs); | |
103 return; | |
104 } | |
105 | |
106 /* Default: let the runtime choose. */ | |
107 for (i = 0; i != GOMP_DIM_MAX; i++) | |
108 dims[i] = 0; | |
109 | |
110 va_start (ap, kinds); | |
111 /* TODO: This will need amending when device_type is implemented. */ | |
112 while ((tag = va_arg (ap, unsigned)) != 0) | |
113 { | |
114 if (GOMP_LAUNCH_DEVICE (tag)) | |
115 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old", | |
116 GOMP_LAUNCH_DEVICE (tag)); | |
117 | |
118 switch (GOMP_LAUNCH_CODE (tag)) | |
119 { | |
120 case GOMP_LAUNCH_DIM: | |
121 { | |
122 unsigned mask = GOMP_LAUNCH_OP (tag); | |
123 | |
124 for (i = 0; i != GOMP_DIM_MAX; i++) | |
125 if (mask & GOMP_DIM_MASK (i)) | |
126 dims[i] = va_arg (ap, unsigned); | |
127 } | |
128 break; | |
129 | |
130 case GOMP_LAUNCH_ASYNC: | |
131 { | |
132 /* Small constant values are encoded in the operand. */ | |
133 async = GOMP_LAUNCH_OP (tag); | |
134 | |
135 if (async == GOMP_LAUNCH_OP_MAX) | |
136 async = va_arg (ap, unsigned); | |
137 break; | |
138 } | |
139 | |
140 case GOMP_LAUNCH_WAIT: | |
141 { | |
142 unsigned num_waits = GOMP_LAUNCH_OP (tag); | |
143 | |
144 if (num_waits) | |
145 goacc_wait (async, num_waits, &ap); | |
146 break; | |
147 } | |
148 | |
149 default: | |
150 gomp_fatal ("unrecognized offload code '%d'," | |
151 " libgomp is too old", GOMP_LAUNCH_CODE (tag)); | |
152 } | |
153 } | |
154 va_end (ap); | |
155 | |
156 acc_dev->openacc.async_set_async_func (async); | |
157 | |
158 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) | |
159 { | |
160 k.host_start = (uintptr_t) fn; | |
161 k.host_end = k.host_start + 1; | |
162 gomp_mutex_lock (&acc_dev->lock); | |
163 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); | |
164 gomp_mutex_unlock (&acc_dev->lock); | |
165 | |
166 if (tgt_fn_key == NULL) | |
167 gomp_fatal ("target function wasn't mapped"); | |
168 | |
169 tgt_fn = (void (*)) tgt_fn_key->tgt_offset; | |
170 } | |
171 else | |
172 tgt_fn = (void (*)) fn; | |
173 | |
174 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
175 GOMP_MAP_VARS_OPENACC); | |
176 | |
177 devaddrs = gomp_alloca (sizeof (void *) * mapnum); | |
178 for (i = 0; i < mapnum; i++) | |
179 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start | |
180 + tgt->list[i].key->tgt_offset); | |
181 | |
182 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, | |
183 async, dims, tgt); | |
184 | |
185 /* If running synchronously, unmap immediately. */ | |
186 if (async < acc_async_noval) | |
187 gomp_unmap_vars (tgt, true); | |
188 else | |
189 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); | |
190 | |
191 acc_dev->openacc.async_set_async_func (acc_async_sync); | |
192 } | |
193 | |
194 /* Legacy entry point, only provide host execution. */ | |
195 | |
196 void | |
197 GOACC_parallel (int device, void (*fn) (void *), | |
198 size_t mapnum, void **hostaddrs, size_t *sizes, | |
199 unsigned short *kinds, | |
200 int num_gangs, int num_workers, int vector_length, | |
201 int async, int num_waits, ...) | |
202 { | |
203 goacc_save_and_set_bind (acc_device_host); | |
204 fn (hostaddrs); | |
205 goacc_restore_bind (); | |
206 } | |
207 | |
208 void | |
209 GOACC_data_start (int device, size_t mapnum, | |
210 void **hostaddrs, size_t *sizes, unsigned short *kinds) | |
211 { | |
212 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
213 struct target_mem_desc *tgt; | |
214 | |
215 #ifdef HAVE_INTTYPES_H | |
216 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", | |
217 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
218 #else | |
219 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", | |
220 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
221 #endif | |
222 | |
223 goacc_lazy_initialize (); | |
224 | |
225 struct goacc_thread *thr = goacc_thread (); | |
226 struct gomp_device_descr *acc_dev = thr->dev; | |
227 | |
228 /* Host fallback or 'do nothing'. */ | |
229 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
230 || host_fallback) | |
231 { | |
232 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, | |
233 GOMP_MAP_VARS_OPENACC); | |
234 tgt->prev = thr->mapped_data; | |
235 thr->mapped_data = tgt; | |
236 | |
237 return; | |
238 } | |
239 | |
240 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); | |
241 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
242 GOMP_MAP_VARS_OPENACC); | |
243 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); | |
244 tgt->prev = thr->mapped_data; | |
245 thr->mapped_data = tgt; | |
246 } | |
247 | |
248 void | |
249 GOACC_data_end (void) | |
250 { | |
251 struct goacc_thread *thr = goacc_thread (); | |
252 struct target_mem_desc *tgt = thr->mapped_data; | |
253 | |
254 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); | |
255 thr->mapped_data = tgt->prev; | |
256 gomp_unmap_vars (tgt, true); | |
257 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); | |
258 } | |
259 | |
260 void | |
261 GOACC_enter_exit_data (int device, size_t mapnum, | |
262 void **hostaddrs, size_t *sizes, unsigned short *kinds, | |
263 int async, int num_waits, ...) | |
264 { | |
265 struct goacc_thread *thr; | |
266 struct gomp_device_descr *acc_dev; | |
267 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
268 bool data_enter = false; | |
269 size_t i; | |
270 | |
271 goacc_lazy_initialize (); | |
272 | |
273 thr = goacc_thread (); | |
274 acc_dev = thr->dev; | |
275 | |
276 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
277 || host_fallback) | |
278 return; | |
279 | |
280 if (num_waits) | |
281 { | |
282 va_list ap; | |
283 | |
284 va_start (ap, num_waits); | |
285 goacc_wait (async, num_waits, &ap); | |
286 va_end (ap); | |
287 } | |
288 | |
289 acc_dev->openacc.async_set_async_func (async); | |
290 | |
291 /* Determine if this is an "acc enter data". */ | |
292 for (i = 0; i < mapnum; ++i) | |
293 { | |
294 unsigned char kind = kinds[i] & 0xff; | |
295 | |
296 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) | |
297 continue; | |
298 | |
299 if (kind == GOMP_MAP_FORCE_ALLOC | |
300 || kind == GOMP_MAP_FORCE_PRESENT | |
301 || kind == GOMP_MAP_FORCE_TO) | |
302 { | |
303 data_enter = true; | |
304 break; | |
305 } | |
306 | |
307 if (kind == GOMP_MAP_DELETE | |
308 || kind == GOMP_MAP_FORCE_FROM) | |
309 break; | |
310 | |
311 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
312 kind); | |
313 } | |
314 | |
315 if (data_enter) | |
316 { | |
317 for (i = 0; i < mapnum; i++) | |
318 { | |
319 unsigned char kind = kinds[i] & 0xff; | |
320 | |
321 /* Scan for PSETs. */ | |
322 int psets = find_pset (i, mapnum, kinds); | |
323 | |
324 if (!psets) | |
325 { | |
326 switch (kind) | |
327 { | |
328 case GOMP_MAP_POINTER: | |
329 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i], | |
330 &kinds[i]); | |
331 break; | |
332 case GOMP_MAP_FORCE_ALLOC: | |
333 acc_create (hostaddrs[i], sizes[i]); | |
334 break; | |
335 case GOMP_MAP_FORCE_PRESENT: | |
336 acc_present_or_copyin (hostaddrs[i], sizes[i]); | |
337 break; | |
338 case GOMP_MAP_FORCE_TO: | |
339 acc_present_or_copyin (hostaddrs[i], sizes[i]); | |
340 break; | |
341 default: | |
342 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
343 kind); | |
344 break; | |
345 } | |
346 } | |
347 else | |
348 { | |
349 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]); | |
350 /* Increment 'i' by two because OpenACC requires fortran | |
351 arrays to be contiguous, so each PSET is associated with | |
352 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and | |
353 one MAP_POINTER. */ | |
354 i += 2; | |
355 } | |
356 } | |
357 } | |
358 else | |
359 for (i = 0; i < mapnum; ++i) | |
360 { | |
361 unsigned char kind = kinds[i] & 0xff; | |
362 | |
363 int psets = find_pset (i, mapnum, kinds); | |
364 | |
365 if (!psets) | |
366 { | |
367 switch (kind) | |
368 { | |
369 case GOMP_MAP_POINTER: | |
370 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) | |
371 == GOMP_MAP_FORCE_FROM, | |
372 async, 1); | |
373 break; | |
374 case GOMP_MAP_DELETE: | |
375 acc_delete (hostaddrs[i], sizes[i]); | |
376 break; | |
377 case GOMP_MAP_FORCE_FROM: | |
378 acc_copyout (hostaddrs[i], sizes[i]); | |
379 break; | |
380 default: | |
381 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
382 kind); | |
383 break; | |
384 } | |
385 } | |
386 else | |
387 { | |
388 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) | |
389 == GOMP_MAP_FORCE_FROM, async, 3); | |
390 /* See the above comment. */ | |
391 i += 2; | |
392 } | |
393 } | |
394 | |
395 acc_dev->openacc.async_set_async_func (acc_async_sync); | |
396 } | |
397 | |
398 static void | |
399 goacc_wait (int async, int num_waits, va_list *ap) | |
400 { | |
401 struct goacc_thread *thr = goacc_thread (); | |
402 struct gomp_device_descr *acc_dev = thr->dev; | |
403 | |
404 while (num_waits--) | |
405 { | |
406 int qid = va_arg (*ap, int); | |
407 | |
408 if (acc_async_test (qid)) | |
409 continue; | |
410 | |
411 if (async == acc_async_sync) | |
412 acc_wait (qid); | |
413 else if (qid == async) | |
414 ;/* If we're waiting on the same asynchronous queue as we're | |
415 launching on, the queue itself will order work as | |
416 required, so there's no need to wait explicitly. */ | |
417 else | |
418 acc_dev->openacc.async_wait_async_func (qid, async); | |
419 } | |
420 } | |
421 | |
422 void | |
423 GOACC_update (int device, size_t mapnum, | |
424 void **hostaddrs, size_t *sizes, unsigned short *kinds, | |
425 int async, int num_waits, ...) | |
426 { | |
427 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
428 size_t i; | |
429 | |
430 goacc_lazy_initialize (); | |
431 | |
432 struct goacc_thread *thr = goacc_thread (); | |
433 struct gomp_device_descr *acc_dev = thr->dev; | |
434 | |
435 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
436 || host_fallback) | |
437 return; | |
438 | |
439 if (num_waits) | |
440 { | |
441 va_list ap; | |
442 | |
443 va_start (ap, num_waits); | |
444 goacc_wait (async, num_waits, &ap); | |
445 va_end (ap); | |
446 } | |
447 | |
448 acc_dev->openacc.async_set_async_func (async); | |
449 | |
450 for (i = 0; i < mapnum; ++i) | |
451 { | |
452 unsigned char kind = kinds[i] & 0xff; | |
453 | |
454 switch (kind) | |
455 { | |
456 case GOMP_MAP_POINTER: | |
457 case GOMP_MAP_TO_PSET: | |
458 break; | |
459 | |
460 case GOMP_MAP_FORCE_TO: | |
461 acc_update_device (hostaddrs[i], sizes[i]); | |
462 break; | |
463 | |
464 case GOMP_MAP_FORCE_FROM: | |
465 acc_update_self (hostaddrs[i], sizes[i]); | |
466 break; | |
467 | |
468 default: | |
469 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); | |
470 break; | |
471 } | |
472 } | |
473 | |
474 acc_dev->openacc.async_set_async_func (acc_async_sync); | |
475 } | |
476 | |
477 void | |
478 GOACC_wait (int async, int num_waits, ...) | |
479 { | |
480 if (num_waits) | |
481 { | |
482 va_list ap; | |
483 | |
484 va_start (ap, num_waits); | |
485 goacc_wait (async, num_waits, &ap); | |
486 va_end (ap); | |
487 } | |
488 else if (async == acc_async_sync) | |
489 acc_wait_all (); | |
490 else if (async == acc_async_noval) | |
491 goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); | |
492 } | |
493 | |
/* Return the number of threads; this implementation always reports
   one.  */

int
GOACC_get_num_threads (void)
{
  const int num_threads = 1;

  return num_threads;
}
499 | |
/* Return the number of the calling thread; this implementation always
   reports zero.  */

int
GOACC_get_thread_num (void)
{
  const int thread_num = 0;

  return thread_num;
}
505 | |
506 void | |
507 GOACC_declare (int device, size_t mapnum, | |
508 void **hostaddrs, size_t *sizes, unsigned short *kinds) | |
509 { | |
510 int i; | |
511 | |
512 for (i = 0; i < mapnum; i++) | |
513 { | |
514 unsigned char kind = kinds[i] & 0xff; | |
515 | |
516 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) | |
517 continue; | |
518 | |
519 switch (kind) | |
520 { | |
521 case GOMP_MAP_FORCE_ALLOC: | |
522 case GOMP_MAP_FORCE_FROM: | |
523 case GOMP_MAP_FORCE_TO: | |
524 case GOMP_MAP_POINTER: | |
525 case GOMP_MAP_DELETE: | |
526 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], | |
527 &kinds[i], 0, 0); | |
528 break; | |
529 | |
530 case GOMP_MAP_FORCE_DEVICEPTR: | |
531 break; | |
532 | |
533 case GOMP_MAP_ALLOC: | |
534 if (!acc_is_present (hostaddrs[i], sizes[i])) | |
535 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], | |
536 &kinds[i], 0, 0); | |
537 break; | |
538 | |
539 case GOMP_MAP_TO: | |
540 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], | |
541 &kinds[i], 0, 0); | |
542 | |
543 break; | |
544 | |
545 case GOMP_MAP_FROM: | |
546 kinds[i] = GOMP_MAP_FORCE_FROM; | |
547 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], | |
548 &kinds[i], 0, 0); | |
549 break; | |
550 | |
551 case GOMP_MAP_FORCE_PRESENT: | |
552 if (!acc_is_present (hostaddrs[i], sizes[i])) | |
553 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], | |
554 (unsigned long) sizes[i]); | |
555 break; | |
556 | |
557 default: | |
558 assert (0); | |
559 break; | |
560 } | |
561 } | |
562 } |