comparison libgomp/oacc-parallel.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
comparison
equal deleted inserted replaced
131:84e7813d76e9 145:1830386684a0
1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc. 1 /* Copyright (C) 2013-2020 Free Software Foundation, Inc.
2 2
3 Contributed by Mentor Embedded. 3 Contributed by Mentor Embedded.
4 4
5 This file is part of the GNU Offloading and Multi Processing Library 5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp). 6 (libgomp).
26 26
27 /* This file handles OpenACC constructs. */ 27 /* This file handles OpenACC constructs. */
28 28
29 #include "openacc.h" 29 #include "openacc.h"
30 #include "libgomp.h" 30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h" 31 #include "gomp-constants.h"
33 #include "oacc-int.h" 32 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H 33 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */ 34 # include <inttypes.h> /* For PRIu64. */
36 #endif 35 #endif
37 #include <string.h> 36 #include <string.h>
38 #include <stdarg.h> 37 #include <stdarg.h>
39 #include <assert.h> 38 #include <assert.h>
40 39
41 /* Returns the number of mappings associated with the pointer or pset. PSET 40
42 have three mappings, whereas pointer have two. */ 41 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
43 42 continue to support the following two legacy values. */
44 static int 43 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
45 find_pointer (int pos, size_t mapnum, unsigned short *kinds) 44 "legacy GOMP_DEVICE_ICV broken");
46 { 45 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
47 if (pos + 1 >= mapnum) 46 == GOACC_FLAG_HOST_FALLBACK,
48 return 0; 47 "legacy GOMP_DEVICE_HOST_FALLBACK broken");
49 48
50 unsigned char kind = kinds[pos+1] & 0xff;
51
52 if (kind == GOMP_MAP_TO_PSET)
53 return 3;
54 else if (kind == GOMP_MAP_POINTER)
55 return 2;
56
57 return 0;
58 }
59 49
60 /* Handle the mapping pair that are presented when a 50 /* Handle the mapping pair that are presented when a
61 deviceptr clause is used with Fortran. */ 51 deviceptr clause is used with Fortran. */
62 52
63 static void 53 static void
100 hostaddrs[i] = NULL; 90 hostaddrs[i] = NULL;
101 } 91 }
102 } 92 }
103 } 93 }
104 94
105 static void goacc_wait (int async, int num_waits, va_list *ap); 95
106 96 /* Launch a possibly offloaded function with FLAGS. FN is the host fn
107
108 /* Launch a possibly offloaded function on DEVICE. FN is the host fn
109 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory 97 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
110 blocks to be copied to/from the device. Variadic arguments are 98 blocks to be copied to/from the device. Variadic arguments are
111 keyed optional parameters terminated with a zero. */ 99 keyed optional parameters terminated with a zero. */
112 100
113 void 101 void
114 GOACC_parallel_keyed (int device, void (*fn) (void *), 102 GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
115 size_t mapnum, void **hostaddrs, size_t *sizes, 103 size_t mapnum, void **hostaddrs, size_t *sizes,
116 unsigned short *kinds, ...) 104 unsigned short *kinds, ...)
117 { 105 {
118 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; 106 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
107
119 va_list ap; 108 va_list ap;
120 struct goacc_thread *thr; 109 struct goacc_thread *thr;
121 struct gomp_device_descr *acc_dev; 110 struct gomp_device_descr *acc_dev;
122 struct target_mem_desc *tgt; 111 struct target_mem_desc *tgt;
123 void **devaddrs; 112 void **devaddrs;
139 goacc_lazy_initialize (); 128 goacc_lazy_initialize ();
140 129
141 thr = goacc_thread (); 130 thr = goacc_thread ();
142 acc_dev = thr->dev; 131 acc_dev = thr->dev;
143 132
133 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
134
135 acc_prof_info prof_info;
136 if (profiling_p)
137 {
138 thr->prof_info = &prof_info;
139
140 prof_info.event_type = acc_ev_compute_construct_start;
141 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
142 prof_info.version = _ACC_PROF_INFO_VERSION;
143 prof_info.device_type = acc_device_type (acc_dev->type);
144 prof_info.device_number = acc_dev->target_id;
145 prof_info.thread_id = -1;
146 prof_info.async = async;
147 prof_info.async_queue = prof_info.async;
148 prof_info.src_file = NULL;
149 prof_info.func_name = NULL;
150 prof_info.line_no = -1;
151 prof_info.end_line_no = -1;
152 prof_info.func_line_no = -1;
153 prof_info.func_end_line_no = -1;
154 }
155 acc_event_info compute_construct_event_info;
156 if (profiling_p)
157 {
158 compute_construct_event_info.other_event.event_type
159 = prof_info.event_type;
160 compute_construct_event_info.other_event.valid_bytes
161 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
162 compute_construct_event_info.other_event.parent_construct
163 = acc_construct_parallel;
164 compute_construct_event_info.other_event.implicit = 0;
165 compute_construct_event_info.other_event.tool_info = NULL;
166 }
167 acc_api_info api_info;
168 if (profiling_p)
169 {
170 thr->api_info = &api_info;
171
172 api_info.device_api = acc_device_api_none;
173 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
174 api_info.device_type = prof_info.device_type;
175 api_info.vendor = -1;
176 api_info.device_handle = NULL;
177 api_info.context_handle = NULL;
178 api_info.async_handle = NULL;
179 }
180
181 if (profiling_p)
182 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
183 &api_info);
184
144 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); 185 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
145 186
146 /* Host fallback if "if" clause is false or if the current device is set to 187 /* Host fallback if "if" clause is false or if the current device is set to
147 the host. */ 188 the host. */
148 if (host_fallback) 189 if (flags & GOACC_FLAG_HOST_FALLBACK)
149 { 190 {
191 prof_info.device_type = acc_device_host;
192 api_info.device_type = prof_info.device_type;
150 goacc_save_and_set_bind (acc_device_host); 193 goacc_save_and_set_bind (acc_device_host);
151 fn (hostaddrs); 194 fn (hostaddrs);
152 goacc_restore_bind (); 195 goacc_restore_bind ();
153 return; 196 goto out_prof;
154 } 197 }
155 else if (acc_device_type (acc_dev->type) == acc_device_host) 198 else if (acc_device_type (acc_dev->type) == acc_device_host)
156 { 199 {
157 fn (hostaddrs); 200 fn (hostaddrs);
158 return; 201 goto out_prof;
159 } 202 }
160 203
161 /* Default: let the runtime choose. */ 204 /* Default: let the runtime choose. */
162 for (i = 0; i != GOMP_DIM_MAX; i++) 205 for (i = 0; i != GOMP_DIM_MAX; i++)
163 dims[i] = 0; 206 dims[i] = 0;
187 /* Small constant values are encoded in the operand. */ 230 /* Small constant values are encoded in the operand. */
188 async = GOMP_LAUNCH_OP (tag); 231 async = GOMP_LAUNCH_OP (tag);
189 232
190 if (async == GOMP_LAUNCH_OP_MAX) 233 if (async == GOMP_LAUNCH_OP_MAX)
191 async = va_arg (ap, unsigned); 234 async = va_arg (ap, unsigned);
235
236 if (profiling_p)
237 {
238 prof_info.async = async;
239 prof_info.async_queue = prof_info.async;
240 }
241
192 break; 242 break;
193 } 243 }
194 244
195 case GOMP_LAUNCH_WAIT: 245 case GOMP_LAUNCH_WAIT:
196 { 246 {
197 unsigned num_waits = GOMP_LAUNCH_OP (tag); 247 unsigned num_waits = GOMP_LAUNCH_OP (tag);
198 248 goacc_wait (async, num_waits, &ap);
199 if (num_waits)
200 goacc_wait (async, num_waits, &ap);
201 break; 249 break;
202 } 250 }
203 251
204 default: 252 default:
205 gomp_fatal ("unrecognized offload code '%d'," 253 gomp_fatal ("unrecognized offload code '%d',"
206 " libgomp is too old", GOMP_LAUNCH_CODE (tag)); 254 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
207 } 255 }
208 } 256 }
209 va_end (ap); 257 va_end (ap);
210 258
211 acc_dev->openacc.async_set_async_func (async);
212
213 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) 259 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
214 { 260 {
215 k.host_start = (uintptr_t) fn; 261 k.host_start = (uintptr_t) fn;
216 k.host_end = k.host_start + 1; 262 k.host_end = k.host_start + 1;
217 gomp_mutex_lock (&acc_dev->lock); 263 gomp_mutex_lock (&acc_dev->lock);
224 tgt_fn = (void (*)) tgt_fn_key->tgt_offset; 270 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
225 } 271 }
226 else 272 else
227 tgt_fn = (void (*)) fn; 273 tgt_fn = (void (*)) fn;
228 274
229 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, 275 acc_event_info enter_exit_data_event_info;
230 GOMP_MAP_VARS_OPENACC); 276 if (profiling_p)
231 277 {
278 prof_info.event_type = acc_ev_enter_data_start;
279 enter_exit_data_event_info.other_event.event_type
280 = prof_info.event_type;
281 enter_exit_data_event_info.other_event.valid_bytes
282 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
283 enter_exit_data_event_info.other_event.parent_construct
284 = compute_construct_event_info.other_event.parent_construct;
285 enter_exit_data_event_info.other_event.implicit = 1;
286 enter_exit_data_event_info.other_event.tool_info = NULL;
287 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
288 &api_info);
289 }
290
291 goacc_aq aq = get_goacc_asyncqueue (async);
292
293 tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
294 true, GOMP_MAP_VARS_OPENACC);
295 if (profiling_p)
296 {
297 prof_info.event_type = acc_ev_enter_data_end;
298 enter_exit_data_event_info.other_event.event_type
299 = prof_info.event_type;
300 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
301 &api_info);
302 }
303
232 devaddrs = gomp_alloca (sizeof (void *) * mapnum); 304 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
233 for (i = 0; i < mapnum; i++) 305 for (i = 0; i < mapnum; i++)
234 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start 306 devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
235 + tgt->list[i].key->tgt_offset); 307
236 308 if (aq == NULL)
237 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, 309 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
238 async, dims, tgt); 310 tgt);
311 else
312 acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
313 dims, tgt, aq);
314
315 if (profiling_p)
316 {
317 prof_info.event_type = acc_ev_exit_data_start;
318 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
319 enter_exit_data_event_info.other_event.tool_info = NULL;
320 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
321 &api_info);
322 }
239 323
240 /* If running synchronously, unmap immediately. */ 324 /* If running synchronously, unmap immediately. */
241 bool copyfrom = true; 325 if (aq == NULL)
242 if (async_synchronous_p (async))
243 gomp_unmap_vars (tgt, true); 326 gomp_unmap_vars (tgt, true);
244 else 327 else
245 { 328 gomp_unmap_vars_async (tgt, true, aq);
246 bool async_unmap = false; 329
247 for (size_t i = 0; i < tgt->list_count; i++) 330 if (profiling_p)
248 { 331 {
249 splay_tree_key k = tgt->list[i].key; 332 prof_info.event_type = acc_ev_exit_data_end;
250 if (k && k->refcount == 1) 333 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
251 { 334 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
252 async_unmap = true; 335 &api_info);
253 break; 336 }
254 } 337
255 } 338 out_prof:
256 if (async_unmap) 339 if (profiling_p)
257 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); 340 {
258 else 341 prof_info.event_type = acc_ev_compute_construct_end;
259 { 342 compute_construct_event_info.other_event.event_type
260 copyfrom = false; 343 = prof_info.event_type;
261 gomp_unmap_vars (tgt, copyfrom); 344 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
262 } 345 &api_info);
263 } 346
264 347 thr->prof_info = NULL;
265 acc_dev->openacc.async_set_async_func (acc_async_sync); 348 thr->api_info = NULL;
266 } 349 }
267 350 }
268 /* Legacy entry point, only provide host execution. */ 351
352 /* Legacy entry point (GCC 5). Only provide host fallback execution. */
269 353
270 void 354 void
271 GOACC_parallel (int device, void (*fn) (void *), 355 GOACC_parallel (int flags_m, void (*fn) (void *),
272 size_t mapnum, void **hostaddrs, size_t *sizes, 356 size_t mapnum, void **hostaddrs, size_t *sizes,
273 unsigned short *kinds, 357 unsigned short *kinds,
274 int num_gangs, int num_workers, int vector_length, 358 int num_gangs, int num_workers, int vector_length,
275 int async, int num_waits, ...) 359 int async, int num_waits, ...)
276 { 360 {
278 fn (hostaddrs); 362 fn (hostaddrs);
279 goacc_restore_bind (); 363 goacc_restore_bind ();
280 } 364 }
281 365
282 void 366 void
283 GOACC_data_start (int device, size_t mapnum, 367 GOACC_data_start (int flags_m, size_t mapnum,
284 void **hostaddrs, size_t *sizes, unsigned short *kinds) 368 void **hostaddrs, size_t *sizes, unsigned short *kinds)
285 { 369 {
286 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; 370 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
371
287 struct target_mem_desc *tgt; 372 struct target_mem_desc *tgt;
288 373
289 #ifdef HAVE_INTTYPES_H 374 #ifdef HAVE_INTTYPES_H
290 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", 375 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
291 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); 376 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
297 goacc_lazy_initialize (); 382 goacc_lazy_initialize ();
298 383
299 struct goacc_thread *thr = goacc_thread (); 384 struct goacc_thread *thr = goacc_thread ();
300 struct gomp_device_descr *acc_dev = thr->dev; 385 struct gomp_device_descr *acc_dev = thr->dev;
301 386
387 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
388
389 acc_prof_info prof_info;
390 if (profiling_p)
391 {
392 thr->prof_info = &prof_info;
393
394 prof_info.event_type = acc_ev_enter_data_start;
395 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
396 prof_info.version = _ACC_PROF_INFO_VERSION;
397 prof_info.device_type = acc_device_type (acc_dev->type);
398 prof_info.device_number = acc_dev->target_id;
399 prof_info.thread_id = -1;
400 prof_info.async = acc_async_sync; /* Always synchronous. */
401 prof_info.async_queue = prof_info.async;
402 prof_info.src_file = NULL;
403 prof_info.func_name = NULL;
404 prof_info.line_no = -1;
405 prof_info.end_line_no = -1;
406 prof_info.func_line_no = -1;
407 prof_info.func_end_line_no = -1;
408 }
409 acc_event_info enter_data_event_info;
410 if (profiling_p)
411 {
412 enter_data_event_info.other_event.event_type
413 = prof_info.event_type;
414 enter_data_event_info.other_event.valid_bytes
415 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
416 enter_data_event_info.other_event.parent_construct = acc_construct_data;
417 for (int i = 0; i < mapnum; ++i)
418 if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
419 || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
420 {
421 /* If there is one such data mapping kind, then this is actually an
422 OpenACC 'host_data' construct. (GCC maps the OpenACC
423 'host_data' construct to the OpenACC 'data' construct.) Apart
424 from artificial test cases (such as an OpenACC 'host_data'
425 construct's (implicit) device initialization when there hasn't
426 been any device data set up before...), there can't really
427 be any meaningful events generated from OpenACC 'host_data'
428 constructs, though. */
429 enter_data_event_info.other_event.parent_construct
430 = acc_construct_host_data;
431 break;
432 }
433 enter_data_event_info.other_event.implicit = 0;
434 enter_data_event_info.other_event.tool_info = NULL;
435 }
436 acc_api_info api_info;
437 if (profiling_p)
438 {
439 thr->api_info = &api_info;
440
441 api_info.device_api = acc_device_api_none;
442 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
443 api_info.device_type = prof_info.device_type;
444 api_info.vendor = -1;
445 api_info.device_handle = NULL;
446 api_info.context_handle = NULL;
447 api_info.async_handle = NULL;
448 }
449
450 if (profiling_p)
451 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
452
302 /* Host fallback or 'do nothing'. */ 453 /* Host fallback or 'do nothing'. */
303 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 454 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
304 || host_fallback) 455 || (flags & GOACC_FLAG_HOST_FALLBACK))
305 { 456 {
457 prof_info.device_type = acc_device_host;
458 api_info.device_type = prof_info.device_type;
306 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, 459 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
307 GOMP_MAP_VARS_OPENACC); 460 GOMP_MAP_VARS_OPENACC);
308 tgt->prev = thr->mapped_data; 461 tgt->prev = thr->mapped_data;
309 thr->mapped_data = tgt; 462 thr->mapped_data = tgt;
310 463
311 return; 464 goto out_prof;
312 } 465 }
313 466
314 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); 467 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
315 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, 468 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
316 GOMP_MAP_VARS_OPENACC); 469 GOMP_MAP_VARS_OPENACC);
317 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); 470 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
318 tgt->prev = thr->mapped_data; 471 tgt->prev = thr->mapped_data;
319 thr->mapped_data = tgt; 472 thr->mapped_data = tgt;
473
474 out_prof:
475 if (profiling_p)
476 {
477 prof_info.event_type = acc_ev_enter_data_end;
478 enter_data_event_info.other_event.event_type = prof_info.event_type;
479 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
480
481 thr->prof_info = NULL;
482 thr->api_info = NULL;
483 }
320 } 484 }
321 485
322 void 486 void
323 GOACC_data_end (void) 487 GOACC_data_end (void)
324 { 488 {
325 struct goacc_thread *thr = goacc_thread (); 489 struct goacc_thread *thr = goacc_thread ();
490 struct gomp_device_descr *acc_dev = thr->dev;
326 struct target_mem_desc *tgt = thr->mapped_data; 491 struct target_mem_desc *tgt = thr->mapped_data;
492
493 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
494
495 acc_prof_info prof_info;
496 if (profiling_p)
497 {
498 thr->prof_info = &prof_info;
499
500 prof_info.event_type = acc_ev_exit_data_start;
501 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
502 prof_info.version = _ACC_PROF_INFO_VERSION;
503 prof_info.device_type = acc_device_type (acc_dev->type);
504 prof_info.device_number = acc_dev->target_id;
505 prof_info.thread_id = -1;
506 prof_info.async = acc_async_sync; /* Always synchronous. */
507 prof_info.async_queue = prof_info.async;
508 prof_info.src_file = NULL;
509 prof_info.func_name = NULL;
510 prof_info.line_no = -1;
511 prof_info.end_line_no = -1;
512 prof_info.func_line_no = -1;
513 prof_info.func_end_line_no = -1;
514 }
515 acc_event_info exit_data_event_info;
516 if (profiling_p)
517 {
518 exit_data_event_info.other_event.event_type
519 = prof_info.event_type;
520 exit_data_event_info.other_event.valid_bytes
521 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
522 exit_data_event_info.other_event.parent_construct = acc_construct_data;
523 exit_data_event_info.other_event.implicit = 0;
524 exit_data_event_info.other_event.tool_info = NULL;
525 }
526 acc_api_info api_info;
527 if (profiling_p)
528 {
529 thr->api_info = &api_info;
530
531 api_info.device_api = acc_device_api_none;
532 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
533 api_info.device_type = prof_info.device_type;
534 api_info.vendor = -1;
535 api_info.device_handle = NULL;
536 api_info.context_handle = NULL;
537 api_info.async_handle = NULL;
538 }
539
540 if (profiling_p)
541 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
327 542
328 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); 543 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
329 thr->mapped_data = tgt->prev; 544 thr->mapped_data = tgt->prev;
330 gomp_unmap_vars (tgt, true); 545 gomp_unmap_vars (tgt, true);
331 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); 546 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
547
548 if (profiling_p)
549 {
550 prof_info.event_type = acc_ev_exit_data_end;
551 exit_data_event_info.other_event.event_type = prof_info.event_type;
552 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
553
554 thr->prof_info = NULL;
555 thr->api_info = NULL;
556 }
332 } 557 }
333 558
334 void 559 void
335 GOACC_enter_exit_data (int device, size_t mapnum, 560 GOACC_update (int flags_m, size_t mapnum,
336 void **hostaddrs, size_t *sizes, unsigned short *kinds, 561 void **hostaddrs, size_t *sizes, unsigned short *kinds,
337 int async, int num_waits, ...) 562 int async, int num_waits, ...)
338 { 563 {
339 struct goacc_thread *thr; 564 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
340 struct gomp_device_descr *acc_dev; 565
341 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
342 bool data_enter = false;
343 size_t i; 566 size_t i;
344 567
345 goacc_lazy_initialize (); 568 goacc_lazy_initialize ();
346 569
347 thr = goacc_thread (); 570 struct goacc_thread *thr = goacc_thread ();
348 acc_dev = thr->dev; 571 struct gomp_device_descr *acc_dev = thr->dev;
572
573 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
574
575 acc_prof_info prof_info;
576 if (profiling_p)
577 {
578 thr->prof_info = &prof_info;
579
580 prof_info.event_type = acc_ev_update_start;
581 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
582 prof_info.version = _ACC_PROF_INFO_VERSION;
583 prof_info.device_type = acc_device_type (acc_dev->type);
584 prof_info.device_number = acc_dev->target_id;
585 prof_info.thread_id = -1;
586 prof_info.async = async;
587 prof_info.async_queue = prof_info.async;
588 prof_info.src_file = NULL;
589 prof_info.func_name = NULL;
590 prof_info.line_no = -1;
591 prof_info.end_line_no = -1;
592 prof_info.func_line_no = -1;
593 prof_info.func_end_line_no = -1;
594 }
595 acc_event_info update_event_info;
596 if (profiling_p)
597 {
598 update_event_info.other_event.event_type
599 = prof_info.event_type;
600 update_event_info.other_event.valid_bytes
601 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
602 update_event_info.other_event.parent_construct = acc_construct_update;
603 update_event_info.other_event.implicit = 0;
604 update_event_info.other_event.tool_info = NULL;
605 }
606 acc_api_info api_info;
607 if (profiling_p)
608 {
609 thr->api_info = &api_info;
610
611 api_info.device_api = acc_device_api_none;
612 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
613 api_info.device_type = prof_info.device_type;
614 api_info.vendor = -1;
615 api_info.device_handle = NULL;
616 api_info.context_handle = NULL;
617 api_info.async_handle = NULL;
618 }
619
620 if (profiling_p)
621 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
349 622
350 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 623 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
351 || host_fallback) 624 || (flags & GOACC_FLAG_HOST_FALLBACK))
352 return; 625 {
626 prof_info.device_type = acc_device_host;
627 api_info.device_type = prof_info.device_type;
628
629 goto out_prof;
630 }
353 631
354 if (num_waits) 632 if (num_waits)
355 { 633 {
356 va_list ap; 634 va_list ap;
357 635
358 va_start (ap, num_waits); 636 va_start (ap, num_waits);
359 goacc_wait (async, num_waits, &ap); 637 goacc_wait (async, num_waits, &ap);
360 va_end (ap); 638 va_end (ap);
361 } 639 }
362
363 /* Determine whether "finalize" semantics apply to all mappings of this
364 OpenACC directive. */
365 bool finalize = false;
366 if (mapnum > 0)
367 {
368 unsigned char kind = kinds[0] & 0xff;
369 if (kind == GOMP_MAP_DELETE
370 || kind == GOMP_MAP_FORCE_FROM)
371 finalize = true;
372 }
373
374 acc_dev->openacc.async_set_async_func (async);
375
376 /* Determine if this is an "acc enter data". */
377 for (i = 0; i < mapnum; ++i)
378 {
379 unsigned char kind = kinds[i] & 0xff;
380
381 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
382 continue;
383
384 if (kind == GOMP_MAP_FORCE_ALLOC
385 || kind == GOMP_MAP_FORCE_PRESENT
386 || kind == GOMP_MAP_FORCE_TO
387 || kind == GOMP_MAP_TO
388 || kind == GOMP_MAP_ALLOC)
389 {
390 data_enter = true;
391 break;
392 }
393
394 if (kind == GOMP_MAP_RELEASE
395 || kind == GOMP_MAP_DELETE
396 || kind == GOMP_MAP_FROM
397 || kind == GOMP_MAP_FORCE_FROM)
398 break;
399
400 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
401 kind);
402 }
403
404 /* In c, non-pointers and arrays are represented by a single data clause.
405 Dynamically allocated arrays and subarrays are represented by a data
406 clause followed by an internal GOMP_MAP_POINTER.
407
408 In fortran, scalars and not allocated arrays are represented by a
409 single data clause. Allocated arrays and subarrays have three mappings:
410 1) the original data clause, 2) a PSET 3) a pointer to the array data.
411 */
412
413 if (data_enter)
414 {
415 for (i = 0; i < mapnum; i++)
416 {
417 unsigned char kind = kinds[i] & 0xff;
418
419 /* Scan for pointers and PSETs. */
420 int pointer = find_pointer (i, mapnum, kinds);
421
422 if (!pointer)
423 {
424 switch (kind)
425 {
426 case GOMP_MAP_ALLOC:
427 acc_present_or_create (hostaddrs[i], sizes[i]);
428 break;
429 case GOMP_MAP_FORCE_ALLOC:
430 acc_create (hostaddrs[i], sizes[i]);
431 break;
432 case GOMP_MAP_TO:
433 acc_present_or_copyin (hostaddrs[i], sizes[i]);
434 break;
435 case GOMP_MAP_FORCE_TO:
436 acc_copyin (hostaddrs[i], sizes[i]);
437 break;
438 default:
439 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
440 kind);
441 break;
442 }
443 }
444 else
445 {
446 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
447 &sizes[i], &kinds[i]);
448 /* Increment 'i' by two because OpenACC requires fortran
449 arrays to be contiguous, so each PSET is associated with
450 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and
451 one MAP_POINTER. */
452 i += pointer - 1;
453 }
454 }
455 }
456 else
457 for (i = 0; i < mapnum; ++i)
458 {
459 unsigned char kind = kinds[i] & 0xff;
460
461 int pointer = find_pointer (i, mapnum, kinds);
462
463 if (!pointer)
464 {
465 switch (kind)
466 {
467 case GOMP_MAP_RELEASE:
468 case GOMP_MAP_DELETE:
469 if (acc_is_present (hostaddrs[i], sizes[i]))
470 {
471 if (finalize)
472 acc_delete_finalize (hostaddrs[i], sizes[i]);
473 else
474 acc_delete (hostaddrs[i], sizes[i]);
475 }
476 break;
477 case GOMP_MAP_FROM:
478 case GOMP_MAP_FORCE_FROM:
479 if (finalize)
480 acc_copyout_finalize (hostaddrs[i], sizes[i]);
481 else
482 acc_copyout (hostaddrs[i], sizes[i]);
483 break;
484 default:
485 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
486 kind);
487 break;
488 }
489 }
490 else
491 {
492 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
493 || kind == GOMP_MAP_FROM);
494 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
495 finalize, pointer);
496 /* See the above comment. */
497 i += pointer - 1;
498 }
499 }
500
501 acc_dev->openacc.async_set_async_func (acc_async_sync);
502 }
503
504 static void
505 goacc_wait (int async, int num_waits, va_list *ap)
506 {
507 struct goacc_thread *thr = goacc_thread ();
508 struct gomp_device_descr *acc_dev = thr->dev;
509
510 while (num_waits--)
511 {
512 int qid = va_arg (*ap, int);
513
514 if (acc_async_test (qid))
515 continue;
516
517 if (async == acc_async_sync)
518 acc_wait (qid);
519 else if (qid == async)
520 ;/* If we're waiting on the same asynchronous queue as we're
521 launching on, the queue itself will order work as
522 required, so there's no need to wait explicitly. */
523 else
524 acc_dev->openacc.async_wait_async_func (qid, async);
525 }
526 }
527
528 void
529 GOACC_update (int device, size_t mapnum,
530 void **hostaddrs, size_t *sizes, unsigned short *kinds,
531 int async, int num_waits, ...)
532 {
533 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
534 size_t i;
535
536 goacc_lazy_initialize ();
537
538 struct goacc_thread *thr = goacc_thread ();
539 struct gomp_device_descr *acc_dev = thr->dev;
540
541 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
542 || host_fallback)
543 return;
544
545 if (num_waits)
546 {
547 va_list ap;
548
549 va_start (ap, num_waits);
550 goacc_wait (async, num_waits, &ap);
551 va_end (ap);
552 }
553
554 acc_dev->openacc.async_set_async_func (async);
555 640
556 bool update_device = false; 641 bool update_device = false;
557 for (i = 0; i < mapnum; ++i) 642 for (i = 0; i < mapnum; ++i)
558 { 643 {
559 unsigned char kind = kinds[i] & 0xff; 644 unsigned char kind = kinds[i] & 0xff;
573 658
574 /* Update the contents of the host pointer to reflect 659 /* Update the contents of the host pointer to reflect
575 the value of the allocated device memory in the 660 the value of the allocated device memory in the
576 previous pointer. */ 661 previous pointer. */
577 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; 662 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
663 /* TODO: verify that we really cannot use acc_update_device_async
664 here. */
578 acc_update_device (hostaddrs[i], sizeof (uintptr_t)); 665 acc_update_device (hostaddrs[i], sizeof (uintptr_t));
579 666
580 /* Restore the host pointer. */ 667 /* Restore the host pointer. */
581 *(uintptr_t *) hostaddrs[i] = t; 668 *(uintptr_t *) hostaddrs[i] = t;
582 update_device = false; 669 update_device = false;
590 break; 677 break;
591 } 678 }
592 /* Fallthru */ 679 /* Fallthru */
593 case GOMP_MAP_FORCE_TO: 680 case GOMP_MAP_FORCE_TO:
594 update_device = true; 681 update_device = true;
595 acc_update_device (hostaddrs[i], sizes[i]); 682 acc_update_device_async (hostaddrs[i], sizes[i], async);
596 break; 683 break;
597 684
598 case GOMP_MAP_FROM: 685 case GOMP_MAP_FROM:
599 if (!acc_is_present (hostaddrs[i], sizes[i])) 686 if (!acc_is_present (hostaddrs[i], sizes[i]))
600 { 687 {
602 break; 689 break;
603 } 690 }
604 /* Fallthru */ 691 /* Fallthru */
605 case GOMP_MAP_FORCE_FROM: 692 case GOMP_MAP_FORCE_FROM:
606 update_device = false; 693 update_device = false;
607 acc_update_self (hostaddrs[i], sizes[i]); 694 acc_update_self_async (hostaddrs[i], sizes[i], async);
608 break; 695 break;
609 696
610 default: 697 default:
611 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); 698 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
612 break; 699 break;
613 } 700 }
614 } 701 }
615 702
616 acc_dev->openacc.async_set_async_func (acc_async_sync); 703 out_prof:
617 } 704 if (profiling_p)
618 705 {
619 void 706 prof_info.event_type = acc_ev_update_end;
620 GOACC_wait (int async, int num_waits, ...) 707 update_event_info.other_event.event_type = prof_info.event_type;
621 { 708 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
622 if (num_waits) 709
623 { 710 thr->prof_info = NULL;
624 va_list ap; 711 thr->api_info = NULL;
625 712 }
626 va_start (ap, num_waits); 713 }
627 goacc_wait (async, num_waits, &ap); 714
628 va_end (ap); 715
629 } 716 /* Legacy entry point (GCC 5). */
630 else if (async == acc_async_sync)
631 acc_wait_all ();
632 else if (async == acc_async_noval)
633 goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
634 }
635 717
636 int 718 int
637 GOACC_get_num_threads (void) 719 GOACC_get_num_threads (void)
638 { 720 {
639 return 1; 721 return 1;
640 } 722 }
641 723
724 /* Legacy entry point (GCC 5). */
725
642 int 726 int
643 GOACC_get_thread_num (void) 727 GOACC_get_thread_num (void)
644 { 728 {
645 return 0; 729 return 0;
646 } 730 }
647 731
648 void 732 void
649 GOACC_declare (int device, size_t mapnum, 733 GOACC_declare (int flags_m, size_t mapnum,
650 void **hostaddrs, size_t *sizes, unsigned short *kinds) 734 void **hostaddrs, size_t *sizes, unsigned short *kinds)
651 { 735 {
652 int i; 736 int i;
653 737
654 for (i = 0; i < mapnum; i++) 738 for (i = 0; i < mapnum; i++)
664 case GOMP_MAP_FORCE_FROM: 748 case GOMP_MAP_FORCE_FROM:
665 case GOMP_MAP_FORCE_TO: 749 case GOMP_MAP_FORCE_TO:
666 case GOMP_MAP_POINTER: 750 case GOMP_MAP_POINTER:
667 case GOMP_MAP_RELEASE: 751 case GOMP_MAP_RELEASE:
668 case GOMP_MAP_DELETE: 752 case GOMP_MAP_DELETE:
669 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], 753 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
670 &kinds[i], GOMP_ASYNC_SYNC, 0); 754 &kinds[i], GOMP_ASYNC_SYNC, 0);
671 break; 755 break;
672 756
673 case GOMP_MAP_FORCE_DEVICEPTR: 757 case GOMP_MAP_FORCE_DEVICEPTR:
674 break; 758 break;
675 759
676 case GOMP_MAP_ALLOC: 760 case GOMP_MAP_ALLOC:
677 if (!acc_is_present (hostaddrs[i], sizes[i])) 761 if (!acc_is_present (hostaddrs[i], sizes[i]))
678 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], 762 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
679 &kinds[i], GOMP_ASYNC_SYNC, 0); 763 &kinds[i], GOMP_ASYNC_SYNC, 0);
680 break; 764 break;
681 765
682 case GOMP_MAP_TO: 766 case GOMP_MAP_TO:
683 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], 767 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
684 &kinds[i], GOMP_ASYNC_SYNC, 0); 768 &kinds[i], GOMP_ASYNC_SYNC, 0);
685 769
686 break; 770 break;
687 771
688 case GOMP_MAP_FROM: 772 case GOMP_MAP_FROM:
689 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], 773 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
690 &kinds[i], GOMP_ASYNC_SYNC, 0); 774 &kinds[i], GOMP_ASYNC_SYNC, 0);
691 break; 775 break;
692 776
693 case GOMP_MAP_FORCE_PRESENT: 777 case GOMP_MAP_FORCE_PRESENT:
694 if (!acc_is_present (hostaddrs[i], sizes[i])) 778 if (!acc_is_present (hostaddrs[i], sizes[i]))