Mercurial > hg > CbC > CbC_gcc
comparison libgomp/oacc-parallel.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc. | 1 /* Copyright (C) 2013-2020 Free Software Foundation, Inc. |
2 | 2 |
3 Contributed by Mentor Embedded. | 3 Contributed by Mentor Embedded. |
4 | 4 |
5 This file is part of the GNU Offloading and Multi Processing Library | 5 This file is part of the GNU Offloading and Multi Processing Library |
6 (libgomp). | 6 (libgomp). |
26 | 26 |
27 /* This file handles OpenACC constructs. */ | 27 /* This file handles OpenACC constructs. */ |
28 | 28 |
29 #include "openacc.h" | 29 #include "openacc.h" |
30 #include "libgomp.h" | 30 #include "libgomp.h" |
31 #include "libgomp_g.h" | |
32 #include "gomp-constants.h" | 31 #include "gomp-constants.h" |
33 #include "oacc-int.h" | 32 #include "oacc-int.h" |
34 #ifdef HAVE_INTTYPES_H | 33 #ifdef HAVE_INTTYPES_H |
35 # include <inttypes.h> /* For PRIu64. */ | 34 # include <inttypes.h> /* For PRIu64. */ |
36 #endif | 35 #endif |
37 #include <string.h> | 36 #include <string.h> |
38 #include <stdarg.h> | 37 #include <stdarg.h> |
39 #include <assert.h> | 38 #include <assert.h> |
40 | 39 |
41 /* Returns the number of mappings associated with the pointer or pset. PSET | 40 |
42 have three mappings, whereas pointer have two. */ | 41 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we |
43 | 42 continue to support the following two legacy values. */ |
44 static int | 43 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0, |
45 find_pointer (int pos, size_t mapnum, unsigned short *kinds) | 44 "legacy GOMP_DEVICE_ICV broken"); |
46 { | 45 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK) |
47 if (pos + 1 >= mapnum) | 46 == GOACC_FLAG_HOST_FALLBACK, |
48 return 0; | 47 "legacy GOMP_DEVICE_HOST_FALLBACK broken"); |
49 | 48 |
50 unsigned char kind = kinds[pos+1] & 0xff; | |
51 | |
52 if (kind == GOMP_MAP_TO_PSET) | |
53 return 3; | |
54 else if (kind == GOMP_MAP_POINTER) | |
55 return 2; | |
56 | |
57 return 0; | |
58 } | |
59 | 49 |
60 /* Handle the mapping pair that are presented when a | 50 /* Handle the mapping pair that are presented when a |
61 deviceptr clause is used with Fortran. */ | 51 deviceptr clause is used with Fortran. */ |
62 | 52 |
63 static void | 53 static void |
100 hostaddrs[i] = NULL; | 90 hostaddrs[i] = NULL; |
101 } | 91 } |
102 } | 92 } |
103 } | 93 } |
104 | 94 |
105 static void goacc_wait (int async, int num_waits, va_list *ap); | 95 |
106 | 96 /* Launch a possibly offloaded function with FLAGS. FN is the host fn |
107 | |
108 /* Launch a possibly offloaded function on DEVICE. FN is the host fn | |
109 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory | 97 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory |
110 blocks to be copied to/from the device. Variadic arguments are | 98 blocks to be copied to/from the device. Variadic arguments are |
111 keyed optional parameters terminated with a zero. */ | 99 keyed optional parameters terminated with a zero. */ |
112 | 100 |
113 void | 101 void |
114 GOACC_parallel_keyed (int device, void (*fn) (void *), | 102 GOACC_parallel_keyed (int flags_m, void (*fn) (void *), |
115 size_t mapnum, void **hostaddrs, size_t *sizes, | 103 size_t mapnum, void **hostaddrs, size_t *sizes, |
116 unsigned short *kinds, ...) | 104 unsigned short *kinds, ...) |
117 { | 105 { |
118 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | 106 int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
107 | |
119 va_list ap; | 108 va_list ap; |
120 struct goacc_thread *thr; | 109 struct goacc_thread *thr; |
121 struct gomp_device_descr *acc_dev; | 110 struct gomp_device_descr *acc_dev; |
122 struct target_mem_desc *tgt; | 111 struct target_mem_desc *tgt; |
123 void **devaddrs; | 112 void **devaddrs; |
139 goacc_lazy_initialize (); | 128 goacc_lazy_initialize (); |
140 | 129 |
141 thr = goacc_thread (); | 130 thr = goacc_thread (); |
142 acc_dev = thr->dev; | 131 acc_dev = thr->dev; |
143 | 132 |
133 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
134 | |
135 acc_prof_info prof_info; | |
136 if (profiling_p) | |
137 { | |
138 thr->prof_info = &prof_info; | |
139 | |
140 prof_info.event_type = acc_ev_compute_construct_start; | |
141 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
142 prof_info.version = _ACC_PROF_INFO_VERSION; | |
143 prof_info.device_type = acc_device_type (acc_dev->type); | |
144 prof_info.device_number = acc_dev->target_id; | |
145 prof_info.thread_id = -1; | |
146 prof_info.async = async; | |
147 prof_info.async_queue = prof_info.async; | |
148 prof_info.src_file = NULL; | |
149 prof_info.func_name = NULL; | |
150 prof_info.line_no = -1; | |
151 prof_info.end_line_no = -1; | |
152 prof_info.func_line_no = -1; | |
153 prof_info.func_end_line_no = -1; | |
154 } | |
155 acc_event_info compute_construct_event_info; | |
156 if (profiling_p) | |
157 { | |
158 compute_construct_event_info.other_event.event_type | |
159 = prof_info.event_type; | |
160 compute_construct_event_info.other_event.valid_bytes | |
161 = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
162 compute_construct_event_info.other_event.parent_construct | |
163 = acc_construct_parallel; | |
164 compute_construct_event_info.other_event.implicit = 0; | |
165 compute_construct_event_info.other_event.tool_info = NULL; | |
166 } | |
167 acc_api_info api_info; | |
168 if (profiling_p) | |
169 { | |
170 thr->api_info = &api_info; | |
171 | |
172 api_info.device_api = acc_device_api_none; | |
173 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
174 api_info.device_type = prof_info.device_type; | |
175 api_info.vendor = -1; | |
176 api_info.device_handle = NULL; | |
177 api_info.context_handle = NULL; | |
178 api_info.async_handle = NULL; | |
179 } | |
180 | |
181 if (profiling_p) | |
182 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, | |
183 &api_info); | |
184 | |
144 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); | 185 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); |
145 | 186 |
146 /* Host fallback if "if" clause is false or if the current device is set to | 187 /* Host fallback if "if" clause is false or if the current device is set to |
147 the host. */ | 188 the host. */ |
148 if (host_fallback) | 189 if (flags & GOACC_FLAG_HOST_FALLBACK) |
149 { | 190 { |
191 prof_info.device_type = acc_device_host; | |
192 api_info.device_type = prof_info.device_type; | |
150 goacc_save_and_set_bind (acc_device_host); | 193 goacc_save_and_set_bind (acc_device_host); |
151 fn (hostaddrs); | 194 fn (hostaddrs); |
152 goacc_restore_bind (); | 195 goacc_restore_bind (); |
153 return; | 196 goto out_prof; |
154 } | 197 } |
155 else if (acc_device_type (acc_dev->type) == acc_device_host) | 198 else if (acc_device_type (acc_dev->type) == acc_device_host) |
156 { | 199 { |
157 fn (hostaddrs); | 200 fn (hostaddrs); |
158 return; | 201 goto out_prof; |
159 } | 202 } |
160 | 203 |
161 /* Default: let the runtime choose. */ | 204 /* Default: let the runtime choose. */ |
162 for (i = 0; i != GOMP_DIM_MAX; i++) | 205 for (i = 0; i != GOMP_DIM_MAX; i++) |
163 dims[i] = 0; | 206 dims[i] = 0; |
187 /* Small constant values are encoded in the operand. */ | 230 /* Small constant values are encoded in the operand. */ |
188 async = GOMP_LAUNCH_OP (tag); | 231 async = GOMP_LAUNCH_OP (tag); |
189 | 232 |
190 if (async == GOMP_LAUNCH_OP_MAX) | 233 if (async == GOMP_LAUNCH_OP_MAX) |
191 async = va_arg (ap, unsigned); | 234 async = va_arg (ap, unsigned); |
235 | |
236 if (profiling_p) | |
237 { | |
238 prof_info.async = async; | |
239 prof_info.async_queue = prof_info.async; | |
240 } | |
241 | |
192 break; | 242 break; |
193 } | 243 } |
194 | 244 |
195 case GOMP_LAUNCH_WAIT: | 245 case GOMP_LAUNCH_WAIT: |
196 { | 246 { |
197 unsigned num_waits = GOMP_LAUNCH_OP (tag); | 247 unsigned num_waits = GOMP_LAUNCH_OP (tag); |
198 | 248 goacc_wait (async, num_waits, &ap); |
199 if (num_waits) | |
200 goacc_wait (async, num_waits, &ap); | |
201 break; | 249 break; |
202 } | 250 } |
203 | 251 |
204 default: | 252 default: |
205 gomp_fatal ("unrecognized offload code '%d'," | 253 gomp_fatal ("unrecognized offload code '%d'," |
206 " libgomp is too old", GOMP_LAUNCH_CODE (tag)); | 254 " libgomp is too old", GOMP_LAUNCH_CODE (tag)); |
207 } | 255 } |
208 } | 256 } |
209 va_end (ap); | 257 va_end (ap); |
210 | 258 |
211 acc_dev->openacc.async_set_async_func (async); | |
212 | |
213 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) | 259 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) |
214 { | 260 { |
215 k.host_start = (uintptr_t) fn; | 261 k.host_start = (uintptr_t) fn; |
216 k.host_end = k.host_start + 1; | 262 k.host_end = k.host_start + 1; |
217 gomp_mutex_lock (&acc_dev->lock); | 263 gomp_mutex_lock (&acc_dev->lock); |
224 tgt_fn = (void (*)) tgt_fn_key->tgt_offset; | 270 tgt_fn = (void (*)) tgt_fn_key->tgt_offset; |
225 } | 271 } |
226 else | 272 else |
227 tgt_fn = (void (*)) fn; | 273 tgt_fn = (void (*)) fn; |
228 | 274 |
229 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | 275 acc_event_info enter_exit_data_event_info; |
230 GOMP_MAP_VARS_OPENACC); | 276 if (profiling_p) |
231 | 277 { |
278 prof_info.event_type = acc_ev_enter_data_start; | |
279 enter_exit_data_event_info.other_event.event_type | |
280 = prof_info.event_type; | |
281 enter_exit_data_event_info.other_event.valid_bytes | |
282 = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
283 enter_exit_data_event_info.other_event.parent_construct | |
284 = compute_construct_event_info.other_event.parent_construct; | |
285 enter_exit_data_event_info.other_event.implicit = 1; | |
286 enter_exit_data_event_info.other_event.tool_info = NULL; | |
287 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
288 &api_info); | |
289 } | |
290 | |
291 goacc_aq aq = get_goacc_asyncqueue (async); | |
292 | |
293 tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, | |
294 true, GOMP_MAP_VARS_OPENACC); | |
295 if (profiling_p) | |
296 { | |
297 prof_info.event_type = acc_ev_enter_data_end; | |
298 enter_exit_data_event_info.other_event.event_type | |
299 = prof_info.event_type; | |
300 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
301 &api_info); | |
302 } | |
303 | |
232 devaddrs = gomp_alloca (sizeof (void *) * mapnum); | 304 devaddrs = gomp_alloca (sizeof (void *) * mapnum); |
233 for (i = 0; i < mapnum; i++) | 305 for (i = 0; i < mapnum; i++) |
234 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start | 306 devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i); |
235 + tgt->list[i].key->tgt_offset); | 307 |
236 | 308 if (aq == NULL) |
237 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, | 309 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, |
238 async, dims, tgt); | 310 tgt); |
311 else | |
312 acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, | |
313 dims, tgt, aq); | |
314 | |
315 if (profiling_p) | |
316 { | |
317 prof_info.event_type = acc_ev_exit_data_start; | |
318 enter_exit_data_event_info.other_event.event_type = prof_info.event_type; | |
319 enter_exit_data_event_info.other_event.tool_info = NULL; | |
320 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
321 &api_info); | |
322 } | |
239 | 323 |
240 /* If running synchronously, unmap immediately. */ | 324 /* If running synchronously, unmap immediately. */ |
241 bool copyfrom = true; | 325 if (aq == NULL) |
242 if (async_synchronous_p (async)) | |
243 gomp_unmap_vars (tgt, true); | 326 gomp_unmap_vars (tgt, true); |
244 else | 327 else |
245 { | 328 gomp_unmap_vars_async (tgt, true, aq); |
246 bool async_unmap = false; | 329 |
247 for (size_t i = 0; i < tgt->list_count; i++) | 330 if (profiling_p) |
248 { | 331 { |
249 splay_tree_key k = tgt->list[i].key; | 332 prof_info.event_type = acc_ev_exit_data_end; |
250 if (k && k->refcount == 1) | 333 enter_exit_data_event_info.other_event.event_type = prof_info.event_type; |
251 { | 334 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, |
252 async_unmap = true; | 335 &api_info); |
253 break; | 336 } |
254 } | 337 |
255 } | 338 out_prof: |
256 if (async_unmap) | 339 if (profiling_p) |
257 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); | 340 { |
258 else | 341 prof_info.event_type = acc_ev_compute_construct_end; |
259 { | 342 compute_construct_event_info.other_event.event_type |
260 copyfrom = false; | 343 = prof_info.event_type; |
261 gomp_unmap_vars (tgt, copyfrom); | 344 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, |
262 } | 345 &api_info); |
263 } | 346 |
264 | 347 thr->prof_info = NULL; |
265 acc_dev->openacc.async_set_async_func (acc_async_sync); | 348 thr->api_info = NULL; |
266 } | 349 } |
267 | 350 } |
268 /* Legacy entry point, only provide host execution. */ | 351 |
352 /* Legacy entry point (GCC 5). Only provide host fallback execution. */ | |
269 | 353 |
270 void | 354 void |
271 GOACC_parallel (int device, void (*fn) (void *), | 355 GOACC_parallel (int flags_m, void (*fn) (void *), |
272 size_t mapnum, void **hostaddrs, size_t *sizes, | 356 size_t mapnum, void **hostaddrs, size_t *sizes, |
273 unsigned short *kinds, | 357 unsigned short *kinds, |
274 int num_gangs, int num_workers, int vector_length, | 358 int num_gangs, int num_workers, int vector_length, |
275 int async, int num_waits, ...) | 359 int async, int num_waits, ...) |
276 { | 360 { |
278 fn (hostaddrs); | 362 fn (hostaddrs); |
279 goacc_restore_bind (); | 363 goacc_restore_bind (); |
280 } | 364 } |
281 | 365 |
282 void | 366 void |
283 GOACC_data_start (int device, size_t mapnum, | 367 GOACC_data_start (int flags_m, size_t mapnum, |
284 void **hostaddrs, size_t *sizes, unsigned short *kinds) | 368 void **hostaddrs, size_t *sizes, unsigned short *kinds) |
285 { | 369 { |
286 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | 370 int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
371 | |
287 struct target_mem_desc *tgt; | 372 struct target_mem_desc *tgt; |
288 | 373 |
289 #ifdef HAVE_INTTYPES_H | 374 #ifdef HAVE_INTTYPES_H |
290 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", | 375 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", |
291 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | 376 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); |
297 goacc_lazy_initialize (); | 382 goacc_lazy_initialize (); |
298 | 383 |
299 struct goacc_thread *thr = goacc_thread (); | 384 struct goacc_thread *thr = goacc_thread (); |
300 struct gomp_device_descr *acc_dev = thr->dev; | 385 struct gomp_device_descr *acc_dev = thr->dev; |
301 | 386 |
387 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
388 | |
389 acc_prof_info prof_info; | |
390 if (profiling_p) | |
391 { | |
392 thr->prof_info = &prof_info; | |
393 | |
394 prof_info.event_type = acc_ev_enter_data_start; | |
395 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
396 prof_info.version = _ACC_PROF_INFO_VERSION; | |
397 prof_info.device_type = acc_device_type (acc_dev->type); | |
398 prof_info.device_number = acc_dev->target_id; | |
399 prof_info.thread_id = -1; | |
400 prof_info.async = acc_async_sync; /* Always synchronous. */ | |
401 prof_info.async_queue = prof_info.async; | |
402 prof_info.src_file = NULL; | |
403 prof_info.func_name = NULL; | |
404 prof_info.line_no = -1; | |
405 prof_info.end_line_no = -1; | |
406 prof_info.func_line_no = -1; | |
407 prof_info.func_end_line_no = -1; | |
408 } | |
409 acc_event_info enter_data_event_info; | |
410 if (profiling_p) | |
411 { | |
412 enter_data_event_info.other_event.event_type | |
413 = prof_info.event_type; | |
414 enter_data_event_info.other_event.valid_bytes | |
415 = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
416 enter_data_event_info.other_event.parent_construct = acc_construct_data; | |
417 for (int i = 0; i < mapnum; ++i) | |
418 if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR | |
419 || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) | |
420 { | |
421 /* If there is one such data mapping kind, then this is actually an | |
422 OpenACC 'host_data' construct. (GCC maps the OpenACC | |
423 'host_data' construct to the OpenACC 'data' construct.) Apart | |
424 from artificial test cases (such as an OpenACC 'host_data' | |
425 construct's (implicit) device initialization when there hasn't | |
426 been any device data set up before...), no meaningful events | |
427 can really be generated from OpenACC 'host_data' | |
428 constructs, though. */ | |
429 enter_data_event_info.other_event.parent_construct | |
430 = acc_construct_host_data; | |
431 break; | |
432 } | |
433 enter_data_event_info.other_event.implicit = 0; | |
434 enter_data_event_info.other_event.tool_info = NULL; | |
435 } | |
436 acc_api_info api_info; | |
437 if (profiling_p) | |
438 { | |
439 thr->api_info = &api_info; | |
440 | |
441 api_info.device_api = acc_device_api_none; | |
442 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
443 api_info.device_type = prof_info.device_type; | |
444 api_info.vendor = -1; | |
445 api_info.device_handle = NULL; | |
446 api_info.context_handle = NULL; | |
447 api_info.async_handle = NULL; | |
448 } | |
449 | |
450 if (profiling_p) | |
451 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); | |
452 | |
302 /* Host fallback or 'do nothing'. */ | 453 /* Host fallback or 'do nothing'. */ |
303 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | 454 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
304 || host_fallback) | 455 || (flags & GOACC_FLAG_HOST_FALLBACK)) |
305 { | 456 { |
457 prof_info.device_type = acc_device_host; | |
458 api_info.device_type = prof_info.device_type; | |
306 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, | 459 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, |
307 GOMP_MAP_VARS_OPENACC); | 460 GOMP_MAP_VARS_OPENACC); |
308 tgt->prev = thr->mapped_data; | 461 tgt->prev = thr->mapped_data; |
309 thr->mapped_data = tgt; | 462 thr->mapped_data = tgt; |
310 | 463 |
311 return; | 464 goto out_prof; |
312 } | 465 } |
313 | 466 |
314 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); | 467 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); |
315 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | 468 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, |
316 GOMP_MAP_VARS_OPENACC); | 469 GOMP_MAP_VARS_OPENACC); |
317 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); | 470 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); |
318 tgt->prev = thr->mapped_data; | 471 tgt->prev = thr->mapped_data; |
319 thr->mapped_data = tgt; | 472 thr->mapped_data = tgt; |
473 | |
474 out_prof: | |
475 if (profiling_p) | |
476 { | |
477 prof_info.event_type = acc_ev_enter_data_end; | |
478 enter_data_event_info.other_event.event_type = prof_info.event_type; | |
479 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); | |
480 | |
481 thr->prof_info = NULL; | |
482 thr->api_info = NULL; | |
483 } | |
320 } | 484 } |
321 | 485 |
322 void | 486 void |
323 GOACC_data_end (void) | 487 GOACC_data_end (void) |
324 { | 488 { |
325 struct goacc_thread *thr = goacc_thread (); | 489 struct goacc_thread *thr = goacc_thread (); |
490 struct gomp_device_descr *acc_dev = thr->dev; | |
326 struct target_mem_desc *tgt = thr->mapped_data; | 491 struct target_mem_desc *tgt = thr->mapped_data; |
492 | |
493 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
494 | |
495 acc_prof_info prof_info; | |
496 if (profiling_p) | |
497 { | |
498 thr->prof_info = &prof_info; | |
499 | |
500 prof_info.event_type = acc_ev_exit_data_start; | |
501 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
502 prof_info.version = _ACC_PROF_INFO_VERSION; | |
503 prof_info.device_type = acc_device_type (acc_dev->type); | |
504 prof_info.device_number = acc_dev->target_id; | |
505 prof_info.thread_id = -1; | |
506 prof_info.async = acc_async_sync; /* Always synchronous. */ | |
507 prof_info.async_queue = prof_info.async; | |
508 prof_info.src_file = NULL; | |
509 prof_info.func_name = NULL; | |
510 prof_info.line_no = -1; | |
511 prof_info.end_line_no = -1; | |
512 prof_info.func_line_no = -1; | |
513 prof_info.func_end_line_no = -1; | |
514 } | |
515 acc_event_info exit_data_event_info; | |
516 if (profiling_p) | |
517 { | |
518 exit_data_event_info.other_event.event_type | |
519 = prof_info.event_type; | |
520 exit_data_event_info.other_event.valid_bytes | |
521 = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
522 exit_data_event_info.other_event.parent_construct = acc_construct_data; | |
523 exit_data_event_info.other_event.implicit = 0; | |
524 exit_data_event_info.other_event.tool_info = NULL; | |
525 } | |
526 acc_api_info api_info; | |
527 if (profiling_p) | |
528 { | |
529 thr->api_info = &api_info; | |
530 | |
531 api_info.device_api = acc_device_api_none; | |
532 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
533 api_info.device_type = prof_info.device_type; | |
534 api_info.vendor = -1; | |
535 api_info.device_handle = NULL; | |
536 api_info.context_handle = NULL; | |
537 api_info.async_handle = NULL; | |
538 } | |
539 | |
540 if (profiling_p) | |
541 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); | |
327 | 542 |
328 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); | 543 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); |
329 thr->mapped_data = tgt->prev; | 544 thr->mapped_data = tgt->prev; |
330 gomp_unmap_vars (tgt, true); | 545 gomp_unmap_vars (tgt, true); |
331 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); | 546 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); |
547 | |
548 if (profiling_p) | |
549 { | |
550 prof_info.event_type = acc_ev_exit_data_end; | |
551 exit_data_event_info.other_event.event_type = prof_info.event_type; | |
552 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); | |
553 | |
554 thr->prof_info = NULL; | |
555 thr->api_info = NULL; | |
556 } | |
332 } | 557 } |
333 | 558 |
334 void | 559 void |
335 GOACC_enter_exit_data (int device, size_t mapnum, | 560 GOACC_update (int flags_m, size_t mapnum, |
336 void **hostaddrs, size_t *sizes, unsigned short *kinds, | 561 void **hostaddrs, size_t *sizes, unsigned short *kinds, |
337 int async, int num_waits, ...) | 562 int async, int num_waits, ...) |
338 { | 563 { |
339 struct goacc_thread *thr; | 564 int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
340 struct gomp_device_descr *acc_dev; | 565 |
341 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
342 bool data_enter = false; | |
343 size_t i; | 566 size_t i; |
344 | 567 |
345 goacc_lazy_initialize (); | 568 goacc_lazy_initialize (); |
346 | 569 |
347 thr = goacc_thread (); | 570 struct goacc_thread *thr = goacc_thread (); |
348 acc_dev = thr->dev; | 571 struct gomp_device_descr *acc_dev = thr->dev; |
572 | |
573 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
574 | |
575 acc_prof_info prof_info; | |
576 if (profiling_p) | |
577 { | |
578 thr->prof_info = &prof_info; | |
579 | |
580 prof_info.event_type = acc_ev_update_start; | |
581 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
582 prof_info.version = _ACC_PROF_INFO_VERSION; | |
583 prof_info.device_type = acc_device_type (acc_dev->type); | |
584 prof_info.device_number = acc_dev->target_id; | |
585 prof_info.thread_id = -1; | |
586 prof_info.async = async; | |
587 prof_info.async_queue = prof_info.async; | |
588 prof_info.src_file = NULL; | |
589 prof_info.func_name = NULL; | |
590 prof_info.line_no = -1; | |
591 prof_info.end_line_no = -1; | |
592 prof_info.func_line_no = -1; | |
593 prof_info.func_end_line_no = -1; | |
594 } | |
595 acc_event_info update_event_info; | |
596 if (profiling_p) | |
597 { | |
598 update_event_info.other_event.event_type | |
599 = prof_info.event_type; | |
600 update_event_info.other_event.valid_bytes | |
601 = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
602 update_event_info.other_event.parent_construct = acc_construct_update; | |
603 update_event_info.other_event.implicit = 0; | |
604 update_event_info.other_event.tool_info = NULL; | |
605 } | |
606 acc_api_info api_info; | |
607 if (profiling_p) | |
608 { | |
609 thr->api_info = &api_info; | |
610 | |
611 api_info.device_api = acc_device_api_none; | |
612 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
613 api_info.device_type = prof_info.device_type; | |
614 api_info.vendor = -1; | |
615 api_info.device_handle = NULL; | |
616 api_info.context_handle = NULL; | |
617 api_info.async_handle = NULL; | |
618 } | |
619 | |
620 if (profiling_p) | |
621 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); | |
349 | 622 |
350 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | 623 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
351 || host_fallback) | 624 || (flags & GOACC_FLAG_HOST_FALLBACK)) |
352 return; | 625 { |
626 prof_info.device_type = acc_device_host; | |
627 api_info.device_type = prof_info.device_type; | |
628 | |
629 goto out_prof; | |
630 } | |
353 | 631 |
354 if (num_waits) | 632 if (num_waits) |
355 { | 633 { |
356 va_list ap; | 634 va_list ap; |
357 | 635 |
358 va_start (ap, num_waits); | 636 va_start (ap, num_waits); |
359 goacc_wait (async, num_waits, &ap); | 637 goacc_wait (async, num_waits, &ap); |
360 va_end (ap); | 638 va_end (ap); |
361 } | 639 } |
362 | |
363 /* Determine whether "finalize" semantics apply to all mappings of this | |
364 OpenACC directive. */ | |
365 bool finalize = false; | |
366 if (mapnum > 0) | |
367 { | |
368 unsigned char kind = kinds[0] & 0xff; | |
369 if (kind == GOMP_MAP_DELETE | |
370 || kind == GOMP_MAP_FORCE_FROM) | |
371 finalize = true; | |
372 } | |
373 | |
374 acc_dev->openacc.async_set_async_func (async); | |
375 | |
376 /* Determine if this is an "acc enter data". */ | |
377 for (i = 0; i < mapnum; ++i) | |
378 { | |
379 unsigned char kind = kinds[i] & 0xff; | |
380 | |
381 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) | |
382 continue; | |
383 | |
384 if (kind == GOMP_MAP_FORCE_ALLOC | |
385 || kind == GOMP_MAP_FORCE_PRESENT | |
386 || kind == GOMP_MAP_FORCE_TO | |
387 || kind == GOMP_MAP_TO | |
388 || kind == GOMP_MAP_ALLOC) | |
389 { | |
390 data_enter = true; | |
391 break; | |
392 } | |
393 | |
394 if (kind == GOMP_MAP_RELEASE | |
395 || kind == GOMP_MAP_DELETE | |
396 || kind == GOMP_MAP_FROM | |
397 || kind == GOMP_MAP_FORCE_FROM) | |
398 break; | |
399 | |
400 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
401 kind); | |
402 } | |
403 | |
404 /* In c, non-pointers and arrays are represented by a single data clause. | |
405 Dynamically allocated arrays and subarrays are represented by a data | |
406 clause followed by an internal GOMP_MAP_POINTER. | |
407 | |
408 In fortran, scalars and not allocated arrays are represented by a | |
409 single data clause. Allocated arrays and subarrays have three mappings: | |
410 1) the original data clause, 2) a PSET 3) a pointer to the array data. | |
411 */ | |
412 | |
413 if (data_enter) | |
414 { | |
415 for (i = 0; i < mapnum; i++) | |
416 { | |
417 unsigned char kind = kinds[i] & 0xff; | |
418 | |
419 /* Scan for pointers and PSETs. */ | |
420 int pointer = find_pointer (i, mapnum, kinds); | |
421 | |
422 if (!pointer) | |
423 { | |
424 switch (kind) | |
425 { | |
426 case GOMP_MAP_ALLOC: | |
427 acc_present_or_create (hostaddrs[i], sizes[i]); | |
428 break; | |
429 case GOMP_MAP_FORCE_ALLOC: | |
430 acc_create (hostaddrs[i], sizes[i]); | |
431 break; | |
432 case GOMP_MAP_TO: | |
433 acc_present_or_copyin (hostaddrs[i], sizes[i]); | |
434 break; | |
435 case GOMP_MAP_FORCE_TO: | |
436 acc_copyin (hostaddrs[i], sizes[i]); | |
437 break; | |
438 default: | |
439 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
440 kind); | |
441 break; | |
442 } | |
443 } | |
444 else | |
445 { | |
446 gomp_acc_insert_pointer (pointer, &hostaddrs[i], | |
447 &sizes[i], &kinds[i]); | |
448 /* Increment 'i' by two because OpenACC requires fortran | |
449 arrays to be contiguous, so each PSET is associated with | |
450 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and | |
451 one MAP_POINTER. */ | |
452 i += pointer - 1; | |
453 } | |
454 } | |
455 } | |
456 else | |
457 for (i = 0; i < mapnum; ++i) | |
458 { | |
459 unsigned char kind = kinds[i] & 0xff; | |
460 | |
461 int pointer = find_pointer (i, mapnum, kinds); | |
462 | |
463 if (!pointer) | |
464 { | |
465 switch (kind) | |
466 { | |
467 case GOMP_MAP_RELEASE: | |
468 case GOMP_MAP_DELETE: | |
469 if (acc_is_present (hostaddrs[i], sizes[i])) | |
470 { | |
471 if (finalize) | |
472 acc_delete_finalize (hostaddrs[i], sizes[i]); | |
473 else | |
474 acc_delete (hostaddrs[i], sizes[i]); | |
475 } | |
476 break; | |
477 case GOMP_MAP_FROM: | |
478 case GOMP_MAP_FORCE_FROM: | |
479 if (finalize) | |
480 acc_copyout_finalize (hostaddrs[i], sizes[i]); | |
481 else | |
482 acc_copyout (hostaddrs[i], sizes[i]); | |
483 break; | |
484 default: | |
485 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
486 kind); | |
487 break; | |
488 } | |
489 } | |
490 else | |
491 { | |
492 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM | |
493 || kind == GOMP_MAP_FROM); | |
494 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, | |
495 finalize, pointer); | |
496 /* See the above comment. */ | |
497 i += pointer - 1; | |
498 } | |
499 } | |
500 | |
501 acc_dev->openacc.async_set_async_func (acc_async_sync); | |
502 } | |
503 | |
504 static void | |
505 goacc_wait (int async, int num_waits, va_list *ap) | |
506 { | |
507 struct goacc_thread *thr = goacc_thread (); | |
508 struct gomp_device_descr *acc_dev = thr->dev; | |
509 | |
510 while (num_waits--) | |
511 { | |
512 int qid = va_arg (*ap, int); | |
513 | |
514 if (acc_async_test (qid)) | |
515 continue; | |
516 | |
517 if (async == acc_async_sync) | |
518 acc_wait (qid); | |
519 else if (qid == async) | |
520 ;/* If we're waiting on the same asynchronous queue as we're | |
521 launching on, the queue itself will order work as | |
522 required, so there's no need to wait explicitly. */ | |
523 else | |
524 acc_dev->openacc.async_wait_async_func (qid, async); | |
525 } | |
526 } | |
527 | |
528 void | |
529 GOACC_update (int device, size_t mapnum, | |
530 void **hostaddrs, size_t *sizes, unsigned short *kinds, | |
531 int async, int num_waits, ...) | |
532 { | |
533 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
534 size_t i; | |
535 | |
536 goacc_lazy_initialize (); | |
537 | |
538 struct goacc_thread *thr = goacc_thread (); | |
539 struct gomp_device_descr *acc_dev = thr->dev; | |
540 | |
541 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
542 || host_fallback) | |
543 return; | |
544 | |
545 if (num_waits) | |
546 { | |
547 va_list ap; | |
548 | |
549 va_start (ap, num_waits); | |
550 goacc_wait (async, num_waits, &ap); | |
551 va_end (ap); | |
552 } | |
553 | |
554 acc_dev->openacc.async_set_async_func (async); | |
555 | 640 |
556 bool update_device = false; | 641 bool update_device = false; |
557 for (i = 0; i < mapnum; ++i) | 642 for (i = 0; i < mapnum; ++i) |
558 { | 643 { |
559 unsigned char kind = kinds[i] & 0xff; | 644 unsigned char kind = kinds[i] & 0xff; |
573 | 658 |
574 /* Update the contents of the host pointer to reflect | 659 /* Update the contents of the host pointer to reflect |
575 the value of the allocated device memory in the | 660 the value of the allocated device memory in the |
576 previous pointer. */ | 661 previous pointer. */ |
577 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; | 662 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; |
663 /* TODO: verify that we really cannot use acc_update_device_async | |
664 here. */ | |
578 acc_update_device (hostaddrs[i], sizeof (uintptr_t)); | 665 acc_update_device (hostaddrs[i], sizeof (uintptr_t)); |
579 | 666 |
580 /* Restore the host pointer. */ | 667 /* Restore the host pointer. */ |
581 *(uintptr_t *) hostaddrs[i] = t; | 668 *(uintptr_t *) hostaddrs[i] = t; |
582 update_device = false; | 669 update_device = false; |
590 break; | 677 break; |
591 } | 678 } |
592 /* Fallthru */ | 679 /* Fallthru */ |
593 case GOMP_MAP_FORCE_TO: | 680 case GOMP_MAP_FORCE_TO: |
594 update_device = true; | 681 update_device = true; |
595 acc_update_device (hostaddrs[i], sizes[i]); | 682 acc_update_device_async (hostaddrs[i], sizes[i], async); |
596 break; | 683 break; |
597 | 684 |
598 case GOMP_MAP_FROM: | 685 case GOMP_MAP_FROM: |
599 if (!acc_is_present (hostaddrs[i], sizes[i])) | 686 if (!acc_is_present (hostaddrs[i], sizes[i])) |
600 { | 687 { |
602 break; | 689 break; |
603 } | 690 } |
604 /* Fallthru */ | 691 /* Fallthru */ |
605 case GOMP_MAP_FORCE_FROM: | 692 case GOMP_MAP_FORCE_FROM: |
606 update_device = false; | 693 update_device = false; |
607 acc_update_self (hostaddrs[i], sizes[i]); | 694 acc_update_self_async (hostaddrs[i], sizes[i], async); |
608 break; | 695 break; |
609 | 696 |
610 default: | 697 default: |
611 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); | 698 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); |
612 break; | 699 break; |
613 } | 700 } |
614 } | 701 } |
615 | 702 |
616 acc_dev->openacc.async_set_async_func (acc_async_sync); | 703 out_prof: |
617 } | 704 if (profiling_p) |
618 | 705 { |
619 void | 706 prof_info.event_type = acc_ev_update_end; |
620 GOACC_wait (int async, int num_waits, ...) | 707 update_event_info.other_event.event_type = prof_info.event_type; |
621 { | 708 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); |
622 if (num_waits) | 709 |
623 { | 710 thr->prof_info = NULL; |
624 va_list ap; | 711 thr->api_info = NULL; |
625 | 712 } |
626 va_start (ap, num_waits); | 713 } |
627 goacc_wait (async, num_waits, &ap); | 714 |
628 va_end (ap); | 715 |
629 } | 716 /* Legacy entry point (GCC 5). */ |
630 else if (async == acc_async_sync) | |
631 acc_wait_all (); | |
632 else if (async == acc_async_noval) | |
633 goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); | |
634 } | |
635 | 717 |
636 int | 718 int |
637 GOACC_get_num_threads (void) | 719 GOACC_get_num_threads (void) |
638 { | 720 { |
639 return 1; | 721 return 1; |
640 } | 722 } |
641 | 723 |
724 /* Legacy entry point (GCC 5). */ | |
725 | |
642 int | 726 int |
643 GOACC_get_thread_num (void) | 727 GOACC_get_thread_num (void) |
644 { | 728 { |
645 return 0; | 729 return 0; |
646 } | 730 } |
647 | 731 |
648 void | 732 void |
649 GOACC_declare (int device, size_t mapnum, | 733 GOACC_declare (int flags_m, size_t mapnum, |
650 void **hostaddrs, size_t *sizes, unsigned short *kinds) | 734 void **hostaddrs, size_t *sizes, unsigned short *kinds) |
651 { | 735 { |
652 int i; | 736 int i; |
653 | 737 |
654 for (i = 0; i < mapnum; i++) | 738 for (i = 0; i < mapnum; i++) |
664 case GOMP_MAP_FORCE_FROM: | 748 case GOMP_MAP_FORCE_FROM: |
665 case GOMP_MAP_FORCE_TO: | 749 case GOMP_MAP_FORCE_TO: |
666 case GOMP_MAP_POINTER: | 750 case GOMP_MAP_POINTER: |
667 case GOMP_MAP_RELEASE: | 751 case GOMP_MAP_RELEASE: |
668 case GOMP_MAP_DELETE: | 752 case GOMP_MAP_DELETE: |
669 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], | 753 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], |
670 &kinds[i], GOMP_ASYNC_SYNC, 0); | 754 &kinds[i], GOMP_ASYNC_SYNC, 0); |
671 break; | 755 break; |
672 | 756 |
673 case GOMP_MAP_FORCE_DEVICEPTR: | 757 case GOMP_MAP_FORCE_DEVICEPTR: |
674 break; | 758 break; |
675 | 759 |
676 case GOMP_MAP_ALLOC: | 760 case GOMP_MAP_ALLOC: |
677 if (!acc_is_present (hostaddrs[i], sizes[i])) | 761 if (!acc_is_present (hostaddrs[i], sizes[i])) |
678 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], | 762 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], |
679 &kinds[i], GOMP_ASYNC_SYNC, 0); | 763 &kinds[i], GOMP_ASYNC_SYNC, 0); |
680 break; | 764 break; |
681 | 765 |
682 case GOMP_MAP_TO: | 766 case GOMP_MAP_TO: |
683 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], | 767 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], |
684 &kinds[i], GOMP_ASYNC_SYNC, 0); | 768 &kinds[i], GOMP_ASYNC_SYNC, 0); |
685 | 769 |
686 break; | 770 break; |
687 | 771 |
688 case GOMP_MAP_FROM: | 772 case GOMP_MAP_FROM: |
689 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], | 773 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], |
690 &kinds[i], GOMP_ASYNC_SYNC, 0); | 774 &kinds[i], GOMP_ASYNC_SYNC, 0); |
691 break; | 775 break; |
692 | 776 |
693 case GOMP_MAP_FORCE_PRESENT: | 777 case GOMP_MAP_FORCE_PRESENT: |
694 if (!acc_is_present (hostaddrs[i], sizes[i])) | 778 if (!acc_is_present (hostaddrs[i], sizes[i])) |