111
|
1 /* Plugin for offload execution on Intel MIC devices.
|
|
2
|
|
3 Copyright (C) 2014-2016 Free Software Foundation, Inc.
|
|
4
|
|
5 Contributed by Ilya Verbin <ilya.verbin@intel.com>.
|
|
6
|
|
7 This file is part of the GNU Offloading and Multi Processing Library
|
|
8 (libgomp).
|
|
9
|
|
10 Libgomp is free software; you can redistribute it and/or modify it
|
|
11 under the terms of the GNU General Public License as published by
|
|
12 the Free Software Foundation; either version 3, or (at your option)
|
|
13 any later version.
|
|
14
|
|
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
18 more details.
|
|
19
|
|
20 Under Section 7 of GPL version 3, you are granted additional
|
|
21 permissions described in the GCC Runtime Library Exception, version
|
|
22 3.1, as published by the Free Software Foundation.
|
|
23
|
|
24 You should have received a copy of the GNU General Public License and
|
|
25 a copy of the GCC Runtime Library Exception along with this program;
|
|
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
27 <http://www.gnu.org/licenses/>. */
|
|
28
|
|
29 /* Host side part of a libgomp plugin. */
|
|
30
|
|
31 #include <stdint.h>
|
|
32 #include <stdio.h>
|
|
33 #include <stdlib.h>
|
|
34 #include <string.h>
|
|
35 #include <utility>
|
|
36 #include <vector>
|
|
37 #include <map>
|
|
38 #include "libgomp-plugin.h"
|
|
39 #include "compiler_if_host.h"
|
|
40 #include "main_target_image.h"
|
|
41 #include "gomp-constants.h"
|
|
42
|
|
43 #define OFFLOAD_ACTIVE_WAIT_ENV "OFFLOAD_ACTIVE_WAIT"
|
|
44
|
|
/* Debug tracing.  When DEBUG is defined, TRACE prints "HOST:", the file
   name, the enclosing function name and a printf-style message to stderr,
   followed by a newline.  Otherwise it expands to an empty statement and
   its arguments are not evaluated.

   Both variants are function-like and wrapped in do { } while (0), so
   "TRACE (...);" is always exactly one statement and can be used safely in
   an unbraced if/else.  */
#ifdef DEBUG
#define TRACE(...)                                                      \
  do                                                                    \
    {                                                                   \
      fprintf (stderr, "HOST:\t%s:%s ", __FILE__, __FUNCTION__);        \
      fprintf (stderr, __VA_ARGS__);                                    \
      fprintf (stderr, "\n");                                           \
    }                                                                   \
  while (0)
#else
#define TRACE(...) do { } while (0)
#endif
|
|
55
|
|
56
|
|
57 /* Start/end addresses of functions and global variables on a device. */
|
|
58 typedef std::vector<addr_pair> AddrVect;
|
|
59
|
|
60 /* Addresses for one image and all devices. */
|
|
61 typedef std::vector<AddrVect> DevAddrVect;
|
|
62
|
|
63 /* Addresses for all images and all devices. */
|
|
64 typedef std::map<const void *, DevAddrVect> ImgDevAddrMap;
|
|
65
|
|
/* Descriptor passed to __offload_[un]register_image: the image size, a
   generated library name, and then the image contents themselves.  */
struct TargetImageDesc
{
  /* Number of bytes stored in DATA.  */
  int64_t size;

  /* Room for "lib", a 10-digit counter, ".so" and the terminating NUL;
     10 characters is enough for the maximum int value.  */
  char name[sizeof ("lib0000000000.so")];

  /* Flexible array member holding the image bytes.  */
  char data[];
};
|
|
73
|
|
74 /* Image descriptors, indexed by a pointer obtained from libgomp. */
|
|
75 typedef std::map<const void *, TargetImageDesc *> ImgDescMap;
|
|
76
|
|
77
|
|
78 /* Total number of available devices. */
|
|
79 static int num_devices;
|
|
80
|
|
81 /* Total number of shared libraries with offloading to Intel MIC. */
|
|
82 static int num_images;
|
|
83
|
|
84 /* Two dimensional array: one key is a pointer to image,
|
|
85 second key is number of device. Contains a vector of pointer pairs. */
|
|
86 static ImgDevAddrMap *address_table;
|
|
87
|
|
88 /* Descriptors of all images, registered in liboffloadmic. */
|
|
89 static ImgDescMap *image_descriptors;
|
|
90
|
|
91 /* Thread-safe registration of the main image. */
|
|
92 static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
|
|
93
|
|
94 static VarDesc vd_host2tgt = {
|
|
95 { 1, 1 }, /* dst, src */
|
|
96 { 1, 0 }, /* in, out */
|
|
97 1, /* alloc_if */
|
|
98 1, /* free_if */
|
|
99 4, /* align */
|
|
100 0, /* mic_offset */
|
|
101 { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
|
|
102 is_stack_buf, sink_addr, alloc_disp,
|
|
103 is_noncont_src, is_noncont_dst */
|
|
104 0, /* offset */
|
|
105 0, /* size */
|
|
106 1, /* count */
|
|
107 0, /* alloc */
|
|
108 0, /* into */
|
|
109 0 /* ptr */
|
|
110 };
|
|
111
|
|
112 static VarDesc vd_tgt2host = {
|
|
113 { 1, 1 }, /* dst, src */
|
|
114 { 0, 1 }, /* in, out */
|
|
115 1, /* alloc_if */
|
|
116 1, /* free_if */
|
|
117 4, /* align */
|
|
118 0, /* mic_offset */
|
|
119 { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
|
|
120 is_stack_buf, sink_addr, alloc_disp,
|
|
121 is_noncont_src, is_noncont_dst */
|
|
122 0, /* offset */
|
|
123 0, /* size */
|
|
124 1, /* count */
|
|
125 0, /* alloc */
|
|
126 0, /* into */
|
|
127 0 /* ptr */
|
|
128 };
|
|
129
|
|
130
|
|
131 __attribute__((constructor))
|
|
132 static void
|
|
133 init (void)
|
|
134 {
|
|
135 const char *active_wait = getenv (OFFLOAD_ACTIVE_WAIT_ENV);
|
|
136
|
|
137 /* Disable active wait by default to avoid useless CPU usage. */
|
|
138 if (!active_wait)
|
|
139 setenv (OFFLOAD_ACTIVE_WAIT_ENV, "0", 0);
|
|
140
|
|
141 address_table = new ImgDevAddrMap;
|
|
142 image_descriptors = new ImgDescMap;
|
|
143 num_devices = _Offload_number_of_devices ();
|
|
144 }
|
|
145
|
|
146 extern "C" const char *
|
|
147 GOMP_OFFLOAD_get_name (void)
|
|
148 {
|
|
149 const char *res = "intelmic";
|
|
150 TRACE ("(): return %s", res);
|
|
151 return res;
|
|
152 }
|
|
153
|
|
154 extern "C" unsigned int
|
|
155 GOMP_OFFLOAD_get_caps (void)
|
|
156 {
|
|
157 unsigned int res = GOMP_OFFLOAD_CAP_OPENMP_400;
|
|
158 TRACE ("(): return %x", res);
|
|
159 return res;
|
|
160 }
|
|
161
|
|
162 extern "C" int
|
|
163 GOMP_OFFLOAD_get_type (void)
|
|
164 {
|
|
165 enum offload_target_type res = OFFLOAD_TARGET_TYPE_INTEL_MIC;
|
|
166 TRACE ("(): return %d", res);
|
|
167 return res;
|
|
168 }
|
|
169
|
|
170 extern "C" int
|
|
171 GOMP_OFFLOAD_get_num_devices (void)
|
|
172 {
|
|
173 TRACE ("(): return %d", num_devices);
|
|
174 return num_devices;
|
|
175 }
|
|
176
|
|
177 static bool
|
|
178 offload (const char *file, uint64_t line, int device, const char *name,
|
|
179 int num_vars, VarDesc *vars, const void **async_data)
|
|
180 {
|
|
181 OFFLOAD ofld = __offload_target_acquire1 (&device, file, line);
|
|
182 if (ofld)
|
|
183 {
|
|
184 if (async_data == NULL)
|
|
185 return __offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0,
|
|
186 NULL, NULL);
|
|
187 else
|
|
188 {
|
|
189 OffloadFlags flags;
|
|
190 flags.flags = 0;
|
|
191 flags.bits.omp_async = 1;
|
|
192 return __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0,
|
|
193 NULL, async_data, 0, NULL, flags, NULL);
|
|
194 }
|
|
195 }
|
|
196 else
|
|
197 {
|
|
198 GOMP_PLUGIN_error ("%s:%d: Offload target acquire failed\n", file, line);
|
|
199 return false;
|
|
200 }
|
|
201 }
|
|
202
|
|
203 static void
|
|
204 register_main_image ()
|
|
205 {
|
|
206 /* Do not check the return value, because old versions of liboffloadmic did
|
|
207 not have return values. */
|
|
208 __offload_register_image (&main_target_image);
|
|
209
|
|
210 /* liboffloadmic will call GOMP_PLUGIN_target_task_completion when
|
|
211 asynchronous task on target is completed. */
|
|
212 __offload_register_task_callback (GOMP_PLUGIN_target_task_completion);
|
|
213 }
|
|
214
|
|
215 /* liboffloadmic loads and runs offload_target_main on all available devices
|
|
216 during a first call to offload (). */
|
|
217 extern "C" bool
|
|
218 GOMP_OFFLOAD_init_device (int device)
|
|
219 {
|
|
220 TRACE ("(device = %d)", device);
|
|
221 pthread_once (&main_image_is_registered, register_main_image);
|
|
222 return offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
|
|
223 NULL, NULL);
|
|
224 }
|
|
225
|
|
226 extern "C" bool
|
|
227 GOMP_OFFLOAD_fini_device (int device)
|
|
228 {
|
|
229 TRACE ("(device = %d)", device);
|
|
230
|
|
231 /* liboffloadmic will finalize target processes on all available devices. */
|
|
232 __offload_unregister_image (&main_target_image);
|
|
233 return true;
|
|
234 }
|
|
235
|
|
236 static bool
|
|
237 get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
|
|
238 {
|
|
239 VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host };
|
|
240 vd1[0].ptr = &num_funcs;
|
|
241 vd1[0].size = sizeof (num_funcs);
|
|
242 vd1[1].ptr = &num_vars;
|
|
243 vd1[1].size = sizeof (num_vars);
|
|
244
|
|
245 if (!offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
|
|
246 vd1, NULL))
|
|
247 return false;
|
|
248
|
|
249 int table_size = num_funcs + 2 * num_vars;
|
|
250 if (table_size > 0)
|
|
251 {
|
|
252 table = new void * [table_size];
|
|
253
|
|
254 VarDesc vd2;
|
|
255 vd2 = vd_tgt2host;
|
|
256 vd2.ptr = table;
|
|
257 vd2.size = table_size * sizeof (void *);
|
|
258
|
|
259 return offload (__FILE__, __LINE__, device, "__offload_target_table_p2",
|
|
260 1, &vd2, NULL);
|
|
261 }
|
|
262 return true;
|
|
263 }
|
|
264
|
|
265 /* Offload TARGET_IMAGE to all available devices and fill address_table with
|
|
266 corresponding target addresses. */
|
|
267
|
|
268 static bool
|
|
269 offload_image (const void *target_image)
|
|
270 {
|
|
271 void *image_start = ((void **) target_image)[0];
|
|
272 void *image_end = ((void **) target_image)[1];
|
|
273
|
|
274 TRACE ("(target_image = %p { %p, %p })",
|
|
275 target_image, image_start, image_end);
|
|
276
|
|
277 int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start;
|
|
278 TargetImageDesc *image = (TargetImageDesc *) malloc (offsetof (TargetImageDesc, data)
|
|
279 + image_size);
|
|
280 if (!image)
|
|
281 {
|
|
282 GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
|
|
283 return false;
|
|
284 }
|
|
285
|
|
286 image->size = image_size;
|
|
287 sprintf (image->name, "lib%010d.so", num_images++);
|
|
288 memcpy (image->data, image_start, image->size);
|
|
289
|
|
290 TRACE ("() __offload_register_image %s { %p, %d }",
|
|
291 image->name, image_start, image->size);
|
|
292 /* Do not check the return value, because old versions of liboffloadmic did
|
|
293 not have return values. */
|
|
294 __offload_register_image (image);
|
|
295
|
|
296 /* Receive tables for target_image from all devices. */
|
|
297 DevAddrVect dev_table;
|
|
298 bool ret = true;
|
|
299 for (int dev = 0; dev < num_devices; dev++)
|
|
300 {
|
|
301 int num_funcs = 0;
|
|
302 int num_vars = 0;
|
|
303 void **table = NULL;
|
|
304
|
|
305 ret &= get_target_table (dev, num_funcs, num_vars, table);
|
|
306
|
|
307 AddrVect curr_dev_table;
|
|
308
|
|
309 for (int i = 0; i < num_funcs; i++)
|
|
310 {
|
|
311 addr_pair tgt_addr;
|
|
312 tgt_addr.start = (uintptr_t) table[i];
|
|
313 tgt_addr.end = tgt_addr.start + 1;
|
|
314 TRACE ("() func %d:\t0x%llx..0x%llx", i,
|
|
315 tgt_addr.start, tgt_addr.end);
|
|
316 curr_dev_table.push_back (tgt_addr);
|
|
317 }
|
|
318
|
|
319 for (int i = 0; i < num_vars; i++)
|
|
320 {
|
|
321 addr_pair tgt_addr;
|
|
322 tgt_addr.start = (uintptr_t) table[num_funcs+i*2];
|
|
323 tgt_addr.end = tgt_addr.start + (uintptr_t) table[num_funcs+i*2+1];
|
|
324 TRACE ("() var %d:\t0x%llx..0x%llx", i, tgt_addr.start, tgt_addr.end);
|
|
325 curr_dev_table.push_back (tgt_addr);
|
|
326 }
|
|
327
|
|
328 dev_table.push_back (curr_dev_table);
|
|
329 delete [] table;
|
|
330 }
|
|
331
|
|
332 address_table->insert (std::make_pair (target_image, dev_table));
|
|
333 image_descriptors->insert (std::make_pair (target_image, image));
|
|
334 return ret;
|
|
335 }
|
|
336
|
|
337 /* Return the libgomp version number we're compatible with. There is
|
|
338 no requirement for cross-version compatibility. */
|
|
339
|
|
340 extern "C" unsigned
|
|
341 GOMP_OFFLOAD_version (void)
|
|
342 {
|
|
343 return GOMP_VERSION;
|
|
344 }
|
|
345
|
|
346 extern "C" int
|
|
347 GOMP_OFFLOAD_load_image (int device, const unsigned version,
|
|
348 const void *target_image, addr_pair **result)
|
|
349 {
|
|
350 TRACE ("(device = %d, target_image = %p)", device, target_image);
|
|
351
|
|
352 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
|
|
353 {
|
|
354 GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
|
|
355 " (expected %u, received %u)",
|
|
356 GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
|
|
357 return -1;
|
|
358 }
|
|
359
|
|
360 /* If target_image is already present in address_table, then there is no need
|
|
361 to offload it. */
|
|
362 if (address_table->count (target_image) == 0)
|
|
363 {
|
|
364 /* If fail, return -1 as error code. */
|
|
365 if (!offload_image (target_image))
|
|
366 return -1;
|
|
367 }
|
|
368
|
|
369 AddrVect *curr_dev_table = &(*address_table)[target_image][device];
|
|
370 int table_size = curr_dev_table->size ();
|
|
371 addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
|
|
372 if (table == NULL)
|
|
373 {
|
|
374 GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
|
|
375 return -1;
|
|
376 }
|
|
377
|
|
378 std::copy (curr_dev_table->begin (), curr_dev_table->end (), table);
|
|
379 *result = table;
|
|
380 return table_size;
|
|
381 }
|
|
382
|
|
383 extern "C" bool
|
|
384 GOMP_OFFLOAD_unload_image (int device, unsigned version,
|
|
385 const void *target_image)
|
|
386 {
|
|
387 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
|
|
388 {
|
|
389 GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
|
|
390 " (expected %u, received %u)",
|
|
391 GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
|
|
392 return false;
|
|
393 }
|
|
394
|
|
395 TRACE ("(device = %d, target_image = %p)", device, target_image);
|
|
396
|
|
397 /* liboffloadmic unloads the image from all available devices. */
|
|
398 if (image_descriptors->count (target_image) > 0)
|
|
399 {
|
|
400 TargetImageDesc *image_desc = (*image_descriptors)[target_image];
|
|
401 __offload_unregister_image (image_desc);
|
|
402 free (image_desc);
|
|
403
|
|
404 address_table->erase (target_image);
|
|
405 image_descriptors->erase (target_image);
|
|
406 }
|
|
407 return true;
|
|
408 }
|
|
409
|
|
410 extern "C" void *
|
|
411 GOMP_OFFLOAD_alloc (int device, size_t size)
|
|
412 {
|
|
413 TRACE ("(device = %d, size = %d)", device, size);
|
|
414
|
|
415 void *tgt_ptr;
|
|
416 VarDesc vd[2] = { vd_host2tgt, vd_tgt2host };
|
|
417 vd[0].ptr = &size;
|
|
418 vd[0].size = sizeof (size);
|
|
419 vd[1].ptr = &tgt_ptr;
|
|
420 vd[1].size = sizeof (void *);
|
|
421
|
|
422 if (!offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2,
|
|
423 vd, NULL))
|
|
424 return NULL;
|
|
425
|
|
426 return tgt_ptr;
|
|
427 }
|
|
428
|
|
429 extern "C" bool
|
|
430 GOMP_OFFLOAD_free (int device, void *tgt_ptr)
|
|
431 {
|
|
432 TRACE ("(device = %d, tgt_ptr = %p)", device, tgt_ptr);
|
|
433
|
|
434 VarDesc vd = vd_host2tgt;
|
|
435 vd.ptr = &tgt_ptr;
|
|
436 vd.size = sizeof (void *);
|
|
437
|
|
438 return offload (__FILE__, __LINE__, device, "__offload_target_free", 1,
|
|
439 &vd, NULL);
|
|
440 }
|
|
441
|
|
442 extern "C" bool
|
|
443 GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
|
|
444 size_t size)
|
|
445 {
|
|
446 TRACE ("(device = %d, tgt_ptr = %p, host_ptr = %p, size = %d)",
|
|
447 device, tgt_ptr, host_ptr, size);
|
|
448 if (!size)
|
|
449 return true;
|
|
450
|
|
451 VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
|
|
452 vd1[0].ptr = &tgt_ptr;
|
|
453 vd1[0].size = sizeof (void *);
|
|
454 vd1[1].ptr = &size;
|
|
455 vd1[1].size = sizeof (size);
|
|
456
|
|
457 if (!offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
|
|
458 vd1, NULL))
|
|
459 return false;
|
|
460
|
|
461 VarDesc vd2 = vd_host2tgt;
|
|
462 vd2.ptr = (void *) host_ptr;
|
|
463 vd2.size = size;
|
|
464
|
|
465 return offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
|
|
466 &vd2, NULL);
|
|
467 }
|
|
468
|
|
469 extern "C" bool
|
|
470 GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
|
|
471 size_t size)
|
|
472 {
|
|
473 TRACE ("(device = %d, host_ptr = %p, tgt_ptr = %p, size = %d)",
|
|
474 device, host_ptr, tgt_ptr, size);
|
|
475 if (!size)
|
|
476 return true;
|
|
477
|
|
478 VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
|
|
479 vd1[0].ptr = &tgt_ptr;
|
|
480 vd1[0].size = sizeof (void *);
|
|
481 vd1[1].ptr = &size;
|
|
482 vd1[1].size = sizeof (size);
|
|
483
|
|
484 if (!offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
|
|
485 vd1, NULL))
|
|
486 return false;
|
|
487
|
|
488 VarDesc vd2 = vd_tgt2host;
|
|
489 vd2.ptr = (void *) host_ptr;
|
|
490 vd2.size = size;
|
|
491
|
|
492 return offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
|
|
493 &vd2, NULL);
|
|
494 }
|
|
495
|
|
496 extern "C" bool
|
|
497 GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
|
|
498 size_t size)
|
|
499 {
|
|
500 TRACE ("(device = %d, dst_ptr = %p, src_ptr = %p, size = %d)",
|
|
501 device, dst_ptr, src_ptr, size);
|
|
502 if (!size)
|
|
503 return true;
|
|
504
|
|
505 VarDesc vd[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
|
|
506 vd[0].ptr = &dst_ptr;
|
|
507 vd[0].size = sizeof (void *);
|
|
508 vd[1].ptr = &src_ptr;
|
|
509 vd[1].size = sizeof (void *);
|
|
510 vd[2].ptr = &size;
|
|
511 vd[2].size = sizeof (size);
|
|
512
|
|
513 return offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3,
|
|
514 vd, NULL);
|
|
515 }
|
|
516
|
|
517 extern "C" void
|
|
518 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
|
|
519 void **, void *async_data)
|
|
520 {
|
|
521 TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
|
|
522 tgt_fn, tgt_vars, async_data);
|
|
523
|
|
524 VarDesc vd[2] = { vd_host2tgt, vd_host2tgt };
|
|
525 vd[0].ptr = &tgt_fn;
|
|
526 vd[0].size = sizeof (void *);
|
|
527 vd[1].ptr = &tgt_vars;
|
|
528 vd[1].size = sizeof (void *);
|
|
529
|
|
530 offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd,
|
|
531 (const void **) async_data);
|
|
532 }
|
|
533
|
|
534 extern "C" void
|
|
535 GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **)
|
|
536 {
|
|
537 TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);
|
|
538
|
|
539 GOMP_OFFLOAD_async_run (device, tgt_fn, tgt_vars, NULL, NULL);
|
|
540 }
|