111
|
1 /*
|
|
2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
|
|
3
|
|
4 Redistribution and use in source and binary forms, with or without
|
|
5 modification, are permitted provided that the following conditions
|
|
6 are met:
|
|
7
|
|
8 * Redistributions of source code must retain the above copyright
|
|
9 notice, this list of conditions and the following disclaimer.
|
|
10 * Redistributions in binary form must reproduce the above copyright
|
|
11 notice, this list of conditions and the following disclaimer in the
|
|
12 documentation and/or other materials provided with the distribution.
|
|
13 * Neither the name of Intel Corporation nor the names of its
|
|
14 contributors may be used to endorse or promote products derived
|
|
15 from this software without specific prior written permission.
|
|
16
|
|
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
28 */
|
|
29
|
|
30
|
|
31 #ifndef OFFLOAD_ENGINE_H_INCLUDED
|
|
32 #define OFFLOAD_ENGINE_H_INCLUDED
|
|
33
|
|
34 #include <limits.h>
|
|
35 #include <bitset>
|
|
36 #include <list>
|
|
37 #include <set>
|
|
38 #include <map>
|
|
39 #include "offload_common.h"
|
|
40 #include "coi/coi_client.h"
|
|
41
|
|
// Sentinel stored in a signal map entry once the offload associated with
// that signal has finished; never a valid OffloadDescriptor pointer.
#define SIGNAL_HAS_COMPLETED ((OffloadDescriptor *)-1)
// Stream handle value meaning "no stream was specified".
const int64_t no_stream = -1;
|
|
44
|
|
// A half-open CPU address range [start, end).
class MemRange {
public:
    MemRange() : m_start(0), m_length(0) {}
    MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {}

    // First byte of the range.
    const void* start() const {
        return m_start;
    }

    // One past the last byte of the range.
    const void* end() const {
        return static_cast<const char*>(m_start) + m_length;
    }

    // Size of the range in bytes.
    uint64_t length() const {
        return m_length;
    }

    // True when this range and 'other' share at least one byte.
    // For half-open ranges A and B that is: B.start < A.end && B.end > A.start.
    bool overlaps(const MemRange &other) const {
        return other.start() < end() && other.end() > start();
    }

    // True when 'other' lies entirely within this range.
    bool contains(const MemRange &other) const {
        return other.start() >= start() && other.end() <= end();
    }

private:
    const void* m_start;
    uint64_t    m_length;
};
|
|
79
|
|
80 // Data associated with a pointer variable
|
|
81 class PtrData {
|
|
82 public:
|
|
83 PtrData(const void *addr, uint64_t len) :
|
|
84 cpu_addr(addr, len), cpu_buf(0),
|
|
85 mic_addr(0), alloc_disp(0), mic_buf(0), mic_offset(0),
|
|
86 ref_count(0), is_static(false), is_omp_associate(false)
|
|
87 {}
|
|
88
|
|
89 //
|
|
90 // Copy constructor
|
|
91 //
|
|
92 PtrData(const PtrData& ptr):
|
|
93 cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf),
|
|
94 mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp),
|
|
95 mic_buf(ptr.mic_buf), mic_offset(ptr.mic_offset),
|
|
96 ref_count(ptr.ref_count), is_static(ptr.is_static),
|
|
97 is_omp_associate(ptr.is_omp_associate),
|
|
98 var_alloc_type(0)
|
|
99 {}
|
|
100
|
|
101 bool operator<(const PtrData &o) const {
|
|
102 // Variables are sorted by the CPU start address.
|
|
103 // Overlapping memory ranges are considered equal.
|
|
104 return (cpu_addr.start() < o.cpu_addr.start()) &&
|
|
105 !cpu_addr.overlaps(o.cpu_addr);
|
|
106 }
|
|
107
|
|
108 long add_reference() {
|
|
109 if (is_omp_associate || (is_static && !var_alloc_type)) {
|
|
110 return LONG_MAX;
|
|
111 }
|
|
112 #ifndef TARGET_WINNT
|
|
113 return __sync_fetch_and_add(&ref_count, 1);
|
|
114 #else // TARGET_WINNT
|
|
115 return _InterlockedIncrement(&ref_count) - 1;
|
|
116 #endif // TARGET_WINNT
|
|
117 }
|
|
118
|
|
119 long remove_reference() {
|
|
120 if (is_omp_associate || (is_static && !var_alloc_type)) {
|
|
121 return LONG_MAX;
|
|
122 }
|
|
123 #ifndef TARGET_WINNT
|
|
124 return __sync_sub_and_fetch(&ref_count, 1);
|
|
125 #else // TARGET_WINNT
|
|
126 return _InterlockedDecrement(&ref_count);
|
|
127 #endif // TARGET_WINNT
|
|
128 }
|
|
129
|
|
130 long get_reference() const {
|
|
131 if (is_omp_associate || (is_static && !var_alloc_type)) {
|
|
132 return LONG_MAX;
|
|
133 }
|
|
134 return ref_count;
|
|
135 }
|
|
136
|
|
137 public:
|
|
138 // CPU address range
|
|
139 const MemRange cpu_addr;
|
|
140
|
|
141 // CPU and MIC buffers
|
|
142 COIBUFFER cpu_buf;
|
|
143 COIBUFFER mic_buf;
|
|
144
|
|
145 // placeholder for buffer address on mic
|
|
146 uint64_t mic_addr;
|
|
147
|
|
148 uint64_t alloc_disp;
|
|
149
|
|
150 // additional offset to pointer data on MIC for improving bandwidth for
|
|
151 // data which is not 4K aligned
|
|
152 uint32_t mic_offset;
|
|
153
|
|
154 // if true buffers are created from static memory
|
|
155 bool is_static;
|
|
156
|
|
157 // true if MIC buffer created by omp_target_associate
|
|
158 bool is_omp_associate;
|
|
159
|
|
160 bool var_alloc_type;
|
|
161 mutex_t alloc_ptr_data_lock;
|
|
162
|
|
163 private:
|
|
164 // reference count for the entry
|
|
165 long ref_count;
|
|
166 };
|
|
167
|
|
// List of non-owning pointers into the PtrData tables.
typedef std::list<PtrData*> PtrDataList;
|
|
169
|
|
170 class PtrDataTable {
|
|
171 public:
|
|
172 typedef std::set<PtrData> PtrSet;
|
|
173
|
|
174 PtrData* find_ptr_data(const void *ptr) {
|
|
175 m_ptr_lock.lock();
|
|
176 PtrSet::iterator res = list.find(PtrData(ptr, 0));
|
|
177
|
|
178 m_ptr_lock.unlock();
|
|
179 if (res == list.end()) {
|
|
180 return 0;
|
|
181 }
|
|
182 return const_cast<PtrData*>(res.operator->());
|
|
183 }
|
|
184
|
|
185 PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
|
|
186 m_ptr_lock.lock();
|
|
187 std::pair<PtrSet::iterator, bool> res =
|
|
188 list.insert(PtrData(ptr, len));
|
|
189
|
|
190 PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
|
|
191 m_ptr_lock.unlock();
|
|
192
|
|
193 is_new = res.second;
|
|
194 if (is_new) {
|
|
195 // It's necessary to lock as soon as possible.
|
|
196 // unlock must be done at call site of insert_ptr_data at
|
|
197 // branch for is_new
|
|
198 ptr_data->alloc_ptr_data_lock.lock();
|
|
199 }
|
|
200 return ptr_data;
|
|
201 }
|
|
202
|
|
203 void remove_ptr_data(const void *ptr) {
|
|
204 m_ptr_lock.lock();
|
|
205 list.erase(PtrData(ptr, 0));
|
|
206 m_ptr_lock.unlock();
|
|
207 }
|
|
208 private:
|
|
209
|
|
210 PtrSet list;
|
|
211 mutex_t m_ptr_lock;
|
|
212 };
|
|
213
|
|
214 // Data associated with automatic variable
|
|
215 class AutoData {
|
|
216 public:
|
|
217 AutoData(const void *addr, uint64_t len) :
|
|
218 cpu_addr(addr, len), ref_count(0)
|
|
219 {}
|
|
220
|
|
221 bool operator<(const AutoData &o) const {
|
|
222 // Variables are sorted by the CPU start address.
|
|
223 // Overlapping memory ranges are considered equal.
|
|
224 return (cpu_addr.start() < o.cpu_addr.start()) &&
|
|
225 !cpu_addr.overlaps(o.cpu_addr);
|
|
226 }
|
|
227
|
|
228 long add_reference() {
|
|
229 #ifndef TARGET_WINNT
|
|
230 return __sync_fetch_and_add(&ref_count, 1);
|
|
231 #else // TARGET_WINNT
|
|
232 return _InterlockedIncrement(&ref_count) - 1;
|
|
233 #endif // TARGET_WINNT
|
|
234 }
|
|
235
|
|
236 long remove_reference() {
|
|
237 #ifndef TARGET_WINNT
|
|
238 return __sync_sub_and_fetch(&ref_count, 1);
|
|
239 #else // TARGET_WINNT
|
|
240 return _InterlockedDecrement(&ref_count);
|
|
241 #endif // TARGET_WINNT
|
|
242 }
|
|
243
|
|
244 long nullify_reference() {
|
|
245 #ifndef TARGET_WINNT
|
|
246 return __sync_lock_test_and_set(&ref_count, 0);
|
|
247 #else // TARGET_WINNT
|
|
248 return _InterlockedExchange(&ref_count,0);
|
|
249 #endif // TARGET_WINNT
|
|
250 }
|
|
251
|
|
252 long get_reference() const {
|
|
253 return ref_count;
|
|
254 }
|
|
255
|
|
256 public:
|
|
257 // CPU address range
|
|
258 const MemRange cpu_addr;
|
|
259
|
|
260 private:
|
|
261 // reference count for the entry
|
|
262 long ref_count;
|
|
263 };
|
|
264
|
|
// Set of automatic variables, keyed by CPU start address
// (see AutoData::operator<).
typedef std::set<AutoData> AutoSet;
|
|
267
|
|
// Target image data: one target-side library image queued for loading,
// together with the host file it was extracted from.
struct TargetImage
{
    TargetImage(const char *_name, const void *_data, uint64_t _size,
                const char *_origin, uint64_t _offset) :
        name(_name),
        data(_data),
        size(_size),
        origin(_origin),
        offset(_offset)
    {}

    // library name
    const char* name;

    // image contents and size in bytes
    const void* data;
    uint64_t    size;

    // file the image came from, and the image offset within that file
    const char* origin;
    uint64_t    offset;
};
|
|
288
|
|
// Images queued for loading onto the target device.
typedef std::list<TargetImage> TargetImageList;
|
|
290
|
|
// dynamic library and Image associated with lib
struct DynLib
{
    // _name - library name
    // _data - library image contents
    // _lib  - handle of the library loaded through COI
    DynLib(const char *_name, const void *_data,
           COILIBRARY _lib) :
        name(_name), data(_data), lib(_lib)
    {}
    // library name
    const char* name;

    // contents
    const void* data;

    // handle of the library loaded through COI
    COILIBRARY lib;
};
|
|
// Dynamic libraries pending registration on the target.
typedef std::list<DynLib> DynLibList;
|
|
307
|
|
308 // Data associated with persistent auto objects
|
|
309 struct PersistData
|
|
310 {
|
|
311 PersistData(const void *addr, uint64_t routine_num,
|
|
312 uint64_t size, uint64_t thread) :
|
|
313 stack_cpu_addr(addr), routine_id(routine_num), thread_id(thread)
|
|
314 {
|
|
315 stack_ptr_data = new PtrData(0, size);
|
|
316 }
|
|
317 // 1-st key value - beginning of the stack at CPU
|
|
318 const void * stack_cpu_addr;
|
|
319 // 2-nd key value - identifier of routine invocation at CPU
|
|
320 uint64_t routine_id;
|
|
321 // 3-rd key value - thread identifier
|
|
322 uint64_t thread_id;
|
|
323
|
|
324 // corresponded PtrData; only stack_ptr_data->mic_buf is used
|
|
325 PtrData * stack_ptr_data;
|
|
326 // used to get offset of the variable in stack buffer
|
|
327 char * cpu_stack_addr;
|
|
328 };
|
|
329
|
|
// Persistent stacks currently active on an engine.
typedef std::list<PersistData> PersistDataList;
|
|
331
|
|
332 // Data associated with stream
|
|
333 struct Stream
|
|
334 {
|
|
335 Stream(int device, int num_of_cpus) :
|
|
336 m_number_of_cpus(num_of_cpus), m_pipeline(0), m_last_offload(0),
|
|
337 m_device(device)
|
|
338 {}
|
|
339 ~Stream() {
|
|
340 if (m_pipeline) {
|
|
341 COI::PipelineDestroy(m_pipeline);
|
|
342 }
|
|
343 }
|
|
344
|
|
345 COIPIPELINE get_pipeline(void) {
|
|
346 return(m_pipeline);
|
|
347 }
|
|
348
|
|
349 int get_device(void) {
|
|
350 return(m_device);
|
|
351 }
|
|
352
|
|
353 int get_cpu_number(void) {
|
|
354 return(m_number_of_cpus);
|
|
355 }
|
|
356
|
|
357 void set_pipeline(COIPIPELINE pipeline) {
|
|
358 m_pipeline = pipeline;
|
|
359 }
|
|
360
|
|
361 OffloadDescriptor* get_last_offload(void) {
|
|
362 return(m_last_offload);
|
|
363 }
|
|
364
|
|
365 void set_last_offload(OffloadDescriptor* last_offload) {
|
|
366 m_last_offload = last_offload;
|
|
367 }
|
|
368
|
|
369 static Stream* find_stream(uint64_t handle, bool remove);
|
|
370
|
|
371 static _Offload_stream add_stream(int device, int number_of_cpus) {
|
|
372 _Offload_stream result;
|
|
373 m_stream_lock.lock();
|
|
374 result = ++m_streams_count;
|
|
375 all_streams[m_streams_count] = new Stream(device, number_of_cpus);
|
|
376 m_stream_lock.unlock();
|
|
377 return(result);
|
|
378 }
|
|
379
|
|
380 static uint64_t get_streams_count() {
|
|
381 return m_streams_count;
|
|
382 }
|
|
383
|
|
384 typedef std::map<uint64_t, Stream*> StreamMap;
|
|
385
|
|
386 static uint64_t m_streams_count;
|
|
387 static StreamMap all_streams;
|
|
388 static mutex_t m_stream_lock;
|
|
389
|
|
390 int m_device;
|
|
391
|
|
392 // number of cpus
|
|
393 int m_number_of_cpus;
|
|
394
|
|
395 // The pipeline associated with the stream
|
|
396 COIPIPELINE m_pipeline;
|
|
397
|
|
398 // The last offload occured via the stream
|
|
399 OffloadDescriptor* m_last_offload;
|
|
400
|
|
401 // Cpus used by the stream
|
|
402 std::bitset<COI_MAX_HW_THREADS> m_stream_cpus;
|
|
403 };
|
|
404
|
|
// Handle -> stream table type (same shape as Stream::StreamMap).
typedef std::map<uint64_t, Stream*> StreamMap;
// One bit per target hardware thread.
typedef std::bitset<COI_MAX_HW_THREADS> micLcpuMask;
|
|
407
|
|
// ordered by count double linked list of cpus used by streams;
// nodes are stored in a contiguous array (see Engine::m_cpus), so a node's
// cpu index is its position in that array (see the CPU_INDEX macro below)
typedef struct CpuEl{
    uint64_t count;     // number of streams using the cpu
    struct CpuEl* prev; // cpu with the same or lesser count
    struct CpuEl* next; // cpu with the same or greater count
} CpuEl;
|
|
414
|
|
// class representing a single engine (one target device): owns the COI
// process, the pipelines, the pointer/auto-variable association tables,
// the signal map and the stream bookkeeping for that device
struct Engine {
    friend void __offload_init_library_once(void);
    friend void __offload_fini_library(void);

// Index of a CpuEl node within the m_cpus array (x is a CpuEl*).
#define CPU_INDEX(x) (x - m_cpus)
// Abort the process on a COI failure: a died target process is shut down
// first, any other error is reported through liboffload error support.
#define check_result(res, tag, ...) \
    { \
        if (res == COI_PROCESS_DIED) { \
            fini_process(true); \
            exit(1); \
        } \
        if (res != COI_SUCCESS) { \
            __liboffload_error_support(tag, __VA_ARGS__); \
            exit(1); \
        } \
    }

    // logical device index
    int get_logical_index() const {
        return m_index;
    }

    // physical device index
    int get_physical_index() const {
        return m_physical_index;
    }

    // COI process handle for the device
    const COIPROCESS& get_process() const {
        return m_process;
    }

    // false until init() completes, and again after new libraries are added
    bool get_ready() {
        return m_ready;
    }

    uint64_t get_thread_id(void);

    // initialize device
    void init(void);

    // unload library
    void unload_library(const void *data, const char *name);

    // add new library; marks the engine not-ready so the image is loaded
    // on the next init
    void add_lib(const TargetImage &lib)
    {
        m_lock.lock();
        m_ready = false;
        m_images.push_back(lib);
        m_lock.unlock();
    }

    // run the compute function on the device through the given stream
    COIRESULT compute(
        _Offload_stream stream,
        const std::list<COIBUFFER> &buffers,
        const void* data,
        uint16_t data_size,
        void* ret,
        uint16_t ret_size,
        uint32_t num_deps,
        const COIEVENT* deps,
        COIEVENT* event
    );

#ifdef MYO_SUPPORT
    // temporary workaround for blocking behavior for myoiLibInit/Fini calls
    void init_myo(COIEVENT *event) {
        COIRESULT res;
        res = COI::PipelineRunFunction(get_pipeline(),
                                       m_funcs[c_func_myo_init],
                                       0, 0, 0, 0, 0, 0, 0, 0, 0,
                                       event);
        check_result(res, c_pipeline_run_func, m_index, res);
    }

    void fini_myo(COIEVENT *event) {
        COIRESULT res;
        res = COI::PipelineRunFunction(get_pipeline(),
                                       m_funcs[c_func_myo_fini],
                                       0, 0, 0, 0, 0, 0, 0, 0, 0,
                                       event);
        check_result(res, c_pipeline_run_func, m_index, res);
    }
#endif // MYO_SUPPORT

    //
    // Memory association table
    //
    PtrData* find_ptr_data(const void *ptr) {
        return m_ptr_set.find_ptr_data(ptr);
    }

    PtrData* find_targetptr_data(const void *ptr) {
        return m_targetptr_set.find_ptr_data(ptr);
    }

    PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
        return m_ptr_set.insert_ptr_data(ptr, len, is_new);
    }

    PtrData* insert_targetptr_data(const void *ptr, uint64_t len,
                                   bool &is_new) {
        return m_targetptr_set.insert_ptr_data(ptr, len, is_new);
    }

    void remove_ptr_data(const void *ptr) {
        m_ptr_set.remove_ptr_data(ptr);
    }

    void remove_targetptr_data(const void *ptr) {
        m_targetptr_set.remove_ptr_data(ptr);
    }

    //
    // Automatic variables (per-thread set, see get_auto_vars)
    //
    // find entry covering ptr; returns 0 when absent
    AutoData* find_auto_data(const void *ptr) {
        AutoSet &auto_vars = get_auto_vars();
        AutoSet::iterator res = auto_vars.find(AutoData(ptr, 0));
        if (res == auto_vars.end()) {
            return 0;
        }
        return const_cast<AutoData*>(res.operator->());
    }

    // insert entry for [ptr, ptr+len), or return the existing one
    AutoData* insert_auto_data(const void *ptr, uint64_t len) {
        AutoSet &auto_vars = get_auto_vars();
        std::pair<AutoSet::iterator, bool> res =
            auto_vars.insert(AutoData(ptr, len));
        return const_cast<AutoData*>(res.first.operator->());
    }

    void remove_auto_data(const void *ptr) {
        get_auto_vars().erase(AutoData(ptr, 0));
    }

    //
    // Signals
    //
    void add_signal(const void *signal, OffloadDescriptor *desc) {
        m_signal_lock.lock();
        m_signal_map[signal] = desc;
        m_signal_lock.unlock();
    }

    // Look up the offload registered under 'signal'; returns 0 when the
    // signal is unknown, SIGNAL_HAS_COMPLETED when it already finished.
    // With remove=true the entry is marked completed while still returning
    // the descriptor it held.
    OffloadDescriptor* find_signal(const void *signal, bool remove) {
        OffloadDescriptor *desc = 0;

        m_signal_lock.lock();
        {
            SignalMap::iterator it = m_signal_map.find(signal);
            if (it != m_signal_map.end()) {
                desc = it->second;
                if (remove) {
                    it->second = SIGNAL_HAS_COMPLETED;
                }
            }
        }
        m_signal_lock.unlock();

        return desc;
    }

    // Mark the offload registered under 'signal' as completed, if present.
    void complete_signaled_ofld(const void *signal) {

        m_signal_lock.lock();
        {
            SignalMap::iterator it = m_signal_map.find(signal);
            if (it != m_signal_map.end()) {
                it->second = SIGNAL_HAS_COMPLETED;
            }
        }
        m_signal_lock.unlock();
    }

    void stream_destroy(_Offload_stream handle);

    // maintain the count-ordered cpu list (see CpuEl)
    void move_cpu_el_after(CpuEl* cpu_what, CpuEl* cpu_after);
    void print_stream_cpu_list(const char *);

    COIPIPELINE get_pipeline(_Offload_stream stream);

    // NOTE(review): returns the map by value — callers get a snapshot copy,
    // taken without holding m_stream_lock
    StreamMap get_stream_map() {
        return m_stream_map;
    }

    // stop device process
    void fini_process(bool verbose);

    // list of stacks active at the engine
    PersistDataList m_persist_list;

private:
    Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false),
               m_proc_number(0), m_assigned_cpus(0), m_cpus(0), m_cpu_head(0)
    {}

    // Tear down all streams, the device process and the cpu mask.
    ~Engine() {
        m_ready = false;
        for (StreamMap::iterator it = m_stream_map.begin();
             it != m_stream_map.end(); it++) {
            Stream * stream = it->second;
            delete stream;
        }
        if (m_process != 0) {
            fini_process(false);
        }
        if (m_assigned_cpus) {
            delete m_assigned_cpus;
        }
    }

    // set indexes
    void set_indexes(int logical_index, int physical_index) {
        m_index = logical_index;
        m_physical_index = physical_index;
    }

    // set CPU mask (takes ownership; freed in the destructor)
    void set_cpu_mask(micLcpuMask *cpu_mask)
    {
        m_assigned_cpus = cpu_mask;
    }

    // start process on device
    void init_process();

    void load_libraries(void);
    void init_ptr_data(void);

    // performs library initialization on the device side
    pid_t init_device(void);

private:
    // get pipeline associated with a calling thread
    COIPIPELINE get_pipeline(void);

    // get automatic vars set associated with the calling thread
    AutoSet& get_auto_vars(void);

    // destructor for thread data
    static void destroy_thread_data(void *data);

private:
    typedef std::set<PtrData> PtrSet;
    typedef std::map<const void*, OffloadDescriptor*> SignalMap;

    // device indexes
    int m_index;
    int m_physical_index;

    // cpu mask
    micLcpuMask *m_assigned_cpus;

    // number of COI pipes created for the engine
    long m_proc_number;

    // process handle
    COIPROCESS m_process;

    // If false, device either has not been initialized or new libraries
    // have been added.
    bool m_ready;
    mutex_t m_lock;

    // List of libraries to be loaded
    TargetImageList m_images;

    // var tables
    PtrDataTable m_ptr_set;
    PtrDataTable m_targetptr_set;

    // signals
    SignalMap m_signal_map;
    mutex_t m_signal_lock;

    // streams
    StreamMap m_stream_map;
    mutex_t m_stream_lock;
    int m_num_cores;
    int m_num_threads;
    // array of per-cpu list nodes and the head of the count-ordered list
    CpuEl* m_cpus;
    CpuEl* m_cpu_head;

    // List of dynamic libraries to be registered
    DynLibList m_dyn_libs;

    // constants for accessing device function handles
    enum {
        c_func_compute = 0,
#ifdef MYO_SUPPORT
        c_func_myo_init,
        c_func_myo_fini,
#endif // MYO_SUPPORT
        c_func_init,
        c_func_var_table_size,
        c_func_var_table_copy,
        c_func_set_stream_affinity,
        c_funcs_total
    };
    static const char* m_func_names[c_funcs_total];

    // device function handles
    COIFUNCTION m_funcs[c_funcs_total];

    // int -> name mapping for device signals
    static const int c_signal_max = 32;
    static const char* c_signal_names[c_signal_max];
};
|
|
723
|
|
724 #endif // OFFLOAD_ENGINE_H_INCLUDED
|