/* Producing binary form of HSA BRIG from our internal representation.
   Copyright (C) 2013-2018 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz> and
   Martin Liska <mliska@suse.cz>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
|
|
21
|
|
22 #include "config.h"
|
|
23 #include "system.h"
|
|
24 #include "coretypes.h"
|
|
25 #include "tm.h"
|
|
26 #include "target.h"
|
|
27 #include "memmodel.h"
|
|
28 #include "tm_p.h"
|
|
29 #include "is-a.h"
|
|
30 #include "vec.h"
|
|
31 #include "hash-table.h"
|
|
32 #include "hash-map.h"
|
|
33 #include "tree.h"
|
|
34 #include "tree-iterator.h"
|
|
35 #include "stor-layout.h"
|
|
36 #include "output.h"
|
|
37 #include "basic-block.h"
|
131
|
38 #include "function.h"
|
111
|
39 #include "cfg.h"
|
|
40 #include "fold-const.h"
|
|
41 #include "stringpool.h"
|
|
42 #include "gimple-pretty-print.h"
|
|
43 #include "diagnostic-core.h"
|
|
44 #include "cgraph.h"
|
|
45 #include "dumpfile.h"
|
|
46 #include "print-tree.h"
|
|
47 #include "symbol-summary.h"
|
|
48 #include "hsa-common.h"
|
|
49 #include "gomp-constants.h"
|
|
50
|
|
/* Return the 16-bit value VAL in little-endian byte order.  The value is
   returned unchanged unless the host is big-endian, in which case its two
   bytes are swapped.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap16 (val);
# else
  /* Little-endian and PDP-endian hosts need no 16-bit conversion.  */
  return val;
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Slower but safe default for compilers without the builtin.  */
  return (val >> 8) | (val << 8);
#else
  return val;
#endif
}
|
|
73
|
|
/* Return the 32-bit value VAL in little-endian byte order.  Little-endian
   hosts get VAL back unchanged, big-endian hosts get all four bytes
   reversed and PDP-endian hosts get the two 16-bit halves exchanged.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  return (val >> 16) | (val << 16);
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Slower but safe default: swap the bytes inside each half, then swap
     the halves.  */
  uint32_t swapped = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  return (swapped >> 16) | (swapped << 16);
#else
  return val;
#endif
}
|
|
97
|
|
/* Return the 64-bit value VAL in little-endian byte order.  Little-endian
   hosts get VAL back unchanged, big-endian hosts get all eight bytes
   reversed and PDP-endian hosts get the four 16-bit words reversed.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  return (((val & 0xffffull) << 48)
          | ((val & 0xffff0000ull) << 16)
          | ((val & 0xffff00000000ull) >> 16)
          | ((val & 0xffff000000000000ull) >> 48));
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Slower but safe default: swap neighbouring bytes, then neighbouring
     16-bit words, then the two 32-bit halves.  */
  uint64_t swapped = (((val & 0xff00ff00ff00ff00ull) >> 8)
                      | ((val & 0x00ff00ff00ff00ffull) << 8));
  swapped = (((swapped & 0xffff0000ffff0000ull) >> 16)
             | ((swapped & 0x0000ffff0000ffffull) << 16));
  return (swapped >> 32) | (swapped << 32);
#else
  return val;
#endif
}
|
|
127
|
|
/* Names used when producing BRIG: the ELF section name, the label string
   and the names given to the data, code and operand BRIG sections.  */

#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME "hsa_data"
#define BRIG_SECTION_CODE_NAME "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Size in bytes of each buffer (chunk) of BRIG section data.  */

#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
|
|
139
|
|
/* One buffer of BRIG binary data.  A section stores its contents in a
   sequence of these.  */

struct hsa_brig_data_chunk
{
  /* Number of bytes of DATA that are already in use.  */
  unsigned size;

  /* The buffer holding the bytes.  */
  char *data;
};
|
|
150
|
|
151 /* Structure representing a BRIG section, holding and writing its data. */
|
|
152
|
|
153 class hsa_brig_section
|
|
154 {
|
|
155 public:
|
|
156 /* Section name that will be output to the BRIG. */
|
|
157 const char *section_name;
|
|
158 /* Size in bytes of all data stored in the section. */
|
|
159 unsigned total_size;
|
|
160 /* The size of the header of the section including padding. */
|
|
161 unsigned header_byte_count;
|
|
162 /* The size of the header of the section without any padding. */
|
|
163 unsigned header_byte_delta;
|
|
164
|
|
165 void init (const char *name);
|
|
166 void release ();
|
|
167 void output ();
|
|
168 unsigned add (const void *data, unsigned len, void **output = NULL);
|
|
169 void round_size_up (int factor);
|
|
170 void *get_ptr_by_offset (unsigned int offset);
|
|
171
|
|
172 private:
|
|
173 void allocate_new_chunk ();
|
|
174
|
|
175 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
|
|
176 vec <struct hsa_brig_data_chunk> chunks;
|
|
177
|
|
178 /* More convenient access to the last chunk from the vector above. */
|
|
179 struct hsa_brig_data_chunk *cur_chunk;
|
|
180 };
|
|
181
|
|
182 static struct hsa_brig_section brig_data, brig_code, brig_operand;
|
|
183 static uint32_t brig_insn_count;
|
|
184 static bool brig_initialized = false;
|
|
185
|
|
186 /* Mapping between emitted HSA functions and their offset in code segment. */
|
|
187 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
|
|
188
|
|
189 /* Hash map of emitted function declarations. */
|
|
190 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
|
|
191
|
|
192 /* Hash table of emitted internal function declaration offsets. */
|
|
193 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
|
|
194
|
|
195 /* List of sbr instructions. */
|
|
196 static vec <hsa_insn_sbr *> *switch_instructions;
|
|
197
|
|
198 struct function_linkage_pair
|
|
199 {
|
|
200 function_linkage_pair (tree decl, unsigned int off)
|
|
201 : function_decl (decl), offset (off) {}
|
|
202
|
|
203 /* Declaration of called function. */
|
|
204 tree function_decl;
|
|
205
|
|
206 /* Offset in operand section. */
|
|
207 unsigned int offset;
|
|
208 };
|
|
209
|
|
210 /* Vector of function calls where we need to resolve function offsets. */
|
|
211 static auto_vec <function_linkage_pair> function_call_linkage;
|
|
212
|
|
213 /* Add a new chunk, allocate data for it and initialize it. */
|
|
214
|
|
215 void
|
|
216 hsa_brig_section::allocate_new_chunk ()
|
|
217 {
|
|
218 struct hsa_brig_data_chunk new_chunk;
|
|
219
|
|
220 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
|
|
221 new_chunk.size = 0;
|
|
222 cur_chunk = chunks.safe_push (new_chunk);
|
|
223 }
|
|
224
|
|
225 /* Initialize the brig section. */
|
|
226
|
|
227 void
|
|
228 hsa_brig_section::init (const char *name)
|
|
229 {
|
|
230 section_name = name;
|
|
231 /* While the following computation is basically wrong, because the intent
|
|
232 certainly wasn't to have the first character of name and padding, which
|
|
233 are a part of sizeof (BrigSectionHeader), included in the first addend,
|
|
234 this is what the disassembler expects. */
|
|
235 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
|
|
236 chunks.create (1);
|
|
237 allocate_new_chunk ();
|
|
238 header_byte_delta = total_size;
|
|
239 round_size_up (4);
|
|
240 header_byte_count = total_size;
|
|
241 }
|
|
242
|
|
243 /* Free all data in the section. */
|
|
244
|
|
245 void
|
|
246 hsa_brig_section::release ()
|
|
247 {
|
|
248 for (unsigned i = 0; i < chunks.length (); i++)
|
|
249 free (chunks[i].data);
|
|
250 chunks.release ();
|
|
251 cur_chunk = NULL;
|
|
252 }
|
|
253
|
|
254 /* Write the section to the output file to a section with the name given at
|
|
255 initialization. Switches the output section and does not restore it. */
|
|
256
|
|
257 void
|
|
258 hsa_brig_section::output ()
|
|
259 {
|
|
260 struct BrigSectionHeader section_header;
|
|
261 char padding[8];
|
|
262
|
|
263 section_header.byteCount = lendian64 (total_size);
|
|
264 section_header.headerByteCount = lendian32 (header_byte_count);
|
|
265 section_header.nameLength = lendian32 (strlen (section_name));
|
|
266 assemble_string ((const char *) §ion_header, 16);
|
|
267 assemble_string (section_name, (section_header.nameLength));
|
|
268 memset (&padding, 0, sizeof (padding));
|
|
269 /* This is also a consequence of the wrong header size computation described
|
|
270 in a comment in hsa_brig_section::init. */
|
|
271 assemble_string (padding, 8);
|
|
272 for (unsigned i = 0; i < chunks.length (); i++)
|
|
273 assemble_string (chunks[i].data, chunks[i].size);
|
|
274 }
|
|
275
|
|
276 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
|
|
277 which it was stored. If OUTPUT is not NULL, store into it the pointer to
|
|
278 the place where DATA was actually stored. */
|
|
279
|
|
280 unsigned
|
|
281 hsa_brig_section::add (const void *data, unsigned len, void **output)
|
|
282 {
|
|
283 unsigned offset = total_size;
|
|
284
|
|
285 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
|
|
286 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
|
|
287 allocate_new_chunk ();
|
|
288
|
|
289 char *dst = cur_chunk->data + cur_chunk->size;
|
|
290 memcpy (dst, data, len);
|
|
291 if (output)
|
|
292 *output = dst;
|
|
293 cur_chunk->size += len;
|
|
294 total_size += len;
|
|
295
|
|
296 return offset;
|
|
297 }
|
|
298
|
|
299 /* Add padding to section so that its size is divisible by FACTOR. */
|
|
300
|
|
301 void
|
|
302 hsa_brig_section::round_size_up (int factor)
|
|
303 {
|
|
304 unsigned padding, res = total_size % factor;
|
|
305
|
|
306 if (res == 0)
|
|
307 return;
|
|
308
|
|
309 padding = factor - res;
|
|
310 total_size += padding;
|
|
311 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
|
|
312 {
|
|
313 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
|
|
314 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
|
|
315 allocate_new_chunk ();
|
|
316 }
|
|
317
|
|
318 cur_chunk->size += padding;
|
|
319 }
|
|
320
|
|
321 /* Return pointer to data by global OFFSET in the section. */
|
|
322
|
|
323 void *
|
|
324 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
|
|
325 {
|
|
326 gcc_assert (offset < total_size);
|
|
327 offset -= header_byte_delta;
|
|
328
|
|
329 unsigned i;
|
|
330 for (i = 0; offset >= chunks[i].size; i++)
|
|
331 offset -= chunks[i].size;
|
|
332
|
|
333 return chunks[i].data + offset;
|
|
334 }
|
|
335
|
|
/* Entry of the hash table of strings already emitted to BRIG.  */

struct brig_string_slot
{
  /* The string itself, without the prefix character.  */
  const char *s;

  /* Character output just before S, or zero if there is none.  */
  char prefix;

  /* Length of S; the prefix is not counted.  */
  int len;

  /* Offset at which the string was stored in the data section.  */
  uint32_t offset;
};
|
|
345
|
|
346 /* Hash table helpers. */
|
|
347
|
|
348 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
|
|
349 {
|
|
350 static inline hashval_t hash (const value_type);
|
|
351 static inline bool equal (const value_type, const compare_type);
|
|
352 static inline void remove (value_type);
|
|
353 };
|
|
354
|
|
355 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
|
|
356 to support strings that may not end in '\0'. */
|
|
357
|
|
358 inline hashval_t
|
|
359 brig_string_slot_hasher::hash (const value_type ds)
|
|
360 {
|
|
361 hashval_t r = ds->len;
|
|
362 int i;
|
|
363
|
|
364 for (i = 0; i < ds->len; i++)
|
|
365 r = r * 67 + (unsigned) ds->s[i] - 113;
|
|
366 r = r * 67 + (unsigned) ds->prefix - 113;
|
|
367 return r;
|
|
368 }
|
|
369
|
|
370 /* Returns nonzero if DS1 and DS2 are equal. */
|
|
371
|
|
372 inline bool
|
|
373 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
|
|
374 {
|
|
375 if (ds1->len == ds2->len)
|
|
376 return ds1->prefix == ds2->prefix
|
|
377 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
|
|
378
|
|
379 return 0;
|
|
380 }
|
|
381
|
|
382 /* Deallocate memory for DS upon its removal. */
|
|
383
|
|
384 inline void
|
|
385 brig_string_slot_hasher::remove (value_type ds)
|
|
386 {
|
|
387 free (const_cast<char *> (ds->s));
|
|
388 free (ds);
|
|
389 }
|
|
390
|
|
391 /* Hash for strings we output in order not to duplicate them needlessly. */
|
|
392
|
|
393 static hash_table<brig_string_slot_hasher> *brig_string_htab;
|
|
394
|
|
395 /* Emit a null terminated string STR to the data section and return its
|
|
396 offset in it. If PREFIX is non-zero, output it just before STR too.
|
|
397 Sanitize the string if SANITIZE option is set to true. */
|
|
398
|
|
399 static unsigned
|
|
400 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
|
|
401 {
|
|
402 unsigned slen = strlen (str);
|
|
403 unsigned offset, len = slen + (prefix ? 1 : 0);
|
|
404 uint32_t hdr_len = lendian32 (len);
|
|
405 brig_string_slot s_slot;
|
|
406 brig_string_slot **slot;
|
|
407 char *str2;
|
|
408
|
|
409 str2 = xstrdup (str);
|
|
410
|
|
411 if (sanitize)
|
|
412 hsa_sanitize_name (str2);
|
|
413 s_slot.s = str2;
|
|
414 s_slot.len = slen;
|
|
415 s_slot.prefix = prefix;
|
|
416 s_slot.offset = 0;
|
|
417
|
|
418 slot = brig_string_htab->find_slot (&s_slot, INSERT);
|
|
419 if (*slot == NULL)
|
|
420 {
|
|
421 brig_string_slot *new_slot = XCNEW (brig_string_slot);
|
|
422
|
|
423 /* In theory we should fill in BrigData but that would mean copying
|
|
424 the string to a buffer for no reason, so we just emulate it. */
|
|
425 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
|
|
426 if (prefix)
|
|
427 brig_data.add (&prefix, 1);
|
|
428
|
|
429 brig_data.add (str2, slen);
|
|
430 brig_data.round_size_up (4);
|
|
431
|
|
432 /* TODO: could use the string we just copied into
|
|
433 brig_string->cur_chunk */
|
|
434 new_slot->s = str2;
|
|
435 new_slot->len = slen;
|
|
436 new_slot->prefix = prefix;
|
|
437 new_slot->offset = offset;
|
|
438 *slot = new_slot;
|
|
439 }
|
|
440 else
|
|
441 {
|
|
442 offset = (*slot)->offset;
|
|
443 free (str2);
|
|
444 }
|
|
445
|
|
446 return offset;
|
|
447 }
|
|
448
|
|
449 /* Linked list of queued operands. */
|
|
450
|
|
451 static struct operand_queue
|
|
452 {
|
|
453 /* First from the chain of queued operands. */
|
|
454 hsa_op_base *first_op, *last_op;
|
|
455
|
|
456 /* The offset at which the next operand will be enqueued. */
|
|
457 unsigned projected_size;
|
|
458
|
|
459 } op_queue;
|
|
460
|
|
461 /* Unless already initialized, initialize infrastructure to produce BRIG. */
|
|
462
|
|
463 static void
|
|
464 brig_init (void)
|
|
465 {
|
|
466 brig_insn_count = 0;
|
|
467
|
|
468 if (brig_initialized)
|
|
469 return;
|
|
470
|
|
471 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
|
|
472 brig_data.init (BRIG_SECTION_DATA_NAME);
|
|
473 brig_code.init (BRIG_SECTION_CODE_NAME);
|
|
474 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
|
|
475 brig_initialized = true;
|
|
476
|
|
477 struct BrigDirectiveModule moddir;
|
|
478 memset (&moddir, 0, sizeof (moddir));
|
|
479 moddir.base.byteCount = lendian16 (sizeof (moddir));
|
|
480
|
|
481 char *modname;
|
|
482 if (main_input_filename && *main_input_filename != '\0')
|
|
483 {
|
|
484 const char *part = strrchr (main_input_filename, '/');
|
|
485 if (!part)
|
|
486 part = main_input_filename;
|
|
487 else
|
|
488 part++;
|
|
489 modname = concat ("&__hsa_module_", part, NULL);
|
|
490 char *extension = strchr (modname, '.');
|
|
491 if (extension)
|
|
492 *extension = '\0';
|
|
493
|
|
494 /* As in LTO mode, we have to emit a different module names. */
|
|
495 if (flag_ltrans)
|
|
496 {
|
|
497 part = strrchr (asm_file_name, '/');
|
|
498 if (!part)
|
|
499 part = asm_file_name;
|
|
500 else
|
|
501 part++;
|
|
502 char *modname2;
|
|
503 modname2 = xasprintf ("%s_%s", modname, part);
|
|
504 free (modname);
|
|
505 modname = modname2;
|
|
506 }
|
|
507
|
|
508 hsa_sanitize_name (modname);
|
|
509 moddir.name = brig_emit_string (modname);
|
|
510 free (modname);
|
|
511 }
|
|
512 else
|
|
513 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
|
|
514 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
|
|
515 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
|
|
516 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
|
|
517 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
|
|
518 if (hsa_machine_large_p ())
|
|
519 moddir.machineModel = BRIG_MACHINE_LARGE;
|
|
520 else
|
|
521 moddir.machineModel = BRIG_MACHINE_SMALL;
|
|
522 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
|
|
523 brig_code.add (&moddir, sizeof (moddir));
|
|
524 }
|
|
525
|
|
526 /* Free all BRIG data. */
|
|
527
|
|
528 static void
|
|
529 brig_release_data (void)
|
|
530 {
|
|
531 delete brig_string_htab;
|
|
532 brig_data.release ();
|
|
533 brig_code.release ();
|
|
534 brig_operand.release ();
|
|
535
|
|
536 brig_initialized = 0;
|
|
537 }
|
|
538
|
|
539 /* Enqueue operation OP. Return the offset at which it will be stored. */
|
|
540
|
|
541 static unsigned int
|
|
542 enqueue_op (hsa_op_base *op)
|
|
543 {
|
|
544 unsigned ret;
|
|
545
|
|
546 if (op->m_brig_op_offset)
|
|
547 return op->m_brig_op_offset;
|
|
548
|
|
549 ret = op_queue.projected_size;
|
|
550 op->m_brig_op_offset = op_queue.projected_size;
|
|
551
|
|
552 if (!op_queue.first_op)
|
|
553 op_queue.first_op = op;
|
|
554 else
|
|
555 op_queue.last_op->m_next = op;
|
|
556 op_queue.last_op = op;
|
|
557
|
|
558 if (is_a <hsa_op_immed *> (op))
|
|
559 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
|
|
560 else if (is_a <hsa_op_reg *> (op))
|
|
561 op_queue.projected_size += sizeof (struct BrigOperandRegister);
|
|
562 else if (is_a <hsa_op_address *> (op))
|
|
563 op_queue.projected_size += sizeof (struct BrigOperandAddress);
|
|
564 else if (is_a <hsa_op_code_ref *> (op))
|
|
565 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
|
|
566 else if (is_a <hsa_op_code_list *> (op))
|
|
567 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
|
|
568 else if (is_a <hsa_op_operand_list *> (op))
|
|
569 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
|
|
570 else
|
|
571 gcc_unreachable ();
|
|
572 return ret;
|
|
573 }
|
|
574
|
|
575 static void emit_immediate_operand (hsa_op_immed *imm);
|
|
576
|
|
577 /* Emit directive describing a symbol if it has not been emitted already.
|
|
578 Return the offset of the directive. */
|
|
579
|
|
580 static unsigned
|
|
581 emit_directive_variable (struct hsa_symbol *symbol)
|
|
582 {
|
|
583 struct BrigDirectiveVariable dirvar;
|
|
584 unsigned name_offset;
|
|
585 static unsigned res_name_offset;
|
|
586
|
|
587 if (symbol->m_directive_offset)
|
|
588 return symbol->m_directive_offset;
|
|
589
|
|
590 memset (&dirvar, 0, sizeof (dirvar));
|
|
591 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
|
|
592 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
|
|
593 dirvar.allocation = symbol->m_allocation;
|
|
594
|
|
595 char prefix = symbol->m_global_scope_p ? '&' : '%';
|
|
596
|
|
597 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
|
|
598 {
|
|
599 if (res_name_offset == 0)
|
|
600 res_name_offset = brig_emit_string (symbol->m_name, '%');
|
|
601 name_offset = res_name_offset;
|
|
602 }
|
|
603 else if (symbol->m_name)
|
|
604 name_offset = brig_emit_string (symbol->m_name, prefix);
|
|
605 else
|
|
606 {
|
|
607 char buf[64];
|
|
608 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
|
|
609 symbol->m_name_number);
|
|
610 name_offset = brig_emit_string (buf, prefix);
|
|
611 }
|
|
612
|
|
613 dirvar.name = lendian32 (name_offset);
|
|
614
|
|
615 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
|
|
616 {
|
|
617 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
|
|
618 dirvar.init = lendian32 (enqueue_op (tmp));
|
|
619 }
|
|
620 else
|
|
621 dirvar.init = 0;
|
|
622 dirvar.type = lendian16 (symbol->m_type);
|
|
623 dirvar.segment = symbol->m_segment;
|
|
624 dirvar.align = symbol->m_align;
|
|
625 dirvar.linkage = symbol->m_linkage;
|
|
626 dirvar.dim.lo = symbol->m_dim;
|
|
627 dirvar.dim.hi = symbol->m_dim >> 32;
|
|
628
|
|
629 /* Global variables are just declared and linked via HSA runtime. */
|
|
630 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
|
|
631 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
|
|
632 dirvar.reserved = 0;
|
|
633
|
|
634 if (symbol->m_cst_value)
|
|
635 {
|
|
636 dirvar.modifier |= BRIG_VARIABLE_CONST;
|
|
637 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
|
|
638 }
|
|
639
|
|
640 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
|
|
641 return symbol->m_directive_offset;
|
|
642 }
|
|
643
|
|
644 /* Emit directives describing either a function declaration or definition F and
|
|
645 return the produced BrigDirectiveExecutable structure. The function does
|
|
646 not take into account any instructions when calculating nextModuleEntry
|
|
647 field of the produced BrigDirectiveExecutable structure so when emitting
|
|
648 actual definitions, this field needs to be updated after all of the function
|
|
649 is actually added to the code section. */
|
|
650
|
|
651 static BrigDirectiveExecutable *
|
|
652 emit_function_directives (hsa_function_representation *f, bool is_declaration)
|
|
653 {
|
|
654 struct BrigDirectiveExecutable fndir;
|
|
655 unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
|
|
656 int count = 0;
|
|
657 void *ptr_to_fndir;
|
|
658 hsa_symbol *sym;
|
|
659
|
|
660 if (!f->m_declaration_p)
|
|
661 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
|
|
662 {
|
|
663 gcc_assert (!sym->m_emitted_to_brig);
|
|
664 sym->m_emitted_to_brig = true;
|
|
665 emit_directive_variable (sym);
|
|
666 brig_insn_count++;
|
|
667 }
|
|
668
|
|
669 name_offset = brig_emit_string (f->m_name, '&');
|
|
670 inarg_off = brig_code.total_size + sizeof (fndir)
|
|
671 + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
|
|
672 scoped_off = inarg_off
|
|
673 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
|
|
674
|
|
675 if (!f->m_declaration_p)
|
|
676 {
|
|
677 count += f->m_spill_symbols.length ();
|
|
678 count += f->m_private_variables.length ();
|
|
679 }
|
|
680
|
|
681 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
|
|
682
|
|
683 memset (&fndir, 0, sizeof (fndir));
|
|
684 fndir.base.byteCount = lendian16 (sizeof (fndir));
|
|
685 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
|
|
686 : BRIG_KIND_DIRECTIVE_FUNCTION);
|
|
687 fndir.name = lendian32 (name_offset);
|
|
688 fndir.inArgCount = lendian16 (f->m_input_args.length ());
|
|
689 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
|
|
690 fndir.firstInArg = lendian32 (inarg_off);
|
|
691 fndir.firstCodeBlockEntry = lendian32 (scoped_off);
|
|
692 fndir.nextModuleEntry = lendian32 (next_toplev_off);
|
|
693 fndir.linkage = f->get_linkage ();
|
|
694 if (!f->m_declaration_p)
|
|
695 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
|
|
696 memset (&fndir.reserved, 0, sizeof (fndir.reserved));
|
|
697
|
|
698 /* Once we put a definition of function_offsets, we should not overwrite
|
|
699 it with a declaration of the function. */
|
|
700 if (f->m_internal_fn == NULL)
|
|
701 {
|
|
702 if (!function_offsets->get (f->m_decl) || !is_declaration)
|
|
703 function_offsets->put (f->m_decl, brig_code.total_size);
|
|
704 }
|
|
705 else
|
|
706 {
|
|
707 /* Internal function. */
|
|
708 hsa_internal_fn **slot
|
|
709 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
|
|
710 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
|
|
711 int_fn->m_offset = brig_code.total_size;
|
|
712 *slot = int_fn;
|
|
713 }
|
|
714
|
|
715 brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);
|
|
716
|
|
717 if (f->m_output_arg)
|
|
718 emit_directive_variable (f->m_output_arg);
|
|
719 for (unsigned i = 0; i < f->m_input_args.length (); i++)
|
|
720 emit_directive_variable (f->m_input_args[i]);
|
|
721
|
|
722 if (!f->m_declaration_p)
|
|
723 {
|
|
724 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
|
|
725 {
|
|
726 emit_directive_variable (sym);
|
|
727 brig_insn_count++;
|
|
728 }
|
|
729 for (unsigned i = 0; i < f->m_private_variables.length (); i++)
|
|
730 {
|
|
731 emit_directive_variable (f->m_private_variables[i]);
|
|
732 brig_insn_count++;
|
|
733 }
|
|
734 }
|
|
735
|
|
736 return (BrigDirectiveExecutable *) ptr_to_fndir;
|
|
737 }
|
|
738
|
|
739 /* Emit a label directive for the given HBB. We assume it is about to start on
|
|
740 the current offset in the code section. */
|
|
741
|
|
742 static void
|
|
743 emit_bb_label_directive (hsa_bb *hbb)
|
|
744 {
|
|
745 struct BrigDirectiveLabel lbldir;
|
|
746
|
|
747 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
|
|
748 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
|
|
749 char buf[32];
|
|
750 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
|
|
751 hbb->m_index);
|
|
752 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
|
|
753
|
|
754 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
|
|
755 sizeof (lbldir));
|
|
756 brig_insn_count++;
|
|
757 }
|
|
758
|
|
759 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
|
|
760 holding such, for constants and registers. */
|
|
761
|
|
762 static BrigType16_t
|
|
763 regtype_for_type (BrigType16_t t)
|
|
764 {
|
|
765 switch (t)
|
|
766 {
|
|
767 case BRIG_TYPE_B1:
|
|
768 return BRIG_TYPE_B1;
|
|
769
|
|
770 case BRIG_TYPE_U8:
|
|
771 case BRIG_TYPE_U16:
|
|
772 case BRIG_TYPE_U32:
|
|
773 case BRIG_TYPE_S8:
|
|
774 case BRIG_TYPE_S16:
|
|
775 case BRIG_TYPE_S32:
|
|
776 case BRIG_TYPE_B8:
|
|
777 case BRIG_TYPE_B16:
|
|
778 case BRIG_TYPE_B32:
|
|
779 case BRIG_TYPE_F16:
|
|
780 case BRIG_TYPE_F32:
|
|
781 case BRIG_TYPE_U8X4:
|
|
782 case BRIG_TYPE_U16X2:
|
|
783 case BRIG_TYPE_S8X4:
|
|
784 case BRIG_TYPE_S16X2:
|
|
785 case BRIG_TYPE_F16X2:
|
|
786 return BRIG_TYPE_B32;
|
|
787
|
|
788 case BRIG_TYPE_U64:
|
|
789 case BRIG_TYPE_S64:
|
|
790 case BRIG_TYPE_F64:
|
|
791 case BRIG_TYPE_B64:
|
|
792 case BRIG_TYPE_U8X8:
|
|
793 case BRIG_TYPE_U16X4:
|
|
794 case BRIG_TYPE_U32X2:
|
|
795 case BRIG_TYPE_S8X8:
|
|
796 case BRIG_TYPE_S16X4:
|
|
797 case BRIG_TYPE_S32X2:
|
|
798 case BRIG_TYPE_F16X4:
|
|
799 case BRIG_TYPE_F32X2:
|
|
800 return BRIG_TYPE_B64;
|
|
801
|
|
802 case BRIG_TYPE_B128:
|
|
803 case BRIG_TYPE_U8X16:
|
|
804 case BRIG_TYPE_U16X8:
|
|
805 case BRIG_TYPE_U32X4:
|
|
806 case BRIG_TYPE_U64X2:
|
|
807 case BRIG_TYPE_S8X16:
|
|
808 case BRIG_TYPE_S16X8:
|
|
809 case BRIG_TYPE_S32X4:
|
|
810 case BRIG_TYPE_S64X2:
|
|
811 case BRIG_TYPE_F16X8:
|
|
812 case BRIG_TYPE_F32X4:
|
|
813 case BRIG_TYPE_F64X2:
|
|
814 return BRIG_TYPE_B128;
|
|
815
|
|
816 default:
|
|
817 gcc_unreachable ();
|
|
818 }
|
|
819 }
|
|
820
|
|
821 /* Return the length of the BRIG type TYPE that is going to be streamed out as
|
|
822 an immediate constant (so it must not be B1). */
|
|
823
|
|
824 unsigned
|
|
825 hsa_get_imm_brig_type_len (BrigType16_t type)
|
|
826 {
|
|
827 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
|
|
828 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
|
|
829
|
|
830 switch (pack_type)
|
|
831 {
|
|
832 case BRIG_TYPE_PACK_NONE:
|
|
833 break;
|
|
834 case BRIG_TYPE_PACK_32:
|
|
835 return 4;
|
|
836 case BRIG_TYPE_PACK_64:
|
|
837 return 8;
|
|
838 case BRIG_TYPE_PACK_128:
|
|
839 return 16;
|
|
840 default:
|
|
841 gcc_unreachable ();
|
|
842 }
|
|
843
|
|
844 switch (base_type)
|
|
845 {
|
|
846 case BRIG_TYPE_U8:
|
|
847 case BRIG_TYPE_S8:
|
|
848 case BRIG_TYPE_B8:
|
|
849 return 1;
|
|
850 case BRIG_TYPE_U16:
|
|
851 case BRIG_TYPE_S16:
|
|
852 case BRIG_TYPE_F16:
|
|
853 case BRIG_TYPE_B16:
|
|
854 return 2;
|
|
855 case BRIG_TYPE_U32:
|
|
856 case BRIG_TYPE_S32:
|
|
857 case BRIG_TYPE_F32:
|
|
858 case BRIG_TYPE_B32:
|
|
859 return 4;
|
|
860 case BRIG_TYPE_U64:
|
|
861 case BRIG_TYPE_S64:
|
|
862 case BRIG_TYPE_F64:
|
|
863 case BRIG_TYPE_B64:
|
|
864 return 8;
|
|
865 case BRIG_TYPE_B128:
|
|
866 return 16;
|
|
867 default:
|
|
868 gcc_unreachable ();
|
|
869 }
|
|
870 }
|
|
871
|
|
872 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
|
|
873 If NEED_LEN is not equal to zero, shrink or extend the value
|
|
874 to NEED_LEN bytes. Return how many bytes were written. */
|
|
875
|
|
876 static int
|
|
877 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
|
|
878 {
|
|
879 union hsa_bytes bytes;
|
|
880
|
|
881 memset (&bytes, 0, sizeof (bytes));
|
|
882 tree type = TREE_TYPE (value);
|
|
883 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
|
|
884
|
|
885 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
|
|
886 if (INTEGRAL_TYPE_P (type)
|
|
887 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
|
|
888 switch (data_len)
|
|
889 {
|
|
890 case 1:
|
|
891 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
|
|
892 break;
|
|
893 case 2:
|
|
894 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
|
|
895 break;
|
|
896 case 4:
|
|
897 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
|
|
898 break;
|
|
899 case 8:
|
|
900 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
|
|
901 break;
|
|
902 default:
|
|
903 gcc_unreachable ();
|
|
904 }
|
|
905 else if (SCALAR_FLOAT_TYPE_P (type))
|
|
906 {
|
|
907 if (data_len == 2)
|
|
908 {
|
|
909 sorry ("Support for HSA does not implement immediate 16 bit FPU "
|
|
910 "operands");
|
|
911 return 2;
|
|
912 }
|
|
913 unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type));
|
|
914 /* There are always 32 bits in each long, no matter the size of
|
|
915 the hosts long. */
|
|
916 long tmp[6];
|
|
917
|
|
918 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
|
|
919
|
|
920 if (int_len == 4)
|
|
921 bytes.b32 = (uint32_t) tmp[0];
|
|
922 else
|
|
923 {
|
|
924 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
|
|
925 bytes.b64 <<= 32;
|
|
926 bytes.b64 |= (uint32_t) tmp[0];
|
|
927 }
|
|
928 }
|
|
929 else
|
|
930 gcc_unreachable ();
|
|
931
|
|
932 int len;
|
|
933 if (need_len == 0)
|
|
934 len = data_len;
|
|
935 else
|
|
936 len = need_len;
|
|
937
|
|
938 memcpy (data, &bytes, len);
|
|
939 return len;
|
|
940 }
|
|
941
|
|
942 char *
|
|
943 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
|
|
944 {
|
|
945 char *brig_repr;
|
|
946 *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
|
|
947
|
|
948 if (m_tree_value != NULL_TREE)
|
|
949 {
|
|
950 /* Update brig_repr_size for special tree values. */
|
|
951 if (TREE_CODE (m_tree_value) == STRING_CST)
|
|
952 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
|
|
953 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
|
|
954 *brig_repr_size
|
|
955 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
|
|
956
|
|
957 unsigned total_len = *brig_repr_size;
|
|
958
|
|
959 /* As we can have a constructor with fewer elements, fill the memory
|
|
960 with zeros. */
|
|
961 brig_repr = XCNEWVEC (char, total_len);
|
|
962 char *p = brig_repr;
|
|
963
|
|
964 if (TREE_CODE (m_tree_value) == VECTOR_CST)
|
|
965 {
|
131
|
966 /* Variable-length vectors aren't supported. */
|
|
967 int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant ();
|
111
|
968 for (i = 0; i < num; i++)
|
|
969 {
|
|
970 tree v = VECTOR_CST_ELT (m_tree_value, i);
|
|
971 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
|
|
972 total_len -= actual;
|
|
973 p += actual;
|
|
974 }
|
|
975 /* Vectors should have the exact size. */
|
|
976 gcc_assert (total_len == 0);
|
|
977 }
|
|
978 else if (TREE_CODE (m_tree_value) == STRING_CST)
|
|
979 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
|
|
980 TREE_STRING_LENGTH (m_tree_value));
|
|
981 else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
|
|
982 {
|
|
983 gcc_assert (total_len % 2 == 0);
|
|
984 unsigned actual;
|
|
985 actual
|
|
986 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
|
|
987 total_len / 2);
|
|
988
|
|
989 gcc_assert (actual == total_len / 2);
|
|
990 p += actual;
|
|
991
|
|
992 actual
|
|
993 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
|
|
994 total_len / 2);
|
|
995 gcc_assert (actual == total_len / 2);
|
|
996 }
|
|
997 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
|
|
998 {
|
|
999 unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
|
|
1000 for (unsigned i = 0; i < len; i++)
|
|
1001 {
|
|
1002 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
|
|
1003 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
|
|
1004 total_len -= actual;
|
|
1005 p += actual;
|
|
1006 }
|
|
1007 }
|
|
1008 else
|
|
1009 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
|
|
1010 }
|
|
1011 else
|
|
1012 {
|
|
1013 hsa_bytes bytes;
|
|
1014
|
|
1015 switch (*brig_repr_size)
|
|
1016 {
|
|
1017 case 1:
|
|
1018 bytes.b8 = (uint8_t) m_int_value;
|
|
1019 break;
|
|
1020 case 2:
|
|
1021 bytes.b16 = (uint16_t) m_int_value;
|
|
1022 break;
|
|
1023 case 4:
|
|
1024 bytes.b32 = (uint32_t) m_int_value;
|
|
1025 break;
|
|
1026 case 8:
|
|
1027 bytes.b64 = (uint64_t) m_int_value;
|
|
1028 break;
|
|
1029 default:
|
|
1030 gcc_unreachable ();
|
|
1031 }
|
|
1032
|
|
1033 brig_repr = XNEWVEC (char, *brig_repr_size);
|
|
1034 memcpy (brig_repr, &bytes, *brig_repr_size);
|
|
1035 }
|
|
1036
|
|
1037 return brig_repr;
|
|
1038 }
|
|
1039
|
|
1040 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
|
|
1041 have been massaged to comply with various HSA/BRIG type requirements, so the
|
|
1042 only important aspect of that is the length (because HSAIL might expect
|
|
1043 smaller constants or become bit-data). The data should be represented
|
|
1044 according to what is in the tree representation. */
|
|
1045
|
|
1046 static void
|
|
1047 emit_immediate_operand (hsa_op_immed *imm)
|
|
1048 {
|
|
1049 unsigned brig_repr_size;
|
|
1050 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
|
|
1051 struct BrigOperandConstantBytes out;
|
|
1052
|
|
1053 memset (&out, 0, sizeof (out));
|
|
1054 out.base.byteCount = lendian16 (sizeof (out));
|
|
1055 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
|
|
1056 uint32_t byteCount = lendian32 (brig_repr_size);
|
|
1057 out.type = lendian16 (imm->m_type);
|
|
1058 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
|
|
1059 brig_operand.add (&out, sizeof (out));
|
|
1060 brig_data.add (brig_repr, brig_repr_size);
|
|
1061 brig_data.round_size_up (4);
|
|
1062
|
|
1063 free (brig_repr);
|
|
1064 }
|
|
1065
|
|
1066 /* Emit a register BRIG operand REG. */
|
|
1067
|
|
1068 static void
|
|
1069 emit_register_operand (hsa_op_reg *reg)
|
|
1070 {
|
|
1071 struct BrigOperandRegister out;
|
|
1072
|
|
1073 out.base.byteCount = lendian16 (sizeof (out));
|
|
1074 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
|
|
1075 out.regNum = lendian32 (reg->m_hard_num);
|
|
1076
|
|
1077 switch (regtype_for_type (reg->m_type))
|
|
1078 {
|
|
1079 case BRIG_TYPE_B32:
|
|
1080 out.regKind = BRIG_REGISTER_KIND_SINGLE;
|
|
1081 break;
|
|
1082 case BRIG_TYPE_B64:
|
|
1083 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
|
|
1084 break;
|
|
1085 case BRIG_TYPE_B128:
|
|
1086 out.regKind = BRIG_REGISTER_KIND_QUAD;
|
|
1087 break;
|
|
1088 case BRIG_TYPE_B1:
|
|
1089 out.regKind = BRIG_REGISTER_KIND_CONTROL;
|
|
1090 break;
|
|
1091 default:
|
|
1092 gcc_unreachable ();
|
|
1093 }
|
|
1094
|
|
1095 brig_operand.add (&out, sizeof (out));
|
|
1096 }
|
|
1097
|
|
1098 /* Emit an address BRIG operand ADDR. */
|
|
1099
|
|
1100 static void
|
|
1101 emit_address_operand (hsa_op_address *addr)
|
|
1102 {
|
|
1103 struct BrigOperandAddress out;
|
|
1104
|
|
1105 out.base.byteCount = lendian16 (sizeof (out));
|
|
1106 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
|
|
1107 out.symbol = addr->m_symbol
|
|
1108 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
|
|
1109 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
|
|
1110
|
|
1111 if (sizeof (addr->m_imm_offset) == 8)
|
|
1112 {
|
|
1113 out.offset.lo = lendian32 (addr->m_imm_offset);
|
|
1114 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
|
|
1115 }
|
|
1116 else
|
|
1117 {
|
|
1118 gcc_assert (sizeof (addr->m_imm_offset) == 4);
|
|
1119 out.offset.lo = lendian32 (addr->m_imm_offset);
|
|
1120 out.offset.hi = 0;
|
|
1121 }
|
|
1122
|
|
1123 brig_operand.add (&out, sizeof (out));
|
|
1124 }
|
|
1125
|
|
1126 /* Emit a code reference operand REF. */
|
|
1127
|
|
1128 static void
|
|
1129 emit_code_ref_operand (hsa_op_code_ref *ref)
|
|
1130 {
|
|
1131 struct BrigOperandCodeRef out;
|
|
1132
|
|
1133 out.base.byteCount = lendian16 (sizeof (out));
|
|
1134 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
|
|
1135 out.ref = lendian32 (ref->m_directive_offset);
|
|
1136 brig_operand.add (&out, sizeof (out));
|
|
1137 }
|
|
1138
|
|
1139 /* Emit a code list operand CODE_LIST. */
|
|
1140
|
|
1141 static void
|
|
1142 emit_code_list_operand (hsa_op_code_list *code_list)
|
|
1143 {
|
|
1144 struct BrigOperandCodeList out;
|
|
1145 unsigned args = code_list->m_offsets.length ();
|
|
1146
|
|
1147 for (unsigned i = 0; i < args; i++)
|
|
1148 gcc_assert (code_list->m_offsets[i]);
|
|
1149
|
|
1150 out.base.byteCount = lendian16 (sizeof (out));
|
|
1151 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
|
|
1152
|
|
1153 uint32_t byteCount = lendian32 (4 * args);
|
|
1154
|
|
1155 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
|
|
1156 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
|
|
1157 brig_data.round_size_up (4);
|
|
1158 brig_operand.add (&out, sizeof (out));
|
|
1159 }
|
|
1160
|
|
1161 /* Emit an operand list operand OPERAND_LIST. */
|
|
1162
|
|
1163 static void
|
|
1164 emit_operand_list_operand (hsa_op_operand_list *operand_list)
|
|
1165 {
|
|
1166 struct BrigOperandOperandList out;
|
|
1167 unsigned args = operand_list->m_offsets.length ();
|
|
1168
|
|
1169 for (unsigned i = 0; i < args; i++)
|
|
1170 gcc_assert (operand_list->m_offsets[i]);
|
|
1171
|
|
1172 out.base.byteCount = lendian16 (sizeof (out));
|
|
1173 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
|
|
1174
|
|
1175 uint32_t byteCount = lendian32 (4 * args);
|
|
1176
|
|
1177 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
|
|
1178 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
|
|
1179 brig_data.round_size_up (4);
|
|
1180 brig_operand.add (&out, sizeof (out));
|
|
1181 }
|
|
1182
|
|
1183 /* Emit all operands queued for writing. */
|
|
1184
|
|
1185 static void
|
|
1186 emit_queued_operands (void)
|
|
1187 {
|
|
1188 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
|
|
1189 {
|
|
1190 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
|
|
1191 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
|
|
1192 emit_immediate_operand (imm);
|
|
1193 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
|
|
1194 emit_register_operand (reg);
|
|
1195 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
|
|
1196 emit_address_operand (addr);
|
|
1197 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
|
|
1198 emit_code_ref_operand (ref);
|
|
1199 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
|
|
1200 emit_code_list_operand (code_list);
|
|
1201 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
|
|
1202 emit_operand_list_operand (l);
|
|
1203 else
|
|
1204 gcc_unreachable ();
|
|
1205 }
|
|
1206 }
|
|
1207
|
|
1208 /* Emit directives describing the function that is used for
|
|
1209 a function declaration. */
|
|
1210
|
|
1211 static BrigDirectiveExecutable *
|
|
1212 emit_function_declaration (tree decl)
|
|
1213 {
|
|
1214 hsa_function_representation *f = hsa_generate_function_declaration (decl);
|
|
1215
|
|
1216 BrigDirectiveExecutable *e = emit_function_directives (f, true);
|
|
1217 emit_queued_operands ();
|
|
1218
|
|
1219 delete f;
|
|
1220
|
|
1221 return e;
|
|
1222 }
|
|
1223
|
|
1224 /* Emit directives describing the function that is used for
|
|
1225 an internal function declaration. */
|
|
1226
|
|
1227 static BrigDirectiveExecutable *
|
|
1228 emit_internal_fn_decl (hsa_internal_fn *fn)
|
|
1229 {
|
|
1230 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
|
|
1231
|
|
1232 BrigDirectiveExecutable *e = emit_function_directives (f, true);
|
|
1233 emit_queued_operands ();
|
|
1234
|
|
1235 delete f;
|
|
1236
|
|
1237 return e;
|
|
1238 }
|
|
1239
|
|
1240 /* Enqueue all operands of INSN and return offset to BRIG data section
|
|
1241 to list of operand offsets. */
|
|
1242
|
|
1243 static unsigned
|
|
1244 emit_insn_operands (hsa_insn_basic *insn)
|
|
1245 {
|
|
1246 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
|
|
1247 operand_offsets;
|
|
1248
|
|
1249 unsigned l = insn->operand_count ();
|
|
1250
|
|
1251 /* We have N operands so use 4 * N for the byte_count. */
|
|
1252 uint32_t byte_count = lendian32 (4 * l);
|
|
1253 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
|
|
1254 if (l > 0)
|
|
1255 {
|
|
1256 operand_offsets.safe_grow (l);
|
|
1257 for (unsigned i = 0; i < l; i++)
|
|
1258 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
|
|
1259
|
|
1260 brig_data.add (operand_offsets.address (),
|
|
1261 l * sizeof (BrigOperandOffset32_t));
|
|
1262 }
|
|
1263 brig_data.round_size_up (4);
|
|
1264 return offset;
|
|
1265 }
|
|
1266
|
|
1267 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
|
|
1268 to BRIG data section to list of operand offsets. */
|
|
1269
|
|
1270 static unsigned
|
|
1271 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
|
|
1272 hsa_op_base *op2 = NULL)
|
|
1273 {
|
|
1274 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
|
|
1275 operand_offsets;
|
|
1276
|
|
1277 gcc_checking_assert (op0 != NULL);
|
|
1278 operand_offsets.safe_push (enqueue_op (op0));
|
|
1279
|
|
1280 if (op1 != NULL)
|
|
1281 {
|
|
1282 operand_offsets.safe_push (enqueue_op (op1));
|
|
1283 if (op2 != NULL)
|
|
1284 operand_offsets.safe_push (enqueue_op (op2));
|
|
1285 }
|
|
1286
|
|
1287 unsigned l = operand_offsets.length ();
|
|
1288
|
|
1289 /* We have N operands so use 4 * N for the byte_count. */
|
|
1290 uint32_t byte_count = lendian32 (4 * l);
|
|
1291
|
|
1292 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
|
|
1293 brig_data.add (operand_offsets.address (),
|
|
1294 l * sizeof (BrigOperandOffset32_t));
|
|
1295
|
|
1296 brig_data.round_size_up (4);
|
|
1297
|
|
1298 return offset;
|
|
1299 }
|
|
1300
|
|
1301 /* Emit an HSA memory instruction and all necessary directives, schedule
|
|
1302 necessary operands for writing. */
|
|
1303
|
|
1304 static void
|
|
1305 emit_memory_insn (hsa_insn_mem *mem)
|
|
1306 {
|
|
1307 struct BrigInstMem repr;
|
|
1308 gcc_checking_assert (mem->operand_count () == 2);
|
|
1309
|
|
1310 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
|
|
1311
|
|
1312 /* This is necessary because of the erroneous typedef of
|
|
1313 BrigMemoryModifier8_t which introduces padding which may then contain
|
|
1314 random stuff (which we do not want so that we can test things don't
|
|
1315 change). */
|
|
1316 memset (&repr, 0, sizeof (repr));
|
|
1317 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1318 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
|
|
1319 repr.base.opcode = lendian16 (mem->m_opcode);
|
|
1320 repr.base.type = lendian16 (mem->m_type);
|
|
1321 repr.base.operands = lendian32 (emit_insn_operands (mem));
|
|
1322
|
|
1323 if (addr->m_symbol)
|
|
1324 repr.segment = addr->m_symbol->m_segment;
|
|
1325 else
|
|
1326 repr.segment = BRIG_SEGMENT_FLAT;
|
|
1327 repr.modifier = 0;
|
|
1328 repr.equivClass = mem->m_equiv_class;
|
|
1329 repr.align = mem->m_align;
|
|
1330 if (mem->m_opcode == BRIG_OPCODE_LD)
|
|
1331 repr.width = BRIG_WIDTH_1;
|
|
1332 else
|
|
1333 repr.width = BRIG_WIDTH_NONE;
|
|
1334 memset (&repr.reserved, 0, sizeof (repr.reserved));
|
|
1335 brig_code.add (&repr, sizeof (repr));
|
|
1336 brig_insn_count++;
|
|
1337 }
|
|
1338
|
|
1339 /* Emit an HSA signal memory instruction and all necessary directives, schedule
|
|
1340 necessary operands for writing. */
|
|
1341
|
|
1342 static void
|
|
1343 emit_signal_insn (hsa_insn_signal *mem)
|
|
1344 {
|
|
1345 struct BrigInstSignal repr;
|
|
1346
|
|
1347 memset (&repr, 0, sizeof (repr));
|
|
1348 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1349 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
|
|
1350 repr.base.opcode = lendian16 (mem->m_opcode);
|
|
1351 repr.base.type = lendian16 (mem->m_type);
|
|
1352 repr.base.operands = lendian32 (emit_insn_operands (mem));
|
|
1353
|
|
1354 repr.memoryOrder = mem->m_memory_order;
|
|
1355 repr.signalOperation = mem->m_signalop;
|
|
1356 repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
|
|
1357
|
|
1358 brig_code.add (&repr, sizeof (repr));
|
|
1359 brig_insn_count++;
|
|
1360 }
|
|
1361
|
|
1362 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
|
|
1363 necessary operands for writing. */
|
|
1364
|
|
1365 static void
|
|
1366 emit_atomic_insn (hsa_insn_atomic *mem)
|
|
1367 {
|
|
1368 struct BrigInstAtomic repr;
|
|
1369
|
|
1370 /* Either operand[0] or operand[1] must be an address operand. */
|
|
1371 hsa_op_address *addr = NULL;
|
|
1372 if (is_a <hsa_op_address *> (mem->get_op (0)))
|
|
1373 addr = as_a <hsa_op_address *> (mem->get_op (0));
|
|
1374 else
|
|
1375 addr = as_a <hsa_op_address *> (mem->get_op (1));
|
|
1376
|
|
1377 memset (&repr, 0, sizeof (repr));
|
|
1378 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1379 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
|
|
1380 repr.base.opcode = lendian16 (mem->m_opcode);
|
|
1381 repr.base.type = lendian16 (mem->m_type);
|
|
1382 repr.base.operands = lendian32 (emit_insn_operands (mem));
|
|
1383
|
|
1384 if (addr->m_symbol)
|
|
1385 repr.segment = addr->m_symbol->m_segment;
|
|
1386 else
|
|
1387 repr.segment = BRIG_SEGMENT_FLAT;
|
|
1388 repr.memoryOrder = mem->m_memoryorder;
|
|
1389 repr.memoryScope = mem->m_memoryscope;
|
|
1390 repr.atomicOperation = mem->m_atomicop;
|
|
1391
|
|
1392 brig_code.add (&repr, sizeof (repr));
|
|
1393 brig_insn_count++;
|
|
1394 }
|
|
1395
|
|
1396 /* Emit an HSA LDA instruction and all necessary directives, schedule
|
|
1397 necessary operands for writing. */
|
|
1398
|
|
1399 static void
|
|
1400 emit_addr_insn (hsa_insn_basic *insn)
|
|
1401 {
|
|
1402 struct BrigInstAddr repr;
|
|
1403
|
|
1404 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
|
|
1405
|
|
1406 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1407 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
|
|
1408 repr.base.opcode = lendian16 (insn->m_opcode);
|
|
1409 repr.base.type = lendian16 (insn->m_type);
|
|
1410 repr.base.operands = lendian32 (emit_insn_operands (insn));
|
|
1411
|
|
1412 if (addr->m_symbol)
|
|
1413 repr.segment = addr->m_symbol->m_segment;
|
|
1414 else
|
|
1415 repr.segment = BRIG_SEGMENT_FLAT;
|
|
1416 memset (&repr.reserved, 0, sizeof (repr.reserved));
|
|
1417
|
|
1418 brig_code.add (&repr, sizeof (repr));
|
|
1419 brig_insn_count++;
|
|
1420 }
|
|
1421
|
|
1422 /* Emit an HSA segment conversion instruction and all necessary directives,
|
|
1423 schedule necessary operands for writing. */
|
|
1424
|
|
1425 static void
|
|
1426 emit_segment_insn (hsa_insn_seg *seg)
|
|
1427 {
|
|
1428 struct BrigInstSegCvt repr;
|
|
1429
|
|
1430 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1431 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
|
|
1432 repr.base.opcode = lendian16 (seg->m_opcode);
|
|
1433 repr.base.type = lendian16 (seg->m_type);
|
|
1434 repr.base.operands = lendian32 (emit_insn_operands (seg));
|
|
1435 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
|
|
1436 repr.segment = seg->m_segment;
|
|
1437 repr.modifier = 0;
|
|
1438
|
|
1439 brig_code.add (&repr, sizeof (repr));
|
|
1440
|
|
1441 brig_insn_count++;
|
|
1442 }
|
|
1443
|
|
1444 /* Emit an HSA alloca instruction and all necessary directives,
|
|
1445 schedule necessary operands for writing. */
|
|
1446
|
|
1447 static void
|
|
1448 emit_alloca_insn (hsa_insn_alloca *alloca)
|
|
1449 {
|
|
1450 struct BrigInstMem repr;
|
|
1451 gcc_checking_assert (alloca->operand_count () == 2);
|
|
1452
|
|
1453 memset (&repr, 0, sizeof (repr));
|
|
1454 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1455 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
|
|
1456 repr.base.opcode = lendian16 (alloca->m_opcode);
|
|
1457 repr.base.type = lendian16 (alloca->m_type);
|
|
1458 repr.base.operands = lendian32 (emit_insn_operands (alloca));
|
|
1459 repr.segment = BRIG_SEGMENT_PRIVATE;
|
|
1460 repr.modifier = 0;
|
|
1461 repr.equivClass = 0;
|
|
1462 repr.align = alloca->m_align;
|
|
1463 repr.width = BRIG_WIDTH_NONE;
|
|
1464 memset (&repr.reserved, 0, sizeof (repr.reserved));
|
|
1465 brig_code.add (&repr, sizeof (repr));
|
|
1466 brig_insn_count++;
|
|
1467 }
|
|
1468
|
|
1469 /* Emit an HSA comparison instruction and all necessary directives,
|
|
1470 schedule necessary operands for writing. */
|
|
1471
|
|
1472 static void
|
|
1473 emit_cmp_insn (hsa_insn_cmp *cmp)
|
|
1474 {
|
|
1475 struct BrigInstCmp repr;
|
|
1476
|
|
1477 memset (&repr, 0, sizeof (repr));
|
|
1478 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1479 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
|
|
1480 repr.base.opcode = lendian16 (cmp->m_opcode);
|
|
1481 repr.base.type = lendian16 (cmp->m_type);
|
|
1482 repr.base.operands = lendian32 (emit_insn_operands (cmp));
|
|
1483
|
|
1484 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
|
|
1485 repr.sourceType
|
|
1486 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
|
|
1487 else
|
|
1488 repr.sourceType
|
|
1489 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
|
|
1490 repr.modifier = 0;
|
|
1491 repr.compare = cmp->m_compare;
|
|
1492 repr.pack = 0;
|
|
1493
|
|
1494 brig_code.add (&repr, sizeof (repr));
|
|
1495 brig_insn_count++;
|
|
1496 }
|
|
1497
|
|
1498 /* Emit an HSA generic branching/sycnronization instruction. */
|
|
1499
|
|
1500 static void
|
|
1501 emit_generic_branch_insn (hsa_insn_br *br)
|
|
1502 {
|
|
1503 struct BrigInstBr repr;
|
|
1504 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1505 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
|
|
1506 repr.base.opcode = lendian16 (br->m_opcode);
|
|
1507 repr.width = br->m_width;
|
|
1508 repr.base.type = lendian16 (br->m_type);
|
|
1509 repr.base.operands = lendian32 (emit_insn_operands (br));
|
|
1510 memset (&repr.reserved, 0, sizeof (repr.reserved));
|
|
1511
|
|
1512 brig_code.add (&repr, sizeof (repr));
|
|
1513 brig_insn_count++;
|
|
1514 }
|
|
1515
|
|
1516 /* Emit an HSA conditional branching instruction and all necessary directives,
|
|
1517 schedule necessary operands for writing. */
|
|
1518
|
|
1519 static void
|
|
1520 emit_cond_branch_insn (hsa_insn_cbr *br)
|
|
1521 {
|
|
1522 struct BrigInstBr repr;
|
|
1523
|
|
1524 basic_block target = NULL;
|
|
1525 edge_iterator ei;
|
|
1526 edge e;
|
|
1527
|
|
1528 /* At the moment we only handle direct conditional jumps. */
|
|
1529 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
|
|
1530 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1531 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
|
|
1532 repr.base.opcode = lendian16 (br->m_opcode);
|
|
1533 repr.width = br->m_width;
|
|
1534 /* For Conditional jumps the type is always B1. */
|
|
1535 repr.base.type = lendian16 (BRIG_TYPE_B1);
|
|
1536
|
|
1537 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
|
|
1538 if (e->flags & EDGE_TRUE_VALUE)
|
|
1539 {
|
|
1540 target = e->dest;
|
|
1541 break;
|
|
1542 }
|
|
1543 gcc_assert (target);
|
|
1544
|
|
1545 repr.base.operands
|
|
1546 = lendian32 (emit_operands (br->get_op (0),
|
|
1547 &hsa_bb_for_bb (target)->m_label_ref));
|
|
1548 memset (&repr.reserved, 0, sizeof (repr.reserved));
|
|
1549
|
|
1550 brig_code.add (&repr, sizeof (repr));
|
|
1551 brig_insn_count++;
|
|
1552 }
|
|
1553
|
|
1554 /* Emit an HSA unconditional jump branching instruction that points to
|
|
1555 a label REFERENCE. */
|
|
1556
|
|
1557 static void
|
|
1558 emit_unconditional_jump (hsa_op_code_ref *reference)
|
|
1559 {
|
|
1560 struct BrigInstBr repr;
|
|
1561
|
|
1562 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1563 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
|
|
1564 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
|
|
1565 repr.base.type = lendian16 (BRIG_TYPE_NONE);
|
|
1566 /* Direct branches to labels must be width(all). */
|
|
1567 repr.width = BRIG_WIDTH_ALL;
|
|
1568
|
|
1569 repr.base.operands = lendian32 (emit_operands (reference));
|
|
1570 memset (&repr.reserved, 0, sizeof (repr.reserved));
|
|
1571 brig_code.add (&repr, sizeof (repr));
|
|
1572 brig_insn_count++;
|
|
1573 }
|
|
1574
|
|
1575 /* Emit an HSA switch jump instruction that uses a jump table to
|
|
1576 jump to a destination label. */
|
|
1577
|
|
1578 static void
|
|
1579 emit_switch_insn (hsa_insn_sbr *sbr)
|
|
1580 {
|
|
1581 struct BrigInstBr repr;
|
|
1582
|
|
1583 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
|
|
1584 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1585 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
|
|
1586 repr.base.opcode = lendian16 (sbr->m_opcode);
|
|
1587 repr.width = BRIG_WIDTH_1;
|
|
1588 /* For Conditional jumps the type is always B1. */
|
|
1589 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
|
|
1590 repr.base.type = lendian16 (index->m_type);
|
|
1591 repr.base.operands
|
|
1592 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
|
|
1593 memset (&repr.reserved, 0, sizeof (repr.reserved));
|
|
1594
|
|
1595 brig_code.add (&repr, sizeof (repr));
|
|
1596 brig_insn_count++;
|
|
1597 }
|
|
1598
|
|
1599 /* Emit a HSA convert instruction and all necessary directives, schedule
|
|
1600 necessary operands for writing. */
|
|
1601
|
|
1602 static void
|
|
1603 emit_cvt_insn (hsa_insn_cvt *insn)
|
|
1604 {
|
|
1605 struct BrigInstCvt repr;
|
|
1606 BrigType16_t srctype;
|
|
1607
|
|
1608 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1609 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
|
|
1610 repr.base.opcode = lendian16 (insn->m_opcode);
|
|
1611 repr.base.type = lendian16 (insn->m_type);
|
|
1612 repr.base.operands = lendian32 (emit_insn_operands (insn));
|
|
1613
|
|
1614 if (is_a <hsa_op_reg *> (insn->get_op (1)))
|
|
1615 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
|
|
1616 else
|
|
1617 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
|
|
1618 repr.sourceType = lendian16 (srctype);
|
|
1619 repr.modifier = 0;
|
|
1620 /* float to smaller float requires a rounding setting (we default
|
|
1621 to 'near'. */
|
|
1622 if (hsa_type_float_p (insn->m_type)
|
|
1623 && (!hsa_type_float_p (srctype)
|
|
1624 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
|
|
1625 < (srctype & BRIG_TYPE_BASE_MASK))))
|
|
1626 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
|
|
1627 else if (hsa_type_integer_p (insn->m_type) &&
|
|
1628 hsa_type_float_p (srctype))
|
|
1629 repr.round = BRIG_ROUND_INTEGER_ZERO;
|
|
1630 else
|
|
1631 repr.round = BRIG_ROUND_NONE;
|
|
1632 brig_code.add (&repr, sizeof (repr));
|
|
1633 brig_insn_count++;
|
|
1634 }
|
|
1635
|
|
1636 /* Emit call instruction INSN, where this instruction must be closed
|
|
1637 within a call block instruction. */
|
|
1638
|
|
1639 static void
|
|
1640 emit_call_insn (hsa_insn_call *call)
|
|
1641 {
|
|
1642 struct BrigInstBr repr;
|
|
1643
|
|
1644 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1645 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
|
|
1646 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
|
|
1647 repr.base.type = lendian16 (BRIG_TYPE_NONE);
|
|
1648
|
|
1649 repr.base.operands
|
|
1650 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
|
|
1651 call->m_args_code_list));
|
|
1652
|
|
1653 /* Internal functions have not set m_called_function. */
|
|
1654 if (call->m_called_function)
|
|
1655 {
|
|
1656 function_linkage_pair pair (call->m_called_function,
|
|
1657 call->m_func.m_brig_op_offset);
|
|
1658 function_call_linkage.safe_push (pair);
|
|
1659 }
|
|
1660 else
|
|
1661 {
|
|
1662 hsa_internal_fn *slot
|
|
1663 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
|
|
1664 gcc_assert (slot);
|
|
1665 gcc_assert (slot->m_offset > 0);
|
|
1666 call->m_func.m_directive_offset = slot->m_offset;
|
|
1667 }
|
|
1668
|
|
1669 repr.width = BRIG_WIDTH_ALL;
|
|
1670 memset (&repr.reserved, 0, sizeof (repr.reserved));
|
|
1671
|
|
1672 brig_code.add (&repr, sizeof (repr));
|
|
1673 brig_insn_count++;
|
|
1674 }
|
|
1675
|
|
1676 /* Emit argument block directive. */
|
|
1677
|
|
1678 static void
|
|
1679 emit_arg_block_insn (hsa_insn_arg_block *insn)
|
|
1680 {
|
|
1681 switch (insn->m_kind)
|
|
1682 {
|
|
1683 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
|
|
1684 {
|
|
1685 struct BrigDirectiveArgBlock repr;
|
|
1686 repr.base.byteCount = lendian16 (sizeof (repr));
|
|
1687 repr.base.kind = lendian16 (insn->m_kind);
|
|
1688 brig_code.add (&repr, sizeof (repr));
|
|
1689
|
|
1690 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
|
|
1691 {
|
|
1692 insn->m_call_insn->m_args_code_list->m_offsets[i]
|
|
1693 = lendian32 (emit_directive_variable
|
|
1694 (insn->m_call_insn->m_input_args[i]));
|
|
1695 brig_insn_count++;
|
|
1696 }
|
|
1697
|
|
1698 if (insn->m_call_insn->m_output_arg)
|
|
1699 {
|
|
1700 insn->m_call_insn->m_result_code_list->m_offsets[0]
|
|
1701 = lendian32 (emit_directive_variable
|
|
1702 (insn->m_call_insn->m_output_arg));
|
|
1703 brig_insn_count++;
|
|
1704 }
|
|
1705
|
|
1706 break;
|
|
1707 }
|
|
1708 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
|
|
1709 {
|
|
1710 struct BrigDirectiveArgBlock repr;
|
|
1711 repr.base.byteCount = lendian16 (sizeof (repr));
|
|
1712 repr.base.kind = lendian16 (insn->m_kind);
|
|
1713 brig_code.add (&repr, sizeof (repr));
|
|
1714 break;
|
|
1715 }
|
|
1716 default:
|
|
1717 gcc_unreachable ();
|
|
1718 }
|
|
1719
|
|
1720 brig_insn_count++;
|
|
1721 }
|
|
1722
|
|
1723 /* Emit comment directive. */
|
|
1724
|
|
1725 static void
|
|
1726 emit_comment_insn (hsa_insn_comment *insn)
|
|
1727 {
|
|
1728 struct BrigDirectiveComment repr;
|
|
1729 memset (&repr, 0, sizeof (repr));
|
|
1730
|
|
1731 repr.base.byteCount = lendian16 (sizeof (repr));
|
|
1732 repr.base.kind = lendian16 (insn->m_opcode);
|
|
1733 repr.name = brig_emit_string (insn->m_comment, '\0', false);
|
|
1734 brig_code.add (&repr, sizeof (repr));
|
|
1735 }
|
|
1736
|
|
1737 /* Emit queue instruction INSN. */
|
|
1738
|
|
1739 static void
|
|
1740 emit_queue_insn (hsa_insn_queue *insn)
|
|
1741 {
|
|
1742 BrigInstQueue repr;
|
|
1743 memset (&repr, 0, sizeof (repr));
|
|
1744
|
|
1745 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1746 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
|
|
1747 repr.base.opcode = lendian16 (insn->m_opcode);
|
|
1748 repr.base.type = lendian16 (insn->m_type);
|
|
1749 repr.segment = insn->m_segment;
|
|
1750 repr.memoryOrder = insn->m_memory_order;
|
|
1751 repr.base.operands = lendian32 (emit_insn_operands (insn));
|
|
1752 brig_data.round_size_up (4);
|
|
1753 brig_code.add (&repr, sizeof (repr));
|
|
1754
|
|
1755 brig_insn_count++;
|
|
1756 }
|
|
1757
|
|
1758 /* Emit source type instruction INSN. */
|
|
1759
|
|
1760 static void
|
|
1761 emit_srctype_insn (hsa_insn_srctype *insn)
|
|
1762 {
|
|
1763 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
|
|
1764 struct BrigInstSourceType repr;
|
|
1765 unsigned operand_count = insn->operand_count ();
|
|
1766 gcc_checking_assert (operand_count >= 2);
|
|
1767
|
|
1768 memset (&repr, 0, sizeof (repr));
|
|
1769 repr.sourceType = lendian16 (insn->m_source_type);
|
|
1770 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1771 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
|
|
1772 repr.base.opcode = lendian16 (insn->m_opcode);
|
|
1773 repr.base.type = lendian16 (insn->m_type);
|
|
1774
|
|
1775 repr.base.operands = lendian32 (emit_insn_operands (insn));
|
|
1776 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
|
|
1777 brig_insn_count++;
|
|
1778 }
|
|
1779
|
|
1780 /* Emit packed instruction INSN. */
|
|
1781
|
|
1782 static void
|
|
1783 emit_packed_insn (hsa_insn_packed *insn)
|
|
1784 {
|
|
1785 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
|
|
1786 struct BrigInstSourceType repr;
|
|
1787 unsigned operand_count = insn->operand_count ();
|
|
1788 gcc_checking_assert (operand_count >= 2);
|
|
1789
|
|
1790 memset (&repr, 0, sizeof (repr));
|
|
1791 repr.sourceType = lendian16 (insn->m_source_type);
|
|
1792 repr.base.base.byteCount = lendian16 (sizeof (repr));
|
|
1793 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
|
|
1794 repr.base.opcode = lendian16 (insn->m_opcode);
|
|
1795 repr.base.type = lendian16 (insn->m_type);
|
|
1796
|
|
1797 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
|
|
1798 {
|
|
1799 /* Create operand list for packed type. */
|
|
1800 for (unsigned i = 1; i < operand_count; i++)
|
|
1801 {
|
|
1802 gcc_checking_assert (insn->get_op (i));
|
|
1803 insn->m_operand_list->m_offsets[i - 1]
|
|
1804 = lendian32 (enqueue_op (insn->get_op (i)));
|
|
1805 }
|
|
1806
|
|
1807 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
|
|
1808 insn->m_operand_list));
|
|
1809 }
|
|
1810 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
|
|
1811 {
|
|
1812 /* Create operand list for packed type. */
|
|
1813 for (unsigned i = 0; i < operand_count - 1; i++)
|
|
1814 {
|
|
1815 gcc_checking_assert (insn->get_op (i));
|
|
1816 insn->m_operand_list->m_offsets[i]
|
|
1817 = lendian32 (enqueue_op (insn->get_op (i)));
|
|
1818 }
|
|
1819
|
|
1820 unsigned ops = emit_operands (insn->m_operand_list,
|
|
1821 insn->get_op (insn->operand_count () - 1));
|
|
1822 repr.base.operands = lendian32 (ops);
|
|
1823 }
|
|
1824
|
|
1825
|
|
1826 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
|
|
1827 brig_insn_count++;
|
|
1828 }
|
|
1829
|
|
1830 /* Emit a basic HSA instruction and all necessary directives, schedule
|
|
1831 necessary operands for writing. */
|
|
1832
|
|
1833 static void
|
|
1834 emit_basic_insn (hsa_insn_basic *insn)
|
|
1835 {
|
|
1836 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
|
|
1837 struct BrigInstMod repr;
|
|
1838 BrigType16_t type;
|
|
1839
|
|
1840 memset (&repr, 0, sizeof (repr));
|
|
1841 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
|
|
1842 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
|
|
1843 repr.base.opcode = lendian16 (insn->m_opcode);
|
|
1844 switch (insn->m_opcode)
|
|
1845 {
|
|
1846 /* And the bit-logical operations need bit types and whine about
|
|
1847 arithmetic types :-/ */
|
|
1848 case BRIG_OPCODE_AND:
|
|
1849 case BRIG_OPCODE_OR:
|
|
1850 case BRIG_OPCODE_XOR:
|
|
1851 case BRIG_OPCODE_NOT:
|
|
1852 type = regtype_for_type (insn->m_type);
|
|
1853 break;
|
|
1854 default:
|
|
1855 type = insn->m_type;
|
|
1856 break;
|
|
1857 }
|
|
1858 repr.base.type = lendian16 (type);
|
|
1859 repr.base.operands = lendian32 (emit_insn_operands (insn));
|
|
1860
|
|
1861 if (hsa_type_packed_p (type))
|
|
1862 {
|
|
1863 if (hsa_type_float_p (type)
|
|
1864 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
|
|
1865 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
|
|
1866 else
|
|
1867 repr.round = 0;
|
|
1868 /* We assume that destination and sources agree in packing layout. */
|
|
1869 if (insn->num_used_ops () >= 2)
|
|
1870 repr.pack = BRIG_PACK_PP;
|
|
1871 else
|
|
1872 repr.pack = BRIG_PACK_P;
|
|
1873 repr.reserved = 0;
|
|
1874 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
|
|
1875 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
|
|
1876 brig_code.add (&repr, sizeof (struct BrigInstMod));
|
|
1877 }
|
|
1878 else
|
|
1879 brig_code.add (&repr, sizeof (struct BrigInstBasic));
|
|
1880 brig_insn_count++;
|
|
1881 }
|
|
1882
|
|
1883 /* Emit an HSA instruction and all necessary directives, schedule necessary
|
|
1884 operands for writing. */
|
|
1885
|
|
1886 static void
|
|
1887 emit_insn (hsa_insn_basic *insn)
|
|
1888 {
|
|
1889 gcc_assert (!is_a <hsa_insn_phi *> (insn));
|
|
1890
|
|
1891 insn->m_brig_offset = brig_code.total_size;
|
|
1892
|
|
1893 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
|
|
1894 emit_signal_insn (signal);
|
|
1895 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
|
|
1896 emit_atomic_insn (atom);
|
|
1897 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
|
|
1898 emit_memory_insn (mem);
|
|
1899 else if (insn->m_opcode == BRIG_OPCODE_LDA)
|
|
1900 emit_addr_insn (insn);
|
|
1901 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
|
|
1902 emit_segment_insn (seg);
|
|
1903 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
|
|
1904 emit_cmp_insn (cmp);
|
|
1905 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
|
|
1906 emit_cond_branch_insn (br);
|
|
1907 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
|
|
1908 {
|
|
1909 if (switch_instructions == NULL)
|
|
1910 switch_instructions = new vec <hsa_insn_sbr *> ();
|
|
1911
|
|
1912 switch_instructions->safe_push (sbr);
|
|
1913 emit_switch_insn (sbr);
|
|
1914 }
|
|
1915 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
|
|
1916 emit_generic_branch_insn (br);
|
|
1917 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
|
|
1918 emit_arg_block_insn (block);
|
|
1919 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
|
|
1920 emit_call_insn (call);
|
|
1921 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
|
|
1922 emit_comment_insn (comment);
|
|
1923 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
|
|
1924 emit_queue_insn (queue);
|
|
1925 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
|
|
1926 emit_srctype_insn (srctype);
|
|
1927 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
|
|
1928 emit_packed_insn (packed);
|
|
1929 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
|
|
1930 emit_cvt_insn (cvt);
|
|
1931 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
|
|
1932 emit_alloca_insn (alloca);
|
|
1933 else
|
|
1934 emit_basic_insn (insn);
|
|
1935 }
|
|
1936
|
|
1937 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
|
|
1938 or we are about to finish emitting code, if it is NULL. If the fall through
|
|
1939 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
|
|
1940
|
|
1941 static void
|
|
1942 perhaps_emit_branch (basic_block bb, basic_block next_bb)
|
|
1943 {
|
|
1944 basic_block t_bb = NULL, ff = NULL;
|
|
1945
|
|
1946 edge_iterator ei;
|
|
1947 edge e;
|
|
1948
|
|
1949 /* If the last instruction of BB is a switch, ignore emission of all
|
|
1950 edges. */
|
|
1951 if (hsa_bb_for_bb (bb)->m_last_insn
|
|
1952 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
|
|
1953 return;
|
|
1954
|
|
1955 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1956 if (e->flags & EDGE_TRUE_VALUE)
|
|
1957 {
|
|
1958 gcc_assert (!t_bb);
|
|
1959 t_bb = e->dest;
|
|
1960 }
|
|
1961 else
|
|
1962 {
|
|
1963 gcc_assert (!ff);
|
|
1964 ff = e->dest;
|
|
1965 }
|
|
1966
|
|
1967 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
1968 return;
|
|
1969
|
|
1970 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
|
|
1971 }
|
|
1972
|
|
1973 /* Emit the a function with name NAME to the various brig sections. */
|
|
1974
|
|
1975 void
|
|
1976 hsa_brig_emit_function (void)
|
|
1977 {
|
|
1978 basic_block bb, prev_bb;
|
|
1979 hsa_insn_basic *insn;
|
|
1980 BrigDirectiveExecutable *ptr_to_fndir;
|
|
1981
|
|
1982 brig_init ();
|
|
1983
|
|
1984 brig_insn_count = 0;
|
|
1985 memset (&op_queue, 0, sizeof (op_queue));
|
|
1986 op_queue.projected_size = brig_operand.total_size;
|
|
1987
|
|
1988 if (!function_offsets)
|
|
1989 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
|
|
1990
|
|
1991 if (!emitted_declarations)
|
|
1992 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
|
|
1993
|
|
1994 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
|
|
1995 {
|
|
1996 tree called = hsa_cfun->m_called_functions[i];
|
|
1997
|
|
1998 /* If the function has no definition, emit a declaration. */
|
|
1999 if (!emitted_declarations->get (called))
|
|
2000 {
|
|
2001 BrigDirectiveExecutable *e = emit_function_declaration (called);
|
|
2002 emitted_declarations->put (called, e);
|
|
2003 }
|
|
2004 }
|
|
2005
|
|
2006 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
|
|
2007 {
|
|
2008 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
|
|
2009 emit_internal_fn_decl (called);
|
|
2010 }
|
|
2011
|
|
2012 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
|
|
2013 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
|
|
2014 insn;
|
|
2015 insn = insn->m_next)
|
|
2016 emit_insn (insn);
|
|
2017 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
|
|
2018 FOR_EACH_BB_FN (bb, cfun)
|
|
2019 {
|
|
2020 perhaps_emit_branch (prev_bb, bb);
|
|
2021 emit_bb_label_directive (hsa_bb_for_bb (bb));
|
|
2022 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
|
|
2023 emit_insn (insn);
|
|
2024 prev_bb = bb;
|
|
2025 }
|
|
2026 perhaps_emit_branch (prev_bb, NULL);
|
|
2027 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
|
|
2028
|
|
2029 /* Fill up label references for all sbr instructions. */
|
|
2030 if (switch_instructions)
|
|
2031 {
|
|
2032 for (unsigned i = 0; i < switch_instructions->length (); i++)
|
|
2033 {
|
|
2034 hsa_insn_sbr *sbr = (*switch_instructions)[i];
|
|
2035 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
|
|
2036 {
|
|
2037 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
|
|
2038 sbr->m_label_code_list->m_offsets[j]
|
|
2039 = hbb->m_label_ref.m_directive_offset;
|
|
2040 }
|
|
2041 }
|
|
2042
|
|
2043 switch_instructions->release ();
|
|
2044 delete switch_instructions;
|
|
2045 switch_instructions = NULL;
|
|
2046 }
|
|
2047
|
|
2048 if (dump_file)
|
|
2049 {
|
|
2050 fprintf (dump_file, "------- After BRIG emission: -------\n");
|
|
2051 dump_hsa_cfun (dump_file);
|
|
2052 }
|
|
2053
|
|
2054 emit_queued_operands ();
|
|
2055 }
|
|
2056
|
|
2057 /* Emit all OMP symbols related to OMP. */
|
|
2058
|
|
2059 void
|
|
2060 hsa_brig_emit_omp_symbols (void)
|
|
2061 {
|
|
2062 brig_init ();
|
|
2063 emit_directive_variable (hsa_num_threads);
|
|
2064 }
|
|
2065
|
|
2066 /* Create and return __hsa_global_variables symbol that contains
|
|
2067 all informations consumed by libgomp to link global variables
|
|
2068 with their string names used by an HSA kernel. */
|
|
2069
|
|
2070 static tree
|
|
2071 hsa_output_global_variables ()
|
|
2072 {
|
|
2073 unsigned l = hsa_global_variable_symbols->elements ();
|
|
2074
|
|
2075 tree variable_info_type = make_node (RECORD_TYPE);
|
|
2076 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2077 get_identifier ("name"), ptr_type_node);
|
|
2078 DECL_CHAIN (id_f1) = NULL_TREE;
|
|
2079 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2080 get_identifier ("omp_data_size"),
|
|
2081 ptr_type_node);
|
|
2082 DECL_CHAIN (id_f2) = id_f1;
|
|
2083 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
|
|
2084 NULL_TREE);
|
|
2085
|
|
2086 tree int_num_of_global_vars;
|
|
2087 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
|
|
2088 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
|
|
2089 tree global_vars_array_type = build_array_type (variable_info_type,
|
|
2090 global_vars_num_index_type);
|
|
2091 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
|
|
2092
|
|
2093 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
|
|
2094
|
|
2095 for (hash_table <hsa_noop_symbol_hasher>::iterator it
|
|
2096 = hsa_global_variable_symbols->begin ();
|
|
2097 it != hsa_global_variable_symbols->end (); ++it)
|
|
2098 {
|
|
2099 unsigned len = strlen ((*it)->m_name);
|
|
2100 char *copy = XNEWVEC (char, len + 2);
|
|
2101 copy[0] = '&';
|
|
2102 memcpy (copy + 1, (*it)->m_name, len);
|
|
2103 copy[len + 1] = '\0';
|
|
2104 len++;
|
|
2105 hsa_sanitize_name (copy);
|
|
2106
|
|
2107 tree var_name = build_string (len, copy);
|
|
2108 TREE_TYPE (var_name)
|
|
2109 = build_array_type (char_type_node, build_index_type (size_int (len)));
|
|
2110 free (copy);
|
|
2111
|
|
2112 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
|
|
2113 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
|
|
2114 build1 (ADDR_EXPR,
|
|
2115 build_pointer_type (TREE_TYPE (var_name)),
|
|
2116 var_name));
|
|
2117 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
|
|
2118 build_fold_addr_expr ((*it)->m_decl));
|
|
2119
|
|
2120 tree variable_info_ctor = build_constructor (variable_info_type,
|
|
2121 variable_info_vec);
|
|
2122
|
|
2123 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
|
|
2124 variable_info_ctor);
|
|
2125 }
|
|
2126
|
|
2127 tree global_vars_ctor = build_constructor (global_vars_array_type,
|
|
2128 global_vars_vec);
|
|
2129
|
|
2130 char tmp_name[64];
|
|
2131 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
|
|
2132 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
|
|
2133 get_identifier (tmp_name),
|
|
2134 global_vars_array_type);
|
|
2135 TREE_STATIC (global_vars_table) = 1;
|
|
2136 TREE_READONLY (global_vars_table) = 1;
|
|
2137 TREE_PUBLIC (global_vars_table) = 0;
|
|
2138 DECL_ARTIFICIAL (global_vars_table) = 1;
|
|
2139 DECL_IGNORED_P (global_vars_table) = 1;
|
|
2140 DECL_EXTERNAL (global_vars_table) = 0;
|
|
2141 TREE_CONSTANT (global_vars_table) = 1;
|
|
2142 DECL_INITIAL (global_vars_table) = global_vars_ctor;
|
|
2143 varpool_node::finalize_decl (global_vars_table);
|
|
2144
|
|
2145 return global_vars_table;
|
|
2146 }
|
|
2147
|
|
2148 /* Create __hsa_host_functions and __hsa_kernels that contain
|
|
2149 all informations consumed by libgomp to register all kernels
|
|
2150 in the BRIG binary. */
|
|
2151
|
|
2152 static void
|
|
2153 hsa_output_kernels (tree *host_func_table, tree *kernels)
|
|
2154 {
|
|
2155 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
|
|
2156
|
|
2157 tree int_num_of_kernels;
|
|
2158 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
|
|
2159 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
|
|
2160 tree host_functions_array_type = build_array_type (ptr_type_node,
|
|
2161 kernel_num_index_type);
|
|
2162 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
|
|
2163
|
|
2164 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
|
|
2165 for (unsigned i = 0; i < map_count; ++i)
|
|
2166 {
|
|
2167 tree decl = hsa_get_decl_kernel_mapping_decl (i);
|
|
2168 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
|
|
2169 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
|
|
2170 }
|
|
2171 tree host_functions_ctor = build_constructor (host_functions_array_type,
|
|
2172 host_functions_vec);
|
|
2173 char tmp_name[64];
|
|
2174 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
|
|
2175 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
|
|
2176 get_identifier (tmp_name),
|
|
2177 host_functions_array_type);
|
|
2178 TREE_STATIC (hsa_host_func_table) = 1;
|
|
2179 TREE_READONLY (hsa_host_func_table) = 1;
|
|
2180 TREE_PUBLIC (hsa_host_func_table) = 0;
|
|
2181 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
|
|
2182 DECL_IGNORED_P (hsa_host_func_table) = 1;
|
|
2183 DECL_EXTERNAL (hsa_host_func_table) = 0;
|
|
2184 TREE_CONSTANT (hsa_host_func_table) = 1;
|
|
2185 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
|
|
2186 varpool_node::finalize_decl (hsa_host_func_table);
|
|
2187 *host_func_table = hsa_host_func_table;
|
|
2188
|
|
2189 /* Following code emits list of kernel_info structures. */
|
|
2190
|
|
2191 tree kernel_info_type = make_node (RECORD_TYPE);
|
|
2192 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2193 get_identifier ("name"), ptr_type_node);
|
|
2194 DECL_CHAIN (id_f1) = NULL_TREE;
|
|
2195 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2196 get_identifier ("omp_data_size"),
|
|
2197 unsigned_type_node);
|
|
2198 DECL_CHAIN (id_f2) = id_f1;
|
|
2199 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2200 get_identifier ("gridified_kernel_p"),
|
|
2201 boolean_type_node);
|
|
2202 DECL_CHAIN (id_f3) = id_f2;
|
|
2203 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2204 get_identifier ("kernel_dependencies_count"),
|
|
2205 unsigned_type_node);
|
|
2206 DECL_CHAIN (id_f4) = id_f3;
|
|
2207 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2208 get_identifier ("kernel_dependencies"),
|
|
2209 build_pointer_type (build_pointer_type
|
|
2210 (char_type_node)));
|
|
2211 DECL_CHAIN (id_f5) = id_f4;
|
|
2212 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
|
|
2213 NULL_TREE);
|
|
2214
|
|
2215 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
|
|
2216 tree kernel_info_vector_type
|
|
2217 = build_array_type (kernel_info_type,
|
|
2218 build_index_type (int_num_of_kernels));
|
|
2219 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
|
|
2220
|
|
2221 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
|
|
2222 tree kernel_dependencies_vector_type = NULL;
|
|
2223
|
|
2224 for (unsigned i = 0; i < map_count; ++i)
|
|
2225 {
|
|
2226 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
|
|
2227 char *name = hsa_get_decl_kernel_mapping_name (i);
|
|
2228 unsigned len = strlen (name);
|
|
2229 char *copy = XNEWVEC (char, len + 2);
|
|
2230 copy[0] = '&';
|
|
2231 memcpy (copy + 1, name, len);
|
|
2232 copy[len + 1] = '\0';
|
|
2233 len++;
|
|
2234
|
|
2235 tree kern_name = build_string (len, copy);
|
|
2236 TREE_TYPE (kern_name)
|
|
2237 = build_array_type (char_type_node, build_index_type (size_int (len)));
|
|
2238 free (copy);
|
|
2239
|
|
2240 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
|
|
2241 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
|
|
2242 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
|
|
2243 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
|
|
2244 gridified_kernel_p);
|
|
2245 unsigned count = 0;
|
|
2246 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
|
|
2247 if (hsa_decl_kernel_dependencies)
|
|
2248 {
|
|
2249 vec<const char *> **slot;
|
|
2250 slot = hsa_decl_kernel_dependencies->get (kernel);
|
|
2251 if (slot)
|
|
2252 {
|
|
2253 vec <const char *> *dependencies = *slot;
|
|
2254 count = dependencies->length ();
|
|
2255
|
|
2256 kernel_dependencies_vector_type
|
|
2257 = build_array_type (build_pointer_type (char_type_node),
|
|
2258 build_index_type (size_int (count)));
|
|
2259 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
|
|
2260
|
|
2261 for (unsigned j = 0; j < count; j++)
|
|
2262 {
|
|
2263 const char *d = (*dependencies)[j];
|
|
2264 len = strlen (d);
|
|
2265 tree dependency_name = build_string (len, d);
|
|
2266 TREE_TYPE (dependency_name)
|
|
2267 = build_array_type (char_type_node,
|
|
2268 build_index_type (size_int (len)));
|
|
2269
|
|
2270 CONSTRUCTOR_APPEND_ELT
|
|
2271 (kernel_dependencies_vec, NULL_TREE,
|
|
2272 build1 (ADDR_EXPR,
|
|
2273 build_pointer_type (TREE_TYPE (dependency_name)),
|
|
2274 dependency_name));
|
|
2275 }
|
|
2276 }
|
|
2277 }
|
|
2278
|
|
2279 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
|
|
2280
|
|
2281 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
|
|
2282 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
|
|
2283 build1 (ADDR_EXPR,
|
|
2284 build_pointer_type (TREE_TYPE
|
|
2285 (kern_name)),
|
|
2286 kern_name));
|
|
2287 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
|
|
2288 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
|
|
2289 gridified_kernel_p_tree);
|
|
2290 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
|
|
2291
|
|
2292 if (count > 0)
|
|
2293 {
|
|
2294 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
|
|
2295 gcc_checking_assert (kernel_dependencies_vector_type);
|
|
2296 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
|
|
2297 get_identifier (tmp_name),
|
|
2298 kernel_dependencies_vector_type);
|
|
2299
|
|
2300 TREE_STATIC (dependencies_list) = 1;
|
|
2301 TREE_READONLY (dependencies_list) = 1;
|
|
2302 TREE_PUBLIC (dependencies_list) = 0;
|
|
2303 DECL_ARTIFICIAL (dependencies_list) = 1;
|
|
2304 DECL_IGNORED_P (dependencies_list) = 1;
|
|
2305 DECL_EXTERNAL (dependencies_list) = 0;
|
|
2306 TREE_CONSTANT (dependencies_list) = 1;
|
|
2307 DECL_INITIAL (dependencies_list)
|
|
2308 = build_constructor (kernel_dependencies_vector_type,
|
|
2309 kernel_dependencies_vec);
|
|
2310 varpool_node::finalize_decl (dependencies_list);
|
|
2311
|
|
2312 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
|
|
2313 build1 (ADDR_EXPR,
|
|
2314 build_pointer_type
|
|
2315 (TREE_TYPE (dependencies_list)),
|
|
2316 dependencies_list));
|
|
2317 }
|
|
2318 else
|
|
2319 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
|
|
2320
|
|
2321 tree kernel_info_ctor = build_constructor (kernel_info_type,
|
|
2322 kernel_info_vec);
|
|
2323
|
|
2324 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
|
|
2325 kernel_info_ctor);
|
|
2326 }
|
|
2327
|
|
2328 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
|
|
2329 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
|
|
2330 get_identifier (tmp_name),
|
|
2331 kernel_info_vector_type);
|
|
2332
|
|
2333 TREE_STATIC (hsa_kernels) = 1;
|
|
2334 TREE_READONLY (hsa_kernels) = 1;
|
|
2335 TREE_PUBLIC (hsa_kernels) = 0;
|
|
2336 DECL_ARTIFICIAL (hsa_kernels) = 1;
|
|
2337 DECL_IGNORED_P (hsa_kernels) = 1;
|
|
2338 DECL_EXTERNAL (hsa_kernels) = 0;
|
|
2339 TREE_CONSTANT (hsa_kernels) = 1;
|
|
2340 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
|
|
2341 kernel_info_vector_vec);
|
|
2342 varpool_node::finalize_decl (hsa_kernels);
|
|
2343 *kernels = hsa_kernels;
|
|
2344 }
|
|
2345
|
|
2346 /* Create a static constructor that will register out brig stuff with
|
|
2347 libgomp. */
|
|
2348
|
|
2349 static void
|
|
2350 hsa_output_libgomp_mapping (tree brig_decl)
|
|
2351 {
|
|
2352 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
|
|
2353 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
|
|
2354
|
|
2355 tree kernels;
|
|
2356 tree host_func_table;
|
|
2357
|
|
2358 hsa_output_kernels (&host_func_table, &kernels);
|
|
2359 tree global_vars = hsa_output_global_variables ();
|
|
2360
|
|
2361 tree hsa_image_desc_type = make_node (RECORD_TYPE);
|
|
2362 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2363 get_identifier ("brig_module"), ptr_type_node);
|
|
2364 DECL_CHAIN (id_f1) = NULL_TREE;
|
|
2365 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2366 get_identifier ("kernel_count"),
|
|
2367 unsigned_type_node);
|
|
2368
|
|
2369 DECL_CHAIN (id_f2) = id_f1;
|
|
2370 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2371 get_identifier ("hsa_kernel_infos"),
|
|
2372 ptr_type_node);
|
|
2373 DECL_CHAIN (id_f3) = id_f2;
|
|
2374 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2375 get_identifier ("global_variable_count"),
|
|
2376 unsigned_type_node);
|
|
2377 DECL_CHAIN (id_f4) = id_f3;
|
|
2378 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
|
|
2379 get_identifier ("hsa_global_variable_infos"),
|
|
2380 ptr_type_node);
|
|
2381 DECL_CHAIN (id_f5) = id_f4;
|
|
2382 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
|
|
2383 NULL_TREE);
|
|
2384 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
|
|
2385
|
|
2386 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
|
|
2387 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
|
|
2388 build_fold_addr_expr (brig_decl));
|
|
2389 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
|
|
2390 build_int_cstu (unsigned_type_node, kernel_count));
|
|
2391 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
|
|
2392 build1 (ADDR_EXPR,
|
|
2393 build_pointer_type (TREE_TYPE (kernels)),
|
|
2394 kernels));
|
|
2395 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
|
|
2396 build_int_cstu (unsigned_type_node,
|
|
2397 global_variable_count));
|
|
2398 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
|
|
2399 build1 (ADDR_EXPR,
|
|
2400 build_pointer_type (TREE_TYPE (global_vars)),
|
|
2401 global_vars));
|
|
2402
|
|
2403 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
|
|
2404
|
|
2405 char tmp_name[64];
|
|
2406 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
|
|
2407 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
|
|
2408 get_identifier (tmp_name),
|
|
2409 hsa_image_desc_type);
|
|
2410 TREE_STATIC (hsa_img_descriptor) = 1;
|
|
2411 TREE_READONLY (hsa_img_descriptor) = 1;
|
|
2412 TREE_PUBLIC (hsa_img_descriptor) = 0;
|
|
2413 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
|
|
2414 DECL_IGNORED_P (hsa_img_descriptor) = 1;
|
|
2415 DECL_EXTERNAL (hsa_img_descriptor) = 0;
|
|
2416 TREE_CONSTANT (hsa_img_descriptor) = 1;
|
|
2417 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
|
|
2418 varpool_node::finalize_decl (hsa_img_descriptor);
|
|
2419
|
|
2420 /* Construct the "host_table" libgomp expects. */
|
|
2421 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
|
|
2422 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
|
|
2423 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
|
|
2424 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
|
|
2425 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
|
|
2426 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
|
|
2427 host_func_table_addr);
|
|
2428 offset_int func_table_size
|
|
2429 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
|
|
2430 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
|
|
2431 fold_build2 (POINTER_PLUS_EXPR,
|
|
2432 TREE_TYPE (host_func_table_addr),
|
|
2433 host_func_table_addr,
|
|
2434 build_int_cst (size_type_node,
|
|
2435 func_table_size.to_uhwi
|
|
2436 ())));
|
|
2437 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
|
|
2438 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
|
|
2439 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
|
|
2440 libgomp_host_table_vec);
|
|
2441 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
|
|
2442 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
|
|
2443 get_identifier (tmp_name),
|
|
2444 libgomp_host_table_type);
|
|
2445
|
|
2446 TREE_STATIC (hsa_libgomp_host_table) = 1;
|
|
2447 TREE_READONLY (hsa_libgomp_host_table) = 1;
|
|
2448 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
|
|
2449 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
|
|
2450 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
|
|
2451 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
|
|
2452 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
|
|
2453 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
|
|
2454 varpool_node::finalize_decl (hsa_libgomp_host_table);
|
|
2455
|
|
2456 /* Generate an initializer with a call to the registration routine. */
|
|
2457
|
|
2458 tree offload_register
|
|
2459 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
|
|
2460 gcc_checking_assert (offload_register);
|
|
2461
|
|
2462 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
|
|
2463 append_to_statement_list
|
|
2464 (build_call_expr (offload_register, 4,
|
|
2465 build_int_cstu (unsigned_type_node,
|
|
2466 GOMP_VERSION_PACK (GOMP_VERSION,
|
|
2467 GOMP_VERSION_HSA)),
|
|
2468 build_fold_addr_expr (hsa_libgomp_host_table),
|
|
2469 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
|
|
2470 build_fold_addr_expr (hsa_img_descriptor)),
|
|
2471 hsa_ctor_stmts);
|
|
2472
|
|
2473 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
|
|
2474
|
|
2475 tree offload_unregister
|
|
2476 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
|
|
2477 gcc_checking_assert (offload_unregister);
|
|
2478
|
|
2479 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
|
|
2480 append_to_statement_list
|
|
2481 (build_call_expr (offload_unregister, 4,
|
|
2482 build_int_cstu (unsigned_type_node,
|
|
2483 GOMP_VERSION_PACK (GOMP_VERSION,
|
|
2484 GOMP_VERSION_HSA)),
|
|
2485 build_fold_addr_expr (hsa_libgomp_host_table),
|
|
2486 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
|
|
2487 build_fold_addr_expr (hsa_img_descriptor)),
|
|
2488 hsa_dtor_stmts);
|
|
2489 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
|
|
2490 }
|
|
2491
|
|
2492 /* Emit the brig module we have compiled to a section in the final assembly and
|
|
2493 also create a compile unit static constructor that will register the brig
|
|
2494 module with libgomp. */
|
|
2495
|
|
2496 void
|
|
2497 hsa_output_brig (void)
|
|
2498 {
|
|
2499 section *saved_section;
|
|
2500
|
|
2501 if (!brig_initialized)
|
|
2502 return;
|
|
2503
|
|
2504 for (unsigned i = 0; i < function_call_linkage.length (); i++)
|
|
2505 {
|
|
2506 function_linkage_pair p = function_call_linkage[i];
|
|
2507
|
|
2508 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
|
|
2509 gcc_assert (*func_offset);
|
|
2510 BrigOperandCodeRef *code_ref
|
|
2511 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
|
|
2512 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
|
|
2513 code_ref->ref = lendian32 (*func_offset);
|
|
2514 }
|
|
2515
|
|
2516 /* Iterate all function declarations and if we meet a function that should
|
|
2517 have module linkage and we are unable to emit HSAIL for the function,
|
|
2518 then change the linkage to program linkage. Doing so, we will emit
|
|
2519 a valid BRIG image. */
|
|
2520 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
|
|
2521 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
|
|
2522 = emitted_declarations->begin ();
|
|
2523 it != emitted_declarations->end ();
|
|
2524 ++it)
|
|
2525 {
|
|
2526 if (hsa_failed_functions->contains ((*it).first))
|
|
2527 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
|
|
2528 }
|
|
2529
|
|
2530 saved_section = in_section;
|
|
2531
|
|
2532 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
|
|
2533 char tmp_name[64];
|
|
2534 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
|
|
2535 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
|
|
2536 tree brig_id = get_identifier (tmp_name);
|
|
2537 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
|
|
2538 char_type_node);
|
|
2539 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
|
|
2540 TREE_ADDRESSABLE (brig_decl) = 1;
|
|
2541 TREE_READONLY (brig_decl) = 1;
|
|
2542 DECL_ARTIFICIAL (brig_decl) = 1;
|
|
2543 DECL_IGNORED_P (brig_decl) = 1;
|
|
2544 TREE_STATIC (brig_decl) = 1;
|
|
2545 TREE_PUBLIC (brig_decl) = 0;
|
|
2546 TREE_USED (brig_decl) = 1;
|
|
2547 DECL_INITIAL (brig_decl) = brig_decl;
|
|
2548 TREE_ASM_WRITTEN (brig_decl) = 1;
|
|
2549
|
|
2550 BrigModuleHeader module_header;
|
|
2551 memcpy (&module_header.identification, "HSA BRIG",
|
|
2552 sizeof (module_header.identification));
|
|
2553 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
|
|
2554 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
|
|
2555 uint64_t section_index[3];
|
|
2556
|
|
2557 int data_padding, code_padding, operand_padding;
|
|
2558 data_padding = HSA_SECTION_ALIGNMENT
|
|
2559 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
|
|
2560 code_padding = HSA_SECTION_ALIGNMENT
|
|
2561 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
|
|
2562 operand_padding = HSA_SECTION_ALIGNMENT
|
|
2563 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
|
|
2564
|
|
2565 uint64_t module_size = sizeof (module_header)
|
|
2566 + sizeof (section_index)
|
|
2567 + brig_data.total_size
|
|
2568 + data_padding
|
|
2569 + brig_code.total_size
|
|
2570 + code_padding
|
|
2571 + brig_operand.total_size
|
|
2572 + operand_padding;
|
|
2573 gcc_assert ((module_size % 16) == 0);
|
|
2574 module_header.byteCount = lendian64 (module_size);
|
|
2575 memset (&module_header.hash, 0, sizeof (module_header.hash));
|
|
2576 module_header.reserved = 0;
|
|
2577 module_header.sectionCount = lendian32 (3);
|
|
2578 module_header.sectionIndex = lendian64 (sizeof (module_header));
|
|
2579 assemble_string ((const char *) &module_header, sizeof (module_header));
|
|
2580 uint64_t off = sizeof (module_header) + sizeof (section_index);
|
|
2581 section_index[0] = lendian64 (off);
|
|
2582 off += brig_data.total_size + data_padding;
|
|
2583 section_index[1] = lendian64 (off);
|
|
2584 off += brig_code.total_size + code_padding;
|
|
2585 section_index[2] = lendian64 (off);
|
|
2586 assemble_string ((const char *) §ion_index, sizeof (section_index));
|
|
2587
|
|
2588 char padding[HSA_SECTION_ALIGNMENT];
|
|
2589 memset (padding, 0, sizeof (padding));
|
|
2590
|
|
2591 brig_data.output ();
|
|
2592 assemble_string (padding, data_padding);
|
|
2593 brig_code.output ();
|
|
2594 assemble_string (padding, code_padding);
|
|
2595 brig_operand.output ();
|
|
2596 assemble_string (padding, operand_padding);
|
|
2597
|
|
2598 if (saved_section)
|
|
2599 switch_to_section (saved_section);
|
|
2600
|
|
2601 hsa_output_libgomp_mapping (brig_decl);
|
|
2602
|
|
2603 hsa_free_decl_kernel_mapping ();
|
|
2604 brig_release_data ();
|
|
2605 hsa_deinit_compilation_unit_data ();
|
|
2606
|
|
2607 delete emitted_declarations;
|
|
2608 emitted_declarations = NULL;
|
|
2609 delete function_offsets;
|
|
2610 function_offsets = NULL;
|
|
2611 }
|