111
|
1 /* Implementation of commonly needed HSAIL related functions and methods.
|
|
2 Copyright (C) 2013-2017 Free Software Foundation, Inc.
|
|
3 Contributed by Martin Jambor <mjambor@suse.cz> and
|
|
4 Martin Liska <mliska@suse.cz>.
|
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify
|
|
9 it under the terms of the GNU General Public License as published by
|
|
10 the Free Software Foundation; either version 3, or (at your option)
|
|
11 any later version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful,
|
|
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16 GNU General Public License for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>. */
|
|
21
|
|
22 #include "config.h"
|
|
23 #include "system.h"
|
|
24 #include "coretypes.h"
|
|
25 #include "tm.h"
|
|
26 #include "is-a.h"
|
|
27 #include "hash-set.h"
|
|
28 #include "hash-map.h"
|
|
29 #include "vec.h"
|
|
30 #include "tree.h"
|
|
31 #include "dumpfile.h"
|
|
32 #include "gimple-pretty-print.h"
|
|
33 #include "diagnostic-core.h"
|
|
34 #include "alloc-pool.h"
|
|
35 #include "cgraph.h"
|
|
36 #include "print-tree.h"
|
|
37 #include "stringpool.h"
|
|
38 #include "symbol-summary.h"
|
|
39 #include "hsa-common.h"
|
|
40 #include "internal-fn.h"
|
|
41 #include "ctype.h"
|
|
42 #include "builtins.h"
|
|
43 #include "stringpool.h"
|
|
44 #include "attribs.h"
|
|
45
|
|
/* The intermediate HSA representation of the function that is being
   generated.  */
class hsa_function_representation *hsa_cfun;
|
|
49
|
|
50 /* Element of the mapping vector between a host decl and an HSA kernel. */
|
|
51
|
|
52 struct GTY(()) hsa_decl_kernel_map_element
|
|
53 {
|
|
54 /* The decl of the host function. */
|
|
55 tree decl;
|
|
56 /* Name of the HSA kernel in BRIG. */
|
|
57 char * GTY((skip)) name;
|
|
58 /* Size of OMP data, if the kernel contains a kernel dispatch. */
|
|
59 unsigned omp_data_size;
|
|
60 /* True if the function is gridified kernel. */
|
|
61 bool gridified_kernel_p;
|
|
62 };
|
|
63
|
|
64 /* Mapping between decls and corresponding HSA kernels in this compilation
|
|
65 unit. */
|
|
66
|
|
67 static GTY (()) vec<hsa_decl_kernel_map_element, va_gc>
|
|
68 *hsa_decl_kernel_mapping;
|
|
69
|
|
70 /* Mapping between decls and corresponding HSA kernels
|
|
71 called by the function. */
|
|
72 hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies;
|
|
73
|
|
74 /* Hash function to lookup a symbol for a decl. */
|
|
75 hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols;
|
|
76
|
|
77 /* HSA summaries. */
|
|
78 hsa_summary_t *hsa_summaries = NULL;
|
|
79
|
|
80 /* HSA number of threads. */
|
|
81 hsa_symbol *hsa_num_threads = NULL;
|
|
82
|
|
83 /* HSA function that cannot be expanded to HSAIL. */
|
|
84 hash_set <tree> *hsa_failed_functions = NULL;
|
|
85
|
|
86 /* True if compilation unit-wide data are already allocated and initialized. */
|
|
87 static bool compilation_unit_data_initialized;
|
|
88
|
|
89 /* Return true if FNDECL represents an HSA-callable function. */
|
|
90
|
|
91 bool
|
|
92 hsa_callable_function_p (tree fndecl)
|
|
93 {
|
|
94 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl))
|
|
95 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl)));
|
|
96 }
|
|
97
|
|
98 /* Allocate HSA structures that are are used when dealing with different
|
|
99 functions. */
|
|
100
|
|
101 void
|
|
102 hsa_init_compilation_unit_data (void)
|
|
103 {
|
|
104 if (compilation_unit_data_initialized)
|
|
105 return;
|
|
106
|
|
107 compilation_unit_data_initialized = true;
|
|
108
|
|
109 hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8);
|
|
110 hsa_failed_functions = new hash_set <tree> ();
|
|
111 hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2);
|
|
112 }
|
|
113
|
|
114 /* Free data structures that are used when dealing with different
|
|
115 functions. */
|
|
116
|
|
117 void
|
|
118 hsa_deinit_compilation_unit_data (void)
|
|
119 {
|
|
120 gcc_assert (compilation_unit_data_initialized);
|
|
121
|
|
122 delete hsa_failed_functions;
|
|
123 delete hsa_emitted_internal_decls;
|
|
124
|
|
125 for (hash_table <hsa_noop_symbol_hasher>::iterator it
|
|
126 = hsa_global_variable_symbols->begin ();
|
|
127 it != hsa_global_variable_symbols->end ();
|
|
128 ++it)
|
|
129 {
|
|
130 hsa_symbol *sym = *it;
|
|
131 delete sym;
|
|
132 }
|
|
133
|
|
134 delete hsa_global_variable_symbols;
|
|
135
|
|
136 if (hsa_num_threads)
|
|
137 {
|
|
138 delete hsa_num_threads;
|
|
139 hsa_num_threads = NULL;
|
|
140 }
|
|
141
|
|
142 compilation_unit_data_initialized = false;
|
|
143 }
|
|
144
|
|
145 /* Return true if we are generating large HSA machine model. */
|
|
146
|
|
147 bool
|
|
148 hsa_machine_large_p (void)
|
|
149 {
|
|
150 /* FIXME: I suppose this is technically wrong but should work for me now. */
|
|
151 return (GET_MODE_BITSIZE (Pmode) == 64);
|
|
152 }
|
|
153
|
|
/* Tell whether the full HSA profile is used; this is unconditionally the
   case.  */

bool
hsa_full_profile_p (void)
{
  const bool full_profile = true;
  return full_profile;
}
|
|
161
|
|
162 /* Return true if a register in operand number OPNUM of instruction
|
|
163 is an output. False if it is an input. */
|
|
164
|
|
165 bool
|
|
166 hsa_insn_basic::op_output_p (unsigned opnum)
|
|
167 {
|
|
168 switch (m_opcode)
|
|
169 {
|
|
170 case HSA_OPCODE_PHI:
|
|
171 case BRIG_OPCODE_CBR:
|
|
172 case BRIG_OPCODE_SBR:
|
|
173 case BRIG_OPCODE_ST:
|
|
174 case BRIG_OPCODE_SIGNALNORET:
|
|
175 case BRIG_OPCODE_DEBUGTRAP:
|
|
176 /* FIXME: There are probably missing cases here, double check. */
|
|
177 return false;
|
|
178 case BRIG_OPCODE_EXPAND:
|
|
179 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */
|
|
180 return opnum < operand_count () - 1;
|
|
181 default:
|
|
182 return opnum == 0;
|
|
183 }
|
|
184 }
|
|
185
|
|
186 /* Return true if OPCODE is an floating-point bit instruction opcode. */
|
|
187
|
|
188 bool
|
|
189 hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode)
|
|
190 {
|
|
191 switch (opcode)
|
|
192 {
|
|
193 case BRIG_OPCODE_NEG:
|
|
194 case BRIG_OPCODE_ABS:
|
|
195 case BRIG_OPCODE_CLASS:
|
|
196 case BRIG_OPCODE_COPYSIGN:
|
|
197 return true;
|
|
198 default:
|
|
199 return false;
|
|
200 }
|
|
201 }
|
|
202
|
|
203 /* Return the number of destination operands for this INSN. */
|
|
204
|
|
205 unsigned
|
|
206 hsa_insn_basic::input_count ()
|
|
207 {
|
|
208 switch (m_opcode)
|
|
209 {
|
|
210 default:
|
|
211 return 1;
|
|
212
|
|
213 case BRIG_OPCODE_NOP:
|
|
214 return 0;
|
|
215
|
|
216 case BRIG_OPCODE_EXPAND:
|
|
217 return 2;
|
|
218
|
|
219 case BRIG_OPCODE_LD:
|
|
220 /* ld_v[234] not yet handled. */
|
|
221 return 1;
|
|
222
|
|
223 case BRIG_OPCODE_ST:
|
|
224 return 0;
|
|
225
|
|
226 case BRIG_OPCODE_ATOMICNORET:
|
|
227 return 0;
|
|
228
|
|
229 case BRIG_OPCODE_SIGNAL:
|
|
230 return 1;
|
|
231
|
|
232 case BRIG_OPCODE_SIGNALNORET:
|
|
233 return 0;
|
|
234
|
|
235 case BRIG_OPCODE_MEMFENCE:
|
|
236 return 0;
|
|
237
|
|
238 case BRIG_OPCODE_RDIMAGE:
|
|
239 case BRIG_OPCODE_LDIMAGE:
|
|
240 case BRIG_OPCODE_STIMAGE:
|
|
241 case BRIG_OPCODE_QUERYIMAGE:
|
|
242 case BRIG_OPCODE_QUERYSAMPLER:
|
|
243 sorry ("HSA image ops not handled");
|
|
244 return 0;
|
|
245
|
|
246 case BRIG_OPCODE_CBR:
|
|
247 case BRIG_OPCODE_BR:
|
|
248 return 0;
|
|
249
|
|
250 case BRIG_OPCODE_SBR:
|
|
251 return 0; /* ??? */
|
|
252
|
|
253 case BRIG_OPCODE_WAVEBARRIER:
|
|
254 return 0; /* ??? */
|
|
255
|
|
256 case BRIG_OPCODE_BARRIER:
|
|
257 case BRIG_OPCODE_ARRIVEFBAR:
|
|
258 case BRIG_OPCODE_INITFBAR:
|
|
259 case BRIG_OPCODE_JOINFBAR:
|
|
260 case BRIG_OPCODE_LEAVEFBAR:
|
|
261 case BRIG_OPCODE_RELEASEFBAR:
|
|
262 case BRIG_OPCODE_WAITFBAR:
|
|
263 return 0;
|
|
264
|
|
265 case BRIG_OPCODE_LDF:
|
|
266 return 1;
|
|
267
|
|
268 case BRIG_OPCODE_ACTIVELANECOUNT:
|
|
269 case BRIG_OPCODE_ACTIVELANEID:
|
|
270 case BRIG_OPCODE_ACTIVELANEMASK:
|
|
271 case BRIG_OPCODE_ACTIVELANEPERMUTE:
|
|
272 return 1; /* ??? */
|
|
273
|
|
274 case BRIG_OPCODE_CALL:
|
|
275 case BRIG_OPCODE_SCALL:
|
|
276 case BRIG_OPCODE_ICALL:
|
|
277 return 0;
|
|
278
|
|
279 case BRIG_OPCODE_RET:
|
|
280 return 0;
|
|
281
|
|
282 case BRIG_OPCODE_ALLOCA:
|
|
283 return 1;
|
|
284
|
|
285 case BRIG_OPCODE_CLEARDETECTEXCEPT:
|
|
286 return 0;
|
|
287
|
|
288 case BRIG_OPCODE_SETDETECTEXCEPT:
|
|
289 return 0;
|
|
290
|
|
291 case BRIG_OPCODE_PACKETCOMPLETIONSIG:
|
|
292 case BRIG_OPCODE_PACKETID:
|
|
293 case BRIG_OPCODE_CASQUEUEWRITEINDEX:
|
|
294 case BRIG_OPCODE_LDQUEUEREADINDEX:
|
|
295 case BRIG_OPCODE_LDQUEUEWRITEINDEX:
|
|
296 case BRIG_OPCODE_STQUEUEREADINDEX:
|
|
297 case BRIG_OPCODE_STQUEUEWRITEINDEX:
|
|
298 return 1; /* ??? */
|
|
299
|
|
300 case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
|
|
301 return 1;
|
|
302
|
|
303 case BRIG_OPCODE_DEBUGTRAP:
|
|
304 return 0;
|
|
305
|
|
306 case BRIG_OPCODE_GROUPBASEPTR:
|
|
307 case BRIG_OPCODE_KERNARGBASEPTR:
|
|
308 return 1; /* ??? */
|
|
309
|
|
310 case HSA_OPCODE_ARG_BLOCK:
|
|
311 return 0;
|
|
312
|
|
313 case BRIG_KIND_DIRECTIVE_COMMENT:
|
|
314 return 0;
|
|
315 }
|
|
316 }
|
|
317
|
|
318 /* Return the number of source operands for this INSN. */
|
|
319
|
|
320 unsigned
|
|
321 hsa_insn_basic::num_used_ops ()
|
|
322 {
|
|
323 gcc_checking_assert (input_count () <= operand_count ());
|
|
324
|
|
325 return operand_count () - input_count ();
|
|
326 }
|
|
327
|
|
328 /* Set alignment to VALUE. */
|
|
329
|
|
330 void
|
|
331 hsa_insn_mem::set_align (BrigAlignment8_t value)
|
|
332 {
|
|
333 /* TODO: Perhaps remove this dump later on: */
|
|
334 if (dump_file && (dump_flags & TDF_DETAILS) && value < m_align)
|
|
335 {
|
|
336 fprintf (dump_file, "Decreasing alignment to %u in instruction ", value);
|
|
337 dump_hsa_insn (dump_file, this);
|
|
338 }
|
|
339 m_align = value;
|
|
340 }
|
|
341
|
|
342 /* Return size of HSA type T in bits. */
|
|
343
|
|
344 unsigned
|
|
345 hsa_type_bit_size (BrigType16_t t)
|
|
346 {
|
|
347 switch (t)
|
|
348 {
|
|
349 case BRIG_TYPE_B1:
|
|
350 return 1;
|
|
351
|
|
352 case BRIG_TYPE_U8:
|
|
353 case BRIG_TYPE_S8:
|
|
354 case BRIG_TYPE_B8:
|
|
355 return 8;
|
|
356
|
|
357 case BRIG_TYPE_U16:
|
|
358 case BRIG_TYPE_S16:
|
|
359 case BRIG_TYPE_B16:
|
|
360 case BRIG_TYPE_F16:
|
|
361 return 16;
|
|
362
|
|
363 case BRIG_TYPE_U32:
|
|
364 case BRIG_TYPE_S32:
|
|
365 case BRIG_TYPE_B32:
|
|
366 case BRIG_TYPE_F32:
|
|
367 case BRIG_TYPE_U8X4:
|
|
368 case BRIG_TYPE_U16X2:
|
|
369 case BRIG_TYPE_S8X4:
|
|
370 case BRIG_TYPE_S16X2:
|
|
371 case BRIG_TYPE_F16X2:
|
|
372 return 32;
|
|
373
|
|
374 case BRIG_TYPE_U64:
|
|
375 case BRIG_TYPE_S64:
|
|
376 case BRIG_TYPE_F64:
|
|
377 case BRIG_TYPE_B64:
|
|
378 case BRIG_TYPE_U8X8:
|
|
379 case BRIG_TYPE_U16X4:
|
|
380 case BRIG_TYPE_U32X2:
|
|
381 case BRIG_TYPE_S8X8:
|
|
382 case BRIG_TYPE_S16X4:
|
|
383 case BRIG_TYPE_S32X2:
|
|
384 case BRIG_TYPE_F16X4:
|
|
385 case BRIG_TYPE_F32X2:
|
|
386
|
|
387 return 64;
|
|
388
|
|
389 case BRIG_TYPE_B128:
|
|
390 case BRIG_TYPE_U8X16:
|
|
391 case BRIG_TYPE_U16X8:
|
|
392 case BRIG_TYPE_U32X4:
|
|
393 case BRIG_TYPE_U64X2:
|
|
394 case BRIG_TYPE_S8X16:
|
|
395 case BRIG_TYPE_S16X8:
|
|
396 case BRIG_TYPE_S32X4:
|
|
397 case BRIG_TYPE_S64X2:
|
|
398 case BRIG_TYPE_F16X8:
|
|
399 case BRIG_TYPE_F32X4:
|
|
400 case BRIG_TYPE_F64X2:
|
|
401 return 128;
|
|
402
|
|
403 default:
|
|
404 gcc_assert (hsa_seen_error ());
|
|
405 return t;
|
|
406 }
|
|
407 }
|
|
408
|
|
409 /* Return BRIG bit-type with BITSIZE length. */
|
|
410
|
|
411 BrigType16_t
|
|
412 hsa_bittype_for_bitsize (unsigned bitsize)
|
|
413 {
|
|
414 switch (bitsize)
|
|
415 {
|
|
416 case 1:
|
|
417 return BRIG_TYPE_B1;
|
|
418 case 8:
|
|
419 return BRIG_TYPE_B8;
|
|
420 case 16:
|
|
421 return BRIG_TYPE_B16;
|
|
422 case 32:
|
|
423 return BRIG_TYPE_B32;
|
|
424 case 64:
|
|
425 return BRIG_TYPE_B64;
|
|
426 case 128:
|
|
427 return BRIG_TYPE_B128;
|
|
428 default:
|
|
429 gcc_unreachable ();
|
|
430 }
|
|
431 }
|
|
432
|
|
433 /* Return BRIG unsigned int type with BITSIZE length. */
|
|
434
|
|
435 BrigType16_t
|
|
436 hsa_uint_for_bitsize (unsigned bitsize)
|
|
437 {
|
|
438 switch (bitsize)
|
|
439 {
|
|
440 case 8:
|
|
441 return BRIG_TYPE_U8;
|
|
442 case 16:
|
|
443 return BRIG_TYPE_U16;
|
|
444 case 32:
|
|
445 return BRIG_TYPE_U32;
|
|
446 case 64:
|
|
447 return BRIG_TYPE_U64;
|
|
448 default:
|
|
449 gcc_unreachable ();
|
|
450 }
|
|
451 }
|
|
452
|
|
453 /* Return BRIG float type with BITSIZE length. */
|
|
454
|
|
455 BrigType16_t
|
|
456 hsa_float_for_bitsize (unsigned bitsize)
|
|
457 {
|
|
458 switch (bitsize)
|
|
459 {
|
|
460 case 16:
|
|
461 return BRIG_TYPE_F16;
|
|
462 case 32:
|
|
463 return BRIG_TYPE_F32;
|
|
464 case 64:
|
|
465 return BRIG_TYPE_F64;
|
|
466 default:
|
|
467 gcc_unreachable ();
|
|
468 }
|
|
469 }
|
|
470
|
|
471 /* Return HSA bit-type with the same size as the type T. */
|
|
472
|
|
473 BrigType16_t
|
|
474 hsa_bittype_for_type (BrigType16_t t)
|
|
475 {
|
|
476 return hsa_bittype_for_bitsize (hsa_type_bit_size (t));
|
|
477 }
|
|
478
|
|
479 /* Return HSA unsigned integer type with the same size as the type T. */
|
|
480
|
|
481 BrigType16_t
|
|
482 hsa_unsigned_type_for_type (BrigType16_t t)
|
|
483 {
|
|
484 return hsa_uint_for_bitsize (hsa_type_bit_size (t));
|
|
485 }
|
|
486
|
|
487 /* Return true if TYPE is a packed HSA type. */
|
|
488
|
|
489 bool
|
|
490 hsa_type_packed_p (BrigType16_t type)
|
|
491 {
|
|
492 return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE;
|
|
493 }
|
|
494
|
|
495 /* Return true if and only if TYPE is a floating point number type. */
|
|
496
|
|
497 bool
|
|
498 hsa_type_float_p (BrigType16_t type)
|
|
499 {
|
|
500 switch (type & BRIG_TYPE_BASE_MASK)
|
|
501 {
|
|
502 case BRIG_TYPE_F16:
|
|
503 case BRIG_TYPE_F32:
|
|
504 case BRIG_TYPE_F64:
|
|
505 return true;
|
|
506 default:
|
|
507 return false;
|
|
508 }
|
|
509 }
|
|
510
|
|
511 /* Return true if and only if TYPE is an integer number type. */
|
|
512
|
|
513 bool
|
|
514 hsa_type_integer_p (BrigType16_t type)
|
|
515 {
|
|
516 switch (type & BRIG_TYPE_BASE_MASK)
|
|
517 {
|
|
518 case BRIG_TYPE_U8:
|
|
519 case BRIG_TYPE_U16:
|
|
520 case BRIG_TYPE_U32:
|
|
521 case BRIG_TYPE_U64:
|
|
522 case BRIG_TYPE_S8:
|
|
523 case BRIG_TYPE_S16:
|
|
524 case BRIG_TYPE_S32:
|
|
525 case BRIG_TYPE_S64:
|
|
526 return true;
|
|
527 default:
|
|
528 return false;
|
|
529 }
|
|
530 }
|
|
531
|
|
532 /* Return true if and only if TYPE is an bit-type. */
|
|
533
|
|
534 bool
|
|
535 hsa_btype_p (BrigType16_t type)
|
|
536 {
|
|
537 switch (type & BRIG_TYPE_BASE_MASK)
|
|
538 {
|
|
539 case BRIG_TYPE_B8:
|
|
540 case BRIG_TYPE_B16:
|
|
541 case BRIG_TYPE_B32:
|
|
542 case BRIG_TYPE_B64:
|
|
543 case BRIG_TYPE_B128:
|
|
544 return true;
|
|
545 default:
|
|
546 return false;
|
|
547 }
|
|
548 }
|
|
549
|
|
550
|
|
551 /* Return HSA alignment encoding alignment to N bits. */
|
|
552
|
|
553 BrigAlignment8_t
|
|
554 hsa_alignment_encoding (unsigned n)
|
|
555 {
|
|
556 gcc_assert (n >= 8 && !(n & (n - 1)));
|
|
557 if (n >= 256)
|
|
558 return BRIG_ALIGNMENT_32;
|
|
559
|
|
560 switch (n)
|
|
561 {
|
|
562 case 8:
|
|
563 return BRIG_ALIGNMENT_1;
|
|
564 case 16:
|
|
565 return BRIG_ALIGNMENT_2;
|
|
566 case 32:
|
|
567 return BRIG_ALIGNMENT_4;
|
|
568 case 64:
|
|
569 return BRIG_ALIGNMENT_8;
|
|
570 case 128:
|
|
571 return BRIG_ALIGNMENT_16;
|
|
572 default:
|
|
573 gcc_unreachable ();
|
|
574 }
|
|
575 }
|
|
576
|
|
577 /* Return HSA alignment encoding alignment of T got
|
|
578 by get_object_alignment. */
|
|
579
|
|
580 BrigAlignment8_t
|
|
581 hsa_object_alignment (tree t)
|
|
582 {
|
|
583 return hsa_alignment_encoding (get_object_alignment (t));
|
|
584 }
|
|
585
|
|
586 /* Return byte alignment for given BrigAlignment8_t value. */
|
|
587
|
|
588 unsigned
|
|
589 hsa_byte_alignment (BrigAlignment8_t alignment)
|
|
590 {
|
|
591 gcc_assert (alignment != BRIG_ALIGNMENT_NONE);
|
|
592
|
|
593 return 1 << (alignment - 1);
|
|
594 }
|
|
595
|
|
596 /* Return natural alignment of HSA TYPE. */
|
|
597
|
|
598 BrigAlignment8_t
|
|
599 hsa_natural_alignment (BrigType16_t type)
|
|
600 {
|
|
601 return hsa_alignment_encoding (hsa_type_bit_size (type & ~BRIG_TYPE_ARRAY));
|
|
602 }
|
|
603
|
|
604 /* Call the correct destructor of a HSA instruction. */
|
|
605
|
|
606 void
|
|
607 hsa_destroy_insn (hsa_insn_basic *insn)
|
|
608 {
|
|
609 if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn))
|
|
610 phi->~hsa_insn_phi ();
|
|
611 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
|
|
612 br->~hsa_insn_cbr ();
|
|
613 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
|
|
614 cmp->~hsa_insn_cmp ();
|
|
615 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
|
|
616 mem->~hsa_insn_mem ();
|
|
617 else if (hsa_insn_atomic *atomic = dyn_cast <hsa_insn_atomic *> (insn))
|
|
618 atomic->~hsa_insn_atomic ();
|
|
619 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
|
|
620 seg->~hsa_insn_seg ();
|
|
621 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
|
|
622 call->~hsa_insn_call ();
|
|
623 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
|
|
624 block->~hsa_insn_arg_block ();
|
|
625 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
|
|
626 sbr->~hsa_insn_sbr ();
|
|
627 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
|
|
628 br->~hsa_insn_br ();
|
|
629 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
|
|
630 comment->~hsa_insn_comment ();
|
|
631 else
|
|
632 insn->~hsa_insn_basic ();
|
|
633 }
|
|
634
|
|
635 /* Call the correct destructor of a HSA operand. */
|
|
636
|
|
637 void
|
|
638 hsa_destroy_operand (hsa_op_base *op)
|
|
639 {
|
|
640 if (hsa_op_code_list *list = dyn_cast <hsa_op_code_list *> (op))
|
|
641 list->~hsa_op_code_list ();
|
|
642 else if (hsa_op_operand_list *list = dyn_cast <hsa_op_operand_list *> (op))
|
|
643 list->~hsa_op_operand_list ();
|
|
644 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
|
|
645 reg->~hsa_op_reg ();
|
|
646 else if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (op))
|
|
647 immed->~hsa_op_immed ();
|
|
648 else
|
|
649 op->~hsa_op_base ();
|
|
650 }
|
|
651
|
|
652 /* Create a mapping between the original function DECL and kernel name NAME. */
|
|
653
|
|
654 void
|
|
655 hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size,
|
|
656 bool gridified_kernel_p)
|
|
657 {
|
|
658 hsa_decl_kernel_map_element dkm;
|
|
659 dkm.decl = decl;
|
|
660 dkm.name = name;
|
|
661 dkm.omp_data_size = omp_data_size;
|
|
662 dkm.gridified_kernel_p = gridified_kernel_p;
|
|
663 vec_safe_push (hsa_decl_kernel_mapping, dkm);
|
|
664 }
|
|
665
|
|
666 /* Return the number of kernel decl name mappings. */
|
|
667
|
|
668 unsigned
|
|
669 hsa_get_number_decl_kernel_mappings (void)
|
|
670 {
|
|
671 return vec_safe_length (hsa_decl_kernel_mapping);
|
|
672 }
|
|
673
|
|
674 /* Return the decl in the Ith kernel decl name mapping. */
|
|
675
|
|
676 tree
|
|
677 hsa_get_decl_kernel_mapping_decl (unsigned i)
|
|
678 {
|
|
679 return (*hsa_decl_kernel_mapping)[i].decl;
|
|
680 }
|
|
681
|
|
682 /* Return the name in the Ith kernel decl name mapping. */
|
|
683
|
|
684 char *
|
|
685 hsa_get_decl_kernel_mapping_name (unsigned i)
|
|
686 {
|
|
687 return (*hsa_decl_kernel_mapping)[i].name;
|
|
688 }
|
|
689
|
|
690 /* Return maximum OMP size for kernel decl name mapping. */
|
|
691
|
|
692 unsigned
|
|
693 hsa_get_decl_kernel_mapping_omp_size (unsigned i)
|
|
694 {
|
|
695 return (*hsa_decl_kernel_mapping)[i].omp_data_size;
|
|
696 }
|
|
697
|
|
698 /* Return if the function is gridified kernel in decl name mapping. */
|
|
699
|
|
700 bool
|
|
701 hsa_get_decl_kernel_mapping_gridified (unsigned i)
|
|
702 {
|
|
703 return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p;
|
|
704 }
|
|
705
|
|
706 /* Free the mapping between original decls and kernel names. */
|
|
707
|
|
708 void
|
|
709 hsa_free_decl_kernel_mapping (void)
|
|
710 {
|
|
711 if (hsa_decl_kernel_mapping == NULL)
|
|
712 return;
|
|
713
|
|
714 for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i)
|
|
715 free ((*hsa_decl_kernel_mapping)[i].name);
|
|
716 ggc_free (hsa_decl_kernel_mapping);
|
|
717 }
|
|
718
|
|
719 /* Add new kernel dependency. */
|
|
720
|
|
721 void
|
|
722 hsa_add_kernel_dependency (tree caller, const char *called_function)
|
|
723 {
|
|
724 if (hsa_decl_kernel_dependencies == NULL)
|
|
725 hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> ();
|
|
726
|
|
727 vec <const char *> *s = NULL;
|
|
728 vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller);
|
|
729 if (slot == NULL)
|
|
730 {
|
|
731 s = new vec <const char *> ();
|
|
732 hsa_decl_kernel_dependencies->put (caller, s);
|
|
733 }
|
|
734 else
|
|
735 s = *slot;
|
|
736
|
|
737 s->safe_push (called_function);
|
|
738 }
|
|
739
|
|
740 /* Expansion to HSA needs a few gc roots to hold types, constructors etc. In
|
|
741 order to minimize the number of GTY roots, we'll root them all in the
|
|
742 following array. The individual elements should only be accessed by the
|
|
743 very simple getters (of a pointer-to-tree) below. */
|
|
744
|
|
745 static GTY(()) tree hsa_tree_gt_roots[3];
|
|
746
|
|
747 tree *
|
|
748 hsa_get_ctor_statements (void)
|
|
749 {
|
|
750 return &hsa_tree_gt_roots[0];
|
|
751 }
|
|
752
|
|
753 tree *
|
|
754 hsa_get_dtor_statements (void)
|
|
755 {
|
|
756 return &hsa_tree_gt_roots[1];
|
|
757 }
|
|
758
|
|
759 tree *
|
|
760 hsa_get_kernel_dispatch_type (void)
|
|
761 {
|
|
762 return &hsa_tree_gt_roots[2];
|
|
763 }
|
|
764
|
|
/* Turn the NUL-terminated name P into a valid HSA identifier in place by
   replacing every '.' and '-' with '_'.  */

void
hsa_sanitize_name (char *p)
{
  while (*p != '\0')
    {
      const char c = *p;
      if (c == '.' || c == '-')
        *p = '_';
      ++p;
    }
}
|
|
774
|
|
775 /* Clone the name P, set trailing ampersand and sanitize the name. */
|
|
776
|
|
777 char *
|
|
778 hsa_brig_function_name (const char *p)
|
|
779 {
|
|
780 unsigned len = strlen (p);
|
|
781 char *buf = XNEWVEC (char, len + 2);
|
|
782
|
|
783 buf[0] = '&';
|
|
784 buf[len + 1] = '\0';
|
|
785 memcpy (buf + 1, p, len);
|
|
786
|
|
787 hsa_sanitize_name (buf);
|
|
788 return buf;
|
|
789 }
|
|
790
|
|
791 /* Add a flatten attribute and disable vectorization for gpu implementation
|
|
792 function decl GDECL. */
|
|
793
|
|
794 void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl)
|
|
795 {
|
|
796 DECL_ATTRIBUTES (gdecl)
|
|
797 = tree_cons (get_identifier ("flatten"), NULL_TREE,
|
|
798 DECL_ATTRIBUTES (gdecl));
|
|
799
|
|
800 tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl);
|
|
801 if (fn_opts == NULL_TREE)
|
|
802 fn_opts = optimization_default_node;
|
|
803 fn_opts = copy_node (fn_opts);
|
|
804 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false;
|
|
805 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false;
|
|
806 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts;
|
|
807 }
|
|
808
|
|
809 void
|
|
810 hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host,
|
|
811 hsa_function_kind kind, bool gridified_kernel_p)
|
|
812 {
|
|
813 hsa_function_summary *gpu_summary = get (gpu);
|
|
814 hsa_function_summary *host_summary = get (host);
|
|
815
|
|
816 gpu_summary->m_kind = kind;
|
|
817 host_summary->m_kind = kind;
|
|
818
|
|
819 gpu_summary->m_gpu_implementation_p = true;
|
|
820 host_summary->m_gpu_implementation_p = false;
|
|
821
|
|
822 gpu_summary->m_gridified_kernel_p = gridified_kernel_p;
|
|
823 host_summary->m_gridified_kernel_p = gridified_kernel_p;
|
|
824
|
|
825 gpu_summary->m_bound_function = host;
|
|
826 host_summary->m_bound_function = gpu;
|
|
827
|
|
828 process_gpu_implementation_attributes (gpu->decl);
|
|
829
|
|
830 /* Create reference between a kernel and a corresponding host implementation
|
|
831 to quarantee LTO streaming to a same LTRANS. */
|
|
832 if (kind == HSA_KERNEL)
|
|
833 gpu->create_reference (host, IPA_REF_ADDR);
|
|
834 }
|
|
835
|
|
836 /* Add a HOST function to HSA summaries. */
|
|
837
|
|
838 void
|
|
839 hsa_register_kernel (cgraph_node *host)
|
|
840 {
|
|
841 if (hsa_summaries == NULL)
|
|
842 hsa_summaries = new hsa_summary_t (symtab);
|
|
843 hsa_function_summary *s = hsa_summaries->get (host);
|
|
844 s->m_kind = HSA_KERNEL;
|
|
845 }
|
|
846
|
|
847 /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
|
|
848 a HOST function. */
|
|
849
|
|
850 void
|
|
851 hsa_register_kernel (cgraph_node *gpu, cgraph_node *host)
|
|
852 {
|
|
853 if (hsa_summaries == NULL)
|
|
854 hsa_summaries = new hsa_summary_t (symtab);
|
|
855 hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true);
|
|
856 }
|
|
857
|
|
858 /* Return true if expansion of the current HSA function has already failed. */
|
|
859
|
|
860 bool
|
|
861 hsa_seen_error (void)
|
|
862 {
|
|
863 return hsa_cfun->m_seen_error;
|
|
864 }
|
|
865
|
|
866 /* Mark current HSA function as failed. */
|
|
867
|
|
868 void
|
|
869 hsa_fail_cfun (void)
|
|
870 {
|
|
871 hsa_failed_functions->add (hsa_cfun->m_decl);
|
|
872 hsa_cfun->m_seen_error = true;
|
|
873 }
|
|
874
|
|
875 char *
|
|
876 hsa_internal_fn::name ()
|
|
877 {
|
|
878 char *name = xstrdup (internal_fn_name (m_fn));
|
|
879 for (char *ptr = name; *ptr; ptr++)
|
|
880 *ptr = TOLOWER (*ptr);
|
|
881
|
|
882 const char *suffix = NULL;
|
|
883 if (m_type_bit_size == 32)
|
|
884 suffix = "f";
|
|
885
|
|
886 if (suffix)
|
|
887 {
|
|
888 char *name2 = concat (name, suffix, NULL);
|
|
889 free (name);
|
|
890 name = name2;
|
|
891 }
|
|
892
|
|
893 hsa_sanitize_name (name);
|
|
894 return name;
|
|
895 }
|
|
896
|
|
897 unsigned
|
|
898 hsa_internal_fn::get_arity ()
|
|
899 {
|
|
900 switch (m_fn)
|
|
901 {
|
|
902 case IFN_ACOS:
|
|
903 case IFN_ASIN:
|
|
904 case IFN_ATAN:
|
|
905 case IFN_COS:
|
|
906 case IFN_EXP:
|
|
907 case IFN_EXP10:
|
|
908 case IFN_EXP2:
|
|
909 case IFN_EXPM1:
|
|
910 case IFN_LOG:
|
|
911 case IFN_LOG10:
|
|
912 case IFN_LOG1P:
|
|
913 case IFN_LOG2:
|
|
914 case IFN_LOGB:
|
|
915 case IFN_SIGNIFICAND:
|
|
916 case IFN_SIN:
|
|
917 case IFN_SQRT:
|
|
918 case IFN_TAN:
|
|
919 case IFN_CEIL:
|
|
920 case IFN_FLOOR:
|
|
921 case IFN_NEARBYINT:
|
|
922 case IFN_RINT:
|
|
923 case IFN_ROUND:
|
|
924 case IFN_TRUNC:
|
|
925 return 1;
|
|
926 case IFN_ATAN2:
|
|
927 case IFN_COPYSIGN:
|
|
928 case IFN_FMOD:
|
|
929 case IFN_POW:
|
|
930 case IFN_REMAINDER:
|
|
931 case IFN_SCALB:
|
|
932 case IFN_LDEXP:
|
|
933 return 2;
|
|
934 case IFN_CLRSB:
|
|
935 case IFN_CLZ:
|
|
936 case IFN_CTZ:
|
|
937 case IFN_FFS:
|
|
938 case IFN_PARITY:
|
|
939 case IFN_POPCOUNT:
|
|
940 default:
|
|
941 /* As we produce sorry message for unknown internal functions,
|
|
942 reaching this label is definitely a bug. */
|
|
943 gcc_unreachable ();
|
|
944 }
|
|
945 }
|
|
946
|
|
947 BrigType16_t
|
|
948 hsa_internal_fn::get_argument_type (int n)
|
|
949 {
|
|
950 switch (m_fn)
|
|
951 {
|
|
952 case IFN_ACOS:
|
|
953 case IFN_ASIN:
|
|
954 case IFN_ATAN:
|
|
955 case IFN_COS:
|
|
956 case IFN_EXP:
|
|
957 case IFN_EXP10:
|
|
958 case IFN_EXP2:
|
|
959 case IFN_EXPM1:
|
|
960 case IFN_LOG:
|
|
961 case IFN_LOG10:
|
|
962 case IFN_LOG1P:
|
|
963 case IFN_LOG2:
|
|
964 case IFN_LOGB:
|
|
965 case IFN_SIGNIFICAND:
|
|
966 case IFN_SIN:
|
|
967 case IFN_SQRT:
|
|
968 case IFN_TAN:
|
|
969 case IFN_CEIL:
|
|
970 case IFN_FLOOR:
|
|
971 case IFN_NEARBYINT:
|
|
972 case IFN_RINT:
|
|
973 case IFN_ROUND:
|
|
974 case IFN_TRUNC:
|
|
975 case IFN_ATAN2:
|
|
976 case IFN_COPYSIGN:
|
|
977 case IFN_FMOD:
|
|
978 case IFN_POW:
|
|
979 case IFN_REMAINDER:
|
|
980 case IFN_SCALB:
|
|
981 return hsa_float_for_bitsize (m_type_bit_size);
|
|
982 case IFN_LDEXP:
|
|
983 {
|
|
984 if (n == -1 || n == 0)
|
|
985 return hsa_float_for_bitsize (m_type_bit_size);
|
|
986 else
|
|
987 return BRIG_TYPE_S32;
|
|
988 }
|
|
989 default:
|
|
990 /* As we produce sorry message for unknown internal functions,
|
|
991 reaching this label is definitely a bug. */
|
|
992 gcc_unreachable ();
|
|
993 }
|
|
994 }
|
|
995
|
|
996 #include "gt-hsa-common.h"
|