Mercurial > hg > CbC > GCC_original
comparison gcc/hsa-common.c @ 16:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
15:561a7518be6b | 16:04ced10e8804 |
---|---|
1 /* Implementation of commonly needed HSAIL related functions and methods. | |
2 Copyright (C) 2013-2017 Free Software Foundation, Inc. | |
3 Contributed by Martin Jambor <mjambor@suse.cz> and | |
4 Martin Liska <mliska@suse.cz>. | |
5 | |
6 This file is part of GCC. | |
7 | |
8 GCC is free software; you can redistribute it and/or modify | |
9 it under the terms of the GNU General Public License as published by | |
10 the Free Software Foundation; either version 3, or (at your option) | |
11 any later version. | |
12 | |
13 GCC is distributed in the hope that it will be useful, | |
14 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 GNU General Public License for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with GCC; see the file COPYING3. If not see | |
20 <http://www.gnu.org/licenses/>. */ | |
21 | |
22 #include "config.h" | |
23 #include "system.h" | |
24 #include "coretypes.h" | |
25 #include "tm.h" | |
26 #include "is-a.h" | |
27 #include "hash-set.h" | |
28 #include "hash-map.h" | |
29 #include "vec.h" | |
30 #include "tree.h" | |
31 #include "dumpfile.h" | |
32 #include "gimple-pretty-print.h" | |
33 #include "diagnostic-core.h" | |
34 #include "alloc-pool.h" | |
35 #include "cgraph.h" | |
36 #include "print-tree.h" | |
37 #include "stringpool.h" | |
38 #include "symbol-summary.h" | |
39 #include "hsa-common.h" | |
40 #include "internal-fn.h" | |
41 #include "ctype.h" | |
42 #include "builtins.h" | |
43 #include "stringpool.h" | |
44 #include "attribs.h" | |
45 | |
/* Intermediate HSA representation of the function that is currently being
   generated.  */
class hsa_function_representation *hsa_cfun;
49 | |
50 /* Element of the mapping vector between a host decl and an HSA kernel. */ | |
51 | |
52 struct GTY(()) hsa_decl_kernel_map_element | |
53 { | |
54 /* The decl of the host function. */ | |
55 tree decl; | |
56 /* Name of the HSA kernel in BRIG. */ | |
57 char * GTY((skip)) name; | |
58 /* Size of OMP data, if the kernel contains a kernel dispatch. */ | |
59 unsigned omp_data_size; | |
60 /* True if the function is gridified kernel. */ | |
61 bool gridified_kernel_p; | |
62 }; | |
63 | |
64 /* Mapping between decls and corresponding HSA kernels in this compilation | |
65 unit. */ | |
66 | |
67 static GTY (()) vec<hsa_decl_kernel_map_element, va_gc> | |
68 *hsa_decl_kernel_mapping; | |
69 | |
70 /* Mapping between decls and corresponding HSA kernels | |
71 called by the function. */ | |
72 hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; | |
73 | |
74 /* Hash function to lookup a symbol for a decl. */ | |
75 hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols; | |
76 | |
77 /* HSA summaries. */ | |
78 hsa_summary_t *hsa_summaries = NULL; | |
79 | |
80 /* HSA number of threads. */ | |
81 hsa_symbol *hsa_num_threads = NULL; | |
82 | |
83 /* HSA function that cannot be expanded to HSAIL. */ | |
84 hash_set <tree> *hsa_failed_functions = NULL; | |
85 | |
86 /* True if compilation unit-wide data are already allocated and initialized. */ | |
87 static bool compilation_unit_data_initialized; | |
88 | |
89 /* Return true if FNDECL represents an HSA-callable function. */ | |
90 | |
91 bool | |
92 hsa_callable_function_p (tree fndecl) | |
93 { | |
94 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl)) | |
95 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl))); | |
96 } | |
97 | |
98 /* Allocate HSA structures that are are used when dealing with different | |
99 functions. */ | |
100 | |
101 void | |
102 hsa_init_compilation_unit_data (void) | |
103 { | |
104 if (compilation_unit_data_initialized) | |
105 return; | |
106 | |
107 compilation_unit_data_initialized = true; | |
108 | |
109 hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8); | |
110 hsa_failed_functions = new hash_set <tree> (); | |
111 hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2); | |
112 } | |
113 | |
114 /* Free data structures that are used when dealing with different | |
115 functions. */ | |
116 | |
117 void | |
118 hsa_deinit_compilation_unit_data (void) | |
119 { | |
120 gcc_assert (compilation_unit_data_initialized); | |
121 | |
122 delete hsa_failed_functions; | |
123 delete hsa_emitted_internal_decls; | |
124 | |
125 for (hash_table <hsa_noop_symbol_hasher>::iterator it | |
126 = hsa_global_variable_symbols->begin (); | |
127 it != hsa_global_variable_symbols->end (); | |
128 ++it) | |
129 { | |
130 hsa_symbol *sym = *it; | |
131 delete sym; | |
132 } | |
133 | |
134 delete hsa_global_variable_symbols; | |
135 | |
136 if (hsa_num_threads) | |
137 { | |
138 delete hsa_num_threads; | |
139 hsa_num_threads = NULL; | |
140 } | |
141 | |
142 compilation_unit_data_initialized = false; | |
143 } | |
144 | |
145 /* Return true if we are generating large HSA machine model. */ | |
146 | |
147 bool | |
148 hsa_machine_large_p (void) | |
149 { | |
150 /* FIXME: I suppose this is technically wrong but should work for me now. */ | |
151 return (GET_MODE_BITSIZE (Pmode) == 64); | |
152 } | |
153 | |
/* Return true if the full HSA profile is being used.  This is currently
   unconditionally the case.  */

bool
hsa_full_profile_p (void)
{
  const bool full_profile = true;
  return full_profile;
}
161 | |
162 /* Return true if a register in operand number OPNUM of instruction | |
163 is an output. False if it is an input. */ | |
164 | |
165 bool | |
166 hsa_insn_basic::op_output_p (unsigned opnum) | |
167 { | |
168 switch (m_opcode) | |
169 { | |
170 case HSA_OPCODE_PHI: | |
171 case BRIG_OPCODE_CBR: | |
172 case BRIG_OPCODE_SBR: | |
173 case BRIG_OPCODE_ST: | |
174 case BRIG_OPCODE_SIGNALNORET: | |
175 case BRIG_OPCODE_DEBUGTRAP: | |
176 /* FIXME: There are probably missing cases here, double check. */ | |
177 return false; | |
178 case BRIG_OPCODE_EXPAND: | |
179 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */ | |
180 return opnum < operand_count () - 1; | |
181 default: | |
182 return opnum == 0; | |
183 } | |
184 } | |
185 | |
186 /* Return true if OPCODE is an floating-point bit instruction opcode. */ | |
187 | |
188 bool | |
189 hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode) | |
190 { | |
191 switch (opcode) | |
192 { | |
193 case BRIG_OPCODE_NEG: | |
194 case BRIG_OPCODE_ABS: | |
195 case BRIG_OPCODE_CLASS: | |
196 case BRIG_OPCODE_COPYSIGN: | |
197 return true; | |
198 default: | |
199 return false; | |
200 } | |
201 } | |
202 | |
203 /* Return the number of destination operands for this INSN. */ | |
204 | |
205 unsigned | |
206 hsa_insn_basic::input_count () | |
207 { | |
208 switch (m_opcode) | |
209 { | |
210 default: | |
211 return 1; | |
212 | |
213 case BRIG_OPCODE_NOP: | |
214 return 0; | |
215 | |
216 case BRIG_OPCODE_EXPAND: | |
217 return 2; | |
218 | |
219 case BRIG_OPCODE_LD: | |
220 /* ld_v[234] not yet handled. */ | |
221 return 1; | |
222 | |
223 case BRIG_OPCODE_ST: | |
224 return 0; | |
225 | |
226 case BRIG_OPCODE_ATOMICNORET: | |
227 return 0; | |
228 | |
229 case BRIG_OPCODE_SIGNAL: | |
230 return 1; | |
231 | |
232 case BRIG_OPCODE_SIGNALNORET: | |
233 return 0; | |
234 | |
235 case BRIG_OPCODE_MEMFENCE: | |
236 return 0; | |
237 | |
238 case BRIG_OPCODE_RDIMAGE: | |
239 case BRIG_OPCODE_LDIMAGE: | |
240 case BRIG_OPCODE_STIMAGE: | |
241 case BRIG_OPCODE_QUERYIMAGE: | |
242 case BRIG_OPCODE_QUERYSAMPLER: | |
243 sorry ("HSA image ops not handled"); | |
244 return 0; | |
245 | |
246 case BRIG_OPCODE_CBR: | |
247 case BRIG_OPCODE_BR: | |
248 return 0; | |
249 | |
250 case BRIG_OPCODE_SBR: | |
251 return 0; /* ??? */ | |
252 | |
253 case BRIG_OPCODE_WAVEBARRIER: | |
254 return 0; /* ??? */ | |
255 | |
256 case BRIG_OPCODE_BARRIER: | |
257 case BRIG_OPCODE_ARRIVEFBAR: | |
258 case BRIG_OPCODE_INITFBAR: | |
259 case BRIG_OPCODE_JOINFBAR: | |
260 case BRIG_OPCODE_LEAVEFBAR: | |
261 case BRIG_OPCODE_RELEASEFBAR: | |
262 case BRIG_OPCODE_WAITFBAR: | |
263 return 0; | |
264 | |
265 case BRIG_OPCODE_LDF: | |
266 return 1; | |
267 | |
268 case BRIG_OPCODE_ACTIVELANECOUNT: | |
269 case BRIG_OPCODE_ACTIVELANEID: | |
270 case BRIG_OPCODE_ACTIVELANEMASK: | |
271 case BRIG_OPCODE_ACTIVELANEPERMUTE: | |
272 return 1; /* ??? */ | |
273 | |
274 case BRIG_OPCODE_CALL: | |
275 case BRIG_OPCODE_SCALL: | |
276 case BRIG_OPCODE_ICALL: | |
277 return 0; | |
278 | |
279 case BRIG_OPCODE_RET: | |
280 return 0; | |
281 | |
282 case BRIG_OPCODE_ALLOCA: | |
283 return 1; | |
284 | |
285 case BRIG_OPCODE_CLEARDETECTEXCEPT: | |
286 return 0; | |
287 | |
288 case BRIG_OPCODE_SETDETECTEXCEPT: | |
289 return 0; | |
290 | |
291 case BRIG_OPCODE_PACKETCOMPLETIONSIG: | |
292 case BRIG_OPCODE_PACKETID: | |
293 case BRIG_OPCODE_CASQUEUEWRITEINDEX: | |
294 case BRIG_OPCODE_LDQUEUEREADINDEX: | |
295 case BRIG_OPCODE_LDQUEUEWRITEINDEX: | |
296 case BRIG_OPCODE_STQUEUEREADINDEX: | |
297 case BRIG_OPCODE_STQUEUEWRITEINDEX: | |
298 return 1; /* ??? */ | |
299 | |
300 case BRIG_OPCODE_ADDQUEUEWRITEINDEX: | |
301 return 1; | |
302 | |
303 case BRIG_OPCODE_DEBUGTRAP: | |
304 return 0; | |
305 | |
306 case BRIG_OPCODE_GROUPBASEPTR: | |
307 case BRIG_OPCODE_KERNARGBASEPTR: | |
308 return 1; /* ??? */ | |
309 | |
310 case HSA_OPCODE_ARG_BLOCK: | |
311 return 0; | |
312 | |
313 case BRIG_KIND_DIRECTIVE_COMMENT: | |
314 return 0; | |
315 } | |
316 } | |
317 | |
318 /* Return the number of source operands for this INSN. */ | |
319 | |
320 unsigned | |
321 hsa_insn_basic::num_used_ops () | |
322 { | |
323 gcc_checking_assert (input_count () <= operand_count ()); | |
324 | |
325 return operand_count () - input_count (); | |
326 } | |
327 | |
328 /* Set alignment to VALUE. */ | |
329 | |
330 void | |
331 hsa_insn_mem::set_align (BrigAlignment8_t value) | |
332 { | |
333 /* TODO: Perhaps remove this dump later on: */ | |
334 if (dump_file && (dump_flags & TDF_DETAILS) && value < m_align) | |
335 { | |
336 fprintf (dump_file, "Decreasing alignment to %u in instruction ", value); | |
337 dump_hsa_insn (dump_file, this); | |
338 } | |
339 m_align = value; | |
340 } | |
341 | |
342 /* Return size of HSA type T in bits. */ | |
343 | |
344 unsigned | |
345 hsa_type_bit_size (BrigType16_t t) | |
346 { | |
347 switch (t) | |
348 { | |
349 case BRIG_TYPE_B1: | |
350 return 1; | |
351 | |
352 case BRIG_TYPE_U8: | |
353 case BRIG_TYPE_S8: | |
354 case BRIG_TYPE_B8: | |
355 return 8; | |
356 | |
357 case BRIG_TYPE_U16: | |
358 case BRIG_TYPE_S16: | |
359 case BRIG_TYPE_B16: | |
360 case BRIG_TYPE_F16: | |
361 return 16; | |
362 | |
363 case BRIG_TYPE_U32: | |
364 case BRIG_TYPE_S32: | |
365 case BRIG_TYPE_B32: | |
366 case BRIG_TYPE_F32: | |
367 case BRIG_TYPE_U8X4: | |
368 case BRIG_TYPE_U16X2: | |
369 case BRIG_TYPE_S8X4: | |
370 case BRIG_TYPE_S16X2: | |
371 case BRIG_TYPE_F16X2: | |
372 return 32; | |
373 | |
374 case BRIG_TYPE_U64: | |
375 case BRIG_TYPE_S64: | |
376 case BRIG_TYPE_F64: | |
377 case BRIG_TYPE_B64: | |
378 case BRIG_TYPE_U8X8: | |
379 case BRIG_TYPE_U16X4: | |
380 case BRIG_TYPE_U32X2: | |
381 case BRIG_TYPE_S8X8: | |
382 case BRIG_TYPE_S16X4: | |
383 case BRIG_TYPE_S32X2: | |
384 case BRIG_TYPE_F16X4: | |
385 case BRIG_TYPE_F32X2: | |
386 | |
387 return 64; | |
388 | |
389 case BRIG_TYPE_B128: | |
390 case BRIG_TYPE_U8X16: | |
391 case BRIG_TYPE_U16X8: | |
392 case BRIG_TYPE_U32X4: | |
393 case BRIG_TYPE_U64X2: | |
394 case BRIG_TYPE_S8X16: | |
395 case BRIG_TYPE_S16X8: | |
396 case BRIG_TYPE_S32X4: | |
397 case BRIG_TYPE_S64X2: | |
398 case BRIG_TYPE_F16X8: | |
399 case BRIG_TYPE_F32X4: | |
400 case BRIG_TYPE_F64X2: | |
401 return 128; | |
402 | |
403 default: | |
404 gcc_assert (hsa_seen_error ()); | |
405 return t; | |
406 } | |
407 } | |
408 | |
409 /* Return BRIG bit-type with BITSIZE length. */ | |
410 | |
411 BrigType16_t | |
412 hsa_bittype_for_bitsize (unsigned bitsize) | |
413 { | |
414 switch (bitsize) | |
415 { | |
416 case 1: | |
417 return BRIG_TYPE_B1; | |
418 case 8: | |
419 return BRIG_TYPE_B8; | |
420 case 16: | |
421 return BRIG_TYPE_B16; | |
422 case 32: | |
423 return BRIG_TYPE_B32; | |
424 case 64: | |
425 return BRIG_TYPE_B64; | |
426 case 128: | |
427 return BRIG_TYPE_B128; | |
428 default: | |
429 gcc_unreachable (); | |
430 } | |
431 } | |
432 | |
433 /* Return BRIG unsigned int type with BITSIZE length. */ | |
434 | |
435 BrigType16_t | |
436 hsa_uint_for_bitsize (unsigned bitsize) | |
437 { | |
438 switch (bitsize) | |
439 { | |
440 case 8: | |
441 return BRIG_TYPE_U8; | |
442 case 16: | |
443 return BRIG_TYPE_U16; | |
444 case 32: | |
445 return BRIG_TYPE_U32; | |
446 case 64: | |
447 return BRIG_TYPE_U64; | |
448 default: | |
449 gcc_unreachable (); | |
450 } | |
451 } | |
452 | |
453 /* Return BRIG float type with BITSIZE length. */ | |
454 | |
455 BrigType16_t | |
456 hsa_float_for_bitsize (unsigned bitsize) | |
457 { | |
458 switch (bitsize) | |
459 { | |
460 case 16: | |
461 return BRIG_TYPE_F16; | |
462 case 32: | |
463 return BRIG_TYPE_F32; | |
464 case 64: | |
465 return BRIG_TYPE_F64; | |
466 default: | |
467 gcc_unreachable (); | |
468 } | |
469 } | |
470 | |
471 /* Return HSA bit-type with the same size as the type T. */ | |
472 | |
473 BrigType16_t | |
474 hsa_bittype_for_type (BrigType16_t t) | |
475 { | |
476 return hsa_bittype_for_bitsize (hsa_type_bit_size (t)); | |
477 } | |
478 | |
479 /* Return HSA unsigned integer type with the same size as the type T. */ | |
480 | |
481 BrigType16_t | |
482 hsa_unsigned_type_for_type (BrigType16_t t) | |
483 { | |
484 return hsa_uint_for_bitsize (hsa_type_bit_size (t)); | |
485 } | |
486 | |
487 /* Return true if TYPE is a packed HSA type. */ | |
488 | |
489 bool | |
490 hsa_type_packed_p (BrigType16_t type) | |
491 { | |
492 return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE; | |
493 } | |
494 | |
495 /* Return true if and only if TYPE is a floating point number type. */ | |
496 | |
497 bool | |
498 hsa_type_float_p (BrigType16_t type) | |
499 { | |
500 switch (type & BRIG_TYPE_BASE_MASK) | |
501 { | |
502 case BRIG_TYPE_F16: | |
503 case BRIG_TYPE_F32: | |
504 case BRIG_TYPE_F64: | |
505 return true; | |
506 default: | |
507 return false; | |
508 } | |
509 } | |
510 | |
511 /* Return true if and only if TYPE is an integer number type. */ | |
512 | |
513 bool | |
514 hsa_type_integer_p (BrigType16_t type) | |
515 { | |
516 switch (type & BRIG_TYPE_BASE_MASK) | |
517 { | |
518 case BRIG_TYPE_U8: | |
519 case BRIG_TYPE_U16: | |
520 case BRIG_TYPE_U32: | |
521 case BRIG_TYPE_U64: | |
522 case BRIG_TYPE_S8: | |
523 case BRIG_TYPE_S16: | |
524 case BRIG_TYPE_S32: | |
525 case BRIG_TYPE_S64: | |
526 return true; | |
527 default: | |
528 return false; | |
529 } | |
530 } | |
531 | |
532 /* Return true if and only if TYPE is an bit-type. */ | |
533 | |
534 bool | |
535 hsa_btype_p (BrigType16_t type) | |
536 { | |
537 switch (type & BRIG_TYPE_BASE_MASK) | |
538 { | |
539 case BRIG_TYPE_B8: | |
540 case BRIG_TYPE_B16: | |
541 case BRIG_TYPE_B32: | |
542 case BRIG_TYPE_B64: | |
543 case BRIG_TYPE_B128: | |
544 return true; | |
545 default: | |
546 return false; | |
547 } | |
548 } | |
549 | |
550 | |
551 /* Return HSA alignment encoding alignment to N bits. */ | |
552 | |
553 BrigAlignment8_t | |
554 hsa_alignment_encoding (unsigned n) | |
555 { | |
556 gcc_assert (n >= 8 && !(n & (n - 1))); | |
557 if (n >= 256) | |
558 return BRIG_ALIGNMENT_32; | |
559 | |
560 switch (n) | |
561 { | |
562 case 8: | |
563 return BRIG_ALIGNMENT_1; | |
564 case 16: | |
565 return BRIG_ALIGNMENT_2; | |
566 case 32: | |
567 return BRIG_ALIGNMENT_4; | |
568 case 64: | |
569 return BRIG_ALIGNMENT_8; | |
570 case 128: | |
571 return BRIG_ALIGNMENT_16; | |
572 default: | |
573 gcc_unreachable (); | |
574 } | |
575 } | |
576 | |
577 /* Return HSA alignment encoding alignment of T got | |
578 by get_object_alignment. */ | |
579 | |
580 BrigAlignment8_t | |
581 hsa_object_alignment (tree t) | |
582 { | |
583 return hsa_alignment_encoding (get_object_alignment (t)); | |
584 } | |
585 | |
586 /* Return byte alignment for given BrigAlignment8_t value. */ | |
587 | |
588 unsigned | |
589 hsa_byte_alignment (BrigAlignment8_t alignment) | |
590 { | |
591 gcc_assert (alignment != BRIG_ALIGNMENT_NONE); | |
592 | |
593 return 1 << (alignment - 1); | |
594 } | |
595 | |
596 /* Return natural alignment of HSA TYPE. */ | |
597 | |
598 BrigAlignment8_t | |
599 hsa_natural_alignment (BrigType16_t type) | |
600 { | |
601 return hsa_alignment_encoding (hsa_type_bit_size (type & ~BRIG_TYPE_ARRAY)); | |
602 } | |
603 | |
604 /* Call the correct destructor of a HSA instruction. */ | |
605 | |
606 void | |
607 hsa_destroy_insn (hsa_insn_basic *insn) | |
608 { | |
609 if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn)) | |
610 phi->~hsa_insn_phi (); | |
611 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn)) | |
612 br->~hsa_insn_cbr (); | |
613 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn)) | |
614 cmp->~hsa_insn_cmp (); | |
615 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn)) | |
616 mem->~hsa_insn_mem (); | |
617 else if (hsa_insn_atomic *atomic = dyn_cast <hsa_insn_atomic *> (insn)) | |
618 atomic->~hsa_insn_atomic (); | |
619 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn)) | |
620 seg->~hsa_insn_seg (); | |
621 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn)) | |
622 call->~hsa_insn_call (); | |
623 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn)) | |
624 block->~hsa_insn_arg_block (); | |
625 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn)) | |
626 sbr->~hsa_insn_sbr (); | |
627 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn)) | |
628 br->~hsa_insn_br (); | |
629 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn)) | |
630 comment->~hsa_insn_comment (); | |
631 else | |
632 insn->~hsa_insn_basic (); | |
633 } | |
634 | |
635 /* Call the correct destructor of a HSA operand. */ | |
636 | |
637 void | |
638 hsa_destroy_operand (hsa_op_base *op) | |
639 { | |
640 if (hsa_op_code_list *list = dyn_cast <hsa_op_code_list *> (op)) | |
641 list->~hsa_op_code_list (); | |
642 else if (hsa_op_operand_list *list = dyn_cast <hsa_op_operand_list *> (op)) | |
643 list->~hsa_op_operand_list (); | |
644 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) | |
645 reg->~hsa_op_reg (); | |
646 else if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (op)) | |
647 immed->~hsa_op_immed (); | |
648 else | |
649 op->~hsa_op_base (); | |
650 } | |
651 | |
652 /* Create a mapping between the original function DECL and kernel name NAME. */ | |
653 | |
654 void | |
655 hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size, | |
656 bool gridified_kernel_p) | |
657 { | |
658 hsa_decl_kernel_map_element dkm; | |
659 dkm.decl = decl; | |
660 dkm.name = name; | |
661 dkm.omp_data_size = omp_data_size; | |
662 dkm.gridified_kernel_p = gridified_kernel_p; | |
663 vec_safe_push (hsa_decl_kernel_mapping, dkm); | |
664 } | |
665 | |
666 /* Return the number of kernel decl name mappings. */ | |
667 | |
668 unsigned | |
669 hsa_get_number_decl_kernel_mappings (void) | |
670 { | |
671 return vec_safe_length (hsa_decl_kernel_mapping); | |
672 } | |
673 | |
674 /* Return the decl in the Ith kernel decl name mapping. */ | |
675 | |
676 tree | |
677 hsa_get_decl_kernel_mapping_decl (unsigned i) | |
678 { | |
679 return (*hsa_decl_kernel_mapping)[i].decl; | |
680 } | |
681 | |
682 /* Return the name in the Ith kernel decl name mapping. */ | |
683 | |
684 char * | |
685 hsa_get_decl_kernel_mapping_name (unsigned i) | |
686 { | |
687 return (*hsa_decl_kernel_mapping)[i].name; | |
688 } | |
689 | |
690 /* Return maximum OMP size for kernel decl name mapping. */ | |
691 | |
692 unsigned | |
693 hsa_get_decl_kernel_mapping_omp_size (unsigned i) | |
694 { | |
695 return (*hsa_decl_kernel_mapping)[i].omp_data_size; | |
696 } | |
697 | |
698 /* Return if the function is gridified kernel in decl name mapping. */ | |
699 | |
700 bool | |
701 hsa_get_decl_kernel_mapping_gridified (unsigned i) | |
702 { | |
703 return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p; | |
704 } | |
705 | |
706 /* Free the mapping between original decls and kernel names. */ | |
707 | |
708 void | |
709 hsa_free_decl_kernel_mapping (void) | |
710 { | |
711 if (hsa_decl_kernel_mapping == NULL) | |
712 return; | |
713 | |
714 for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i) | |
715 free ((*hsa_decl_kernel_mapping)[i].name); | |
716 ggc_free (hsa_decl_kernel_mapping); | |
717 } | |
718 | |
719 /* Add new kernel dependency. */ | |
720 | |
721 void | |
722 hsa_add_kernel_dependency (tree caller, const char *called_function) | |
723 { | |
724 if (hsa_decl_kernel_dependencies == NULL) | |
725 hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> (); | |
726 | |
727 vec <const char *> *s = NULL; | |
728 vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller); | |
729 if (slot == NULL) | |
730 { | |
731 s = new vec <const char *> (); | |
732 hsa_decl_kernel_dependencies->put (caller, s); | |
733 } | |
734 else | |
735 s = *slot; | |
736 | |
737 s->safe_push (called_function); | |
738 } | |
739 | |
740 /* Expansion to HSA needs a few gc roots to hold types, constructors etc. In | |
741 order to minimize the number of GTY roots, we'll root them all in the | |
742 following array. The individual elements should only be accessed by the | |
743 very simple getters (of a pointer-to-tree) below. */ | |
744 | |
745 static GTY(()) tree hsa_tree_gt_roots[3]; | |
746 | |
747 tree * | |
748 hsa_get_ctor_statements (void) | |
749 { | |
750 return &hsa_tree_gt_roots[0]; | |
751 } | |
752 | |
753 tree * | |
754 hsa_get_dtor_statements (void) | |
755 { | |
756 return &hsa_tree_gt_roots[1]; | |
757 } | |
758 | |
759 tree * | |
760 hsa_get_kernel_dispatch_type (void) | |
761 { | |
762 return &hsa_tree_gt_roots[2]; | |
763 } | |
764 | |
/* Modify the NUL-terminated name P in-place so that it is a valid HSA
   identifier: every '.' and every '-' is replaced by '_'.  */

void
hsa_sanitize_name (char *p)
{
  while (*p != '\0')
    {
      if (*p == '.' || *p == '-')
	*p = '_';
      p++;
    }
}
774 | |
775 /* Clone the name P, set trailing ampersand and sanitize the name. */ | |
776 | |
777 char * | |
778 hsa_brig_function_name (const char *p) | |
779 { | |
780 unsigned len = strlen (p); | |
781 char *buf = XNEWVEC (char, len + 2); | |
782 | |
783 buf[0] = '&'; | |
784 buf[len + 1] = '\0'; | |
785 memcpy (buf + 1, p, len); | |
786 | |
787 hsa_sanitize_name (buf); | |
788 return buf; | |
789 } | |
790 | |
791 /* Add a flatten attribute and disable vectorization for gpu implementation | |
792 function decl GDECL. */ | |
793 | |
794 void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl) | |
795 { | |
796 DECL_ATTRIBUTES (gdecl) | |
797 = tree_cons (get_identifier ("flatten"), NULL_TREE, | |
798 DECL_ATTRIBUTES (gdecl)); | |
799 | |
800 tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl); | |
801 if (fn_opts == NULL_TREE) | |
802 fn_opts = optimization_default_node; | |
803 fn_opts = copy_node (fn_opts); | |
804 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false; | |
805 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false; | |
806 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts; | |
807 } | |
808 | |
809 void | |
810 hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, | |
811 hsa_function_kind kind, bool gridified_kernel_p) | |
812 { | |
813 hsa_function_summary *gpu_summary = get (gpu); | |
814 hsa_function_summary *host_summary = get (host); | |
815 | |
816 gpu_summary->m_kind = kind; | |
817 host_summary->m_kind = kind; | |
818 | |
819 gpu_summary->m_gpu_implementation_p = true; | |
820 host_summary->m_gpu_implementation_p = false; | |
821 | |
822 gpu_summary->m_gridified_kernel_p = gridified_kernel_p; | |
823 host_summary->m_gridified_kernel_p = gridified_kernel_p; | |
824 | |
825 gpu_summary->m_bound_function = host; | |
826 host_summary->m_bound_function = gpu; | |
827 | |
828 process_gpu_implementation_attributes (gpu->decl); | |
829 | |
830 /* Create reference between a kernel and a corresponding host implementation | |
831 to quarantee LTO streaming to a same LTRANS. */ | |
832 if (kind == HSA_KERNEL) | |
833 gpu->create_reference (host, IPA_REF_ADDR); | |
834 } | |
835 | |
836 /* Add a HOST function to HSA summaries. */ | |
837 | |
838 void | |
839 hsa_register_kernel (cgraph_node *host) | |
840 { | |
841 if (hsa_summaries == NULL) | |
842 hsa_summaries = new hsa_summary_t (symtab); | |
843 hsa_function_summary *s = hsa_summaries->get (host); | |
844 s->m_kind = HSA_KERNEL; | |
845 } | |
846 | |
847 /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of | |
848 a HOST function. */ | |
849 | |
850 void | |
851 hsa_register_kernel (cgraph_node *gpu, cgraph_node *host) | |
852 { | |
853 if (hsa_summaries == NULL) | |
854 hsa_summaries = new hsa_summary_t (symtab); | |
855 hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true); | |
856 } | |
857 | |
858 /* Return true if expansion of the current HSA function has already failed. */ | |
859 | |
860 bool | |
861 hsa_seen_error (void) | |
862 { | |
863 return hsa_cfun->m_seen_error; | |
864 } | |
865 | |
866 /* Mark current HSA function as failed. */ | |
867 | |
868 void | |
869 hsa_fail_cfun (void) | |
870 { | |
871 hsa_failed_functions->add (hsa_cfun->m_decl); | |
872 hsa_cfun->m_seen_error = true; | |
873 } | |
874 | |
875 char * | |
876 hsa_internal_fn::name () | |
877 { | |
878 char *name = xstrdup (internal_fn_name (m_fn)); | |
879 for (char *ptr = name; *ptr; ptr++) | |
880 *ptr = TOLOWER (*ptr); | |
881 | |
882 const char *suffix = NULL; | |
883 if (m_type_bit_size == 32) | |
884 suffix = "f"; | |
885 | |
886 if (suffix) | |
887 { | |
888 char *name2 = concat (name, suffix, NULL); | |
889 free (name); | |
890 name = name2; | |
891 } | |
892 | |
893 hsa_sanitize_name (name); | |
894 return name; | |
895 } | |
896 | |
897 unsigned | |
898 hsa_internal_fn::get_arity () | |
899 { | |
900 switch (m_fn) | |
901 { | |
902 case IFN_ACOS: | |
903 case IFN_ASIN: | |
904 case IFN_ATAN: | |
905 case IFN_COS: | |
906 case IFN_EXP: | |
907 case IFN_EXP10: | |
908 case IFN_EXP2: | |
909 case IFN_EXPM1: | |
910 case IFN_LOG: | |
911 case IFN_LOG10: | |
912 case IFN_LOG1P: | |
913 case IFN_LOG2: | |
914 case IFN_LOGB: | |
915 case IFN_SIGNIFICAND: | |
916 case IFN_SIN: | |
917 case IFN_SQRT: | |
918 case IFN_TAN: | |
919 case IFN_CEIL: | |
920 case IFN_FLOOR: | |
921 case IFN_NEARBYINT: | |
922 case IFN_RINT: | |
923 case IFN_ROUND: | |
924 case IFN_TRUNC: | |
925 return 1; | |
926 case IFN_ATAN2: | |
927 case IFN_COPYSIGN: | |
928 case IFN_FMOD: | |
929 case IFN_POW: | |
930 case IFN_REMAINDER: | |
931 case IFN_SCALB: | |
932 case IFN_LDEXP: | |
933 return 2; | |
934 case IFN_CLRSB: | |
935 case IFN_CLZ: | |
936 case IFN_CTZ: | |
937 case IFN_FFS: | |
938 case IFN_PARITY: | |
939 case IFN_POPCOUNT: | |
940 default: | |
941 /* As we produce sorry message for unknown internal functions, | |
942 reaching this label is definitely a bug. */ | |
943 gcc_unreachable (); | |
944 } | |
945 } | |
946 | |
947 BrigType16_t | |
948 hsa_internal_fn::get_argument_type (int n) | |
949 { | |
950 switch (m_fn) | |
951 { | |
952 case IFN_ACOS: | |
953 case IFN_ASIN: | |
954 case IFN_ATAN: | |
955 case IFN_COS: | |
956 case IFN_EXP: | |
957 case IFN_EXP10: | |
958 case IFN_EXP2: | |
959 case IFN_EXPM1: | |
960 case IFN_LOG: | |
961 case IFN_LOG10: | |
962 case IFN_LOG1P: | |
963 case IFN_LOG2: | |
964 case IFN_LOGB: | |
965 case IFN_SIGNIFICAND: | |
966 case IFN_SIN: | |
967 case IFN_SQRT: | |
968 case IFN_TAN: | |
969 case IFN_CEIL: | |
970 case IFN_FLOOR: | |
971 case IFN_NEARBYINT: | |
972 case IFN_RINT: | |
973 case IFN_ROUND: | |
974 case IFN_TRUNC: | |
975 case IFN_ATAN2: | |
976 case IFN_COPYSIGN: | |
977 case IFN_FMOD: | |
978 case IFN_POW: | |
979 case IFN_REMAINDER: | |
980 case IFN_SCALB: | |
981 return hsa_float_for_bitsize (m_type_bit_size); | |
982 case IFN_LDEXP: | |
983 { | |
984 if (n == -1 || n == 0) | |
985 return hsa_float_for_bitsize (m_type_bit_size); | |
986 else | |
987 return BRIG_TYPE_S32; | |
988 } | |
989 default: | |
990 /* As we produce sorry message for unknown internal functions, | |
991 reaching this label is definitely a bug. */ | |
992 gcc_unreachable (); | |
993 } | |
994 } | |
995 | |
996 #include "gt-hsa-common.h" |