Mercurial > hg > CbC > CbC_gcc
comparison gcc/brig/brigfrontend/brig-code-entry-handler.cc @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* brig-code-entry-handler.cc -- a gccbrig base class | 1 /* brig-code-entry-handler.cc -- a gccbrig base class |
2 Copyright (C) 2016-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2016-2018 Free Software Foundation, Inc. |
3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> | 3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> |
4 for General Processor Tech. | 4 for General Processor Tech. |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
39 #include "builtins.h" | 39 #include "builtins.h" |
40 #include "phsa.h" | 40 #include "phsa.h" |
41 #include "brig-builtins.h" | 41 #include "brig-builtins.h" |
42 #include "fold-const.h" | 42 #include "fold-const.h" |
43 | 43 |
44 brig_code_entry_handler::builtin_map brig_code_entry_handler::s_custom_builtins; | |
45 | |
46 brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent) | 44 brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent) |
47 : brig_entry_handler (parent) | 45 : brig_entry_handler (parent) |
48 { | 46 { |
49 if (s_custom_builtins.size () > 0) return; | |
50 | |
51 /* Populate the builtin index. */ | |
52 #undef DEF_HSAIL_ATOMIC_BUILTIN | |
53 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN | |
54 #undef DEF_HSAIL_INTR_BUILTIN | |
55 #undef DEF_HSAIL_SAT_BUILTIN | |
56 #undef DEF_HSAIL_BUILTIN | |
57 #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \ | |
58 s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \ | |
59 = builtin_decl_explicit (ENUM); | |
60 | |
61 #include "brig-builtins.def" | |
62 } | 47 } |
63 | 48 |
64 /* Build a tree operand which is a reference to a piece of code. REF is the | 49 /* Build a tree operand which is a reference to a piece of code. REF is the |
65 original reference as a BRIG object. */ | 50 original reference as a BRIG object. */ |
66 | 51 |
135 | 120 |
136 /* In case a vector is used an input, cast the elements to | 121 /* In case a vector is used an input, cast the elements to |
137 correct size here so we don't need a separate unpack/pack for it. | 122 correct size here so we don't need a separate unpack/pack for it. |
138 fp16-fp32 conversion is done in build_operands (). */ | 123 fp16-fp32 conversion is done in build_operands (). */ |
139 if (is_input && TREE_TYPE (element) != operand_type) | 124 if (is_input && TREE_TYPE (element) != operand_type) |
140 { | 125 element = build_resize_convert_view (operand_type, element); |
141 if (int_size_in_bytes (TREE_TYPE (element)) | |
142 == int_size_in_bytes (operand_type) | |
143 && !INTEGRAL_TYPE_P (operand_type)) | |
144 element = build1 (VIEW_CONVERT_EXPR, operand_type, element); | |
145 else | |
146 element = convert (operand_type, element); | |
147 } | |
148 | 126 |
149 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element); | 127 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element); |
150 ++operand_ptr; | 128 ++operand_ptr; |
151 bytes -= 4; | 129 bytes -= 4; |
152 } | 130 } |
306 tree_stl_vec uint32_2 | 284 tree_stl_vec uint32_2 |
307 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); | 285 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); |
308 | 286 |
309 tree local_size | 287 tree local_size |
310 = build2 (MULT_EXPR, uint32_type_node, | 288 = build2 (MULT_EXPR, uint32_type_node, |
311 expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, | 289 m_parent.m_cf->expand_or_call_builtin |
312 BRIG_TYPE_U32, | 290 (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, |
313 uint32_type_node, uint32_0), | 291 uint32_type_node, uint32_0), |
314 expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, | 292 m_parent.m_cf->expand_or_call_builtin |
315 BRIG_TYPE_U32, | 293 (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, |
316 uint32_type_node, uint32_1)); | 294 uint32_type_node, uint32_1)); |
317 | 295 |
318 local_size | 296 local_size |
319 = build2 (MULT_EXPR, uint32_type_node, | 297 = build2 (MULT_EXPR, uint32_type_node, |
320 expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, | 298 m_parent.m_cf->expand_or_call_builtin |
321 BRIG_TYPE_U32, | 299 (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, |
322 uint32_type_node, uint32_2), | 300 uint32_type_node, uint32_2), |
323 local_size); | 301 local_size); |
324 | 302 |
325 tree var_region | 303 tree var_region |
326 = build2 (MULT_EXPR, uint32_type_node, | 304 = build2 (MULT_EXPR, uint32_type_node, |
327 build_int_cst (uint32_type_node, offset), local_size); | 305 build_int_cst (uint32_type_node, offset), local_size); |
329 tree_stl_vec operands; | 307 tree_stl_vec operands; |
330 tree pos | 308 tree pos |
331 = build2 (MULT_EXPR, uint32_type_node, | 309 = build2 (MULT_EXPR, uint32_type_node, |
332 build_int_cst (uint32_type_node, | 310 build_int_cst (uint32_type_node, |
333 m_parent.private_variable_size (var_name)), | 311 m_parent.private_variable_size (var_name)), |
334 expand_or_call_builtin (BRIG_OPCODE_WORKITEMFLATID, | 312 m_parent.m_cf->expand_or_call_builtin |
335 BRIG_TYPE_U32, | 313 (BRIG_OPCODE_WORKITEMFLATID, BRIG_TYPE_U32, |
336 uint32_type_node, operands)); | 314 uint32_type_node, operands)); |
337 | 315 |
338 tree var_offset | 316 tree var_offset |
339 = build2 (PLUS_EXPR, uint32_type_node, var_region, pos); | 317 = build2 (PLUS_EXPR, uint32_type_node, var_region, pos); |
340 | 318 |
341 /* In case of LDA this is returned directly as an integer value. | 319 /* In case of LDA this is returned directly as an integer value. |
342 For other mem-related instructions, we will convert this segment | 320 For other mem-related instructions, we will convert this segment |
343 offset to a flat address by adding it as an offset to a (private | 321 offset to a flat address by adding it as an offset to a (private |
344 or group) base pointer later on. Same applies to group_var_offset. */ | 322 or group) base pointer later on. Same applies to group_var_offset. */ |
345 symbol_base | 323 symbol_base |
346 = add_temp_var ("priv_var_offset", | 324 = m_parent.m_cf->add_temp_var ("priv_var_offset", |
347 convert (size_type_node, var_offset)); | 325 convert (size_type_node, |
326 var_offset)); | |
348 } | 327 } |
349 else if (segment == BRIG_SEGMENT_ARG) | 328 else if (segment == BRIG_SEGMENT_ARG) |
350 { | 329 { |
351 tree arg_var_decl; | 330 tree arg_var_decl; |
352 if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol) | 331 if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol) |
366 segment), the reference is to an array object and | 345 segment), the reference is to an array object and |
367 in the callee, the array object has been passed as a pointer | 346 in the callee, the array object has been passed as a pointer |
368 to the array object. */ | 347 to the array object. */ |
369 | 348 |
370 if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl))) | 349 if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl))) |
371 symbol_base = build_reinterpret_cast (ptype, arg_var_decl); | 350 symbol_base = build_resize_convert_view (ptype, arg_var_decl); |
372 else | 351 else |
373 { | 352 { |
374 /* In case we are referring to an array (the argument in | 353 /* In case we are referring to an array (the argument in |
375 call site), use its element zero as the base address. */ | 354 call site), use its element zero as the base address. */ |
376 tree element_zero | 355 tree element_zero |
434 { | 413 { |
435 const BrigOperandRegister *mem_base_reg | 414 const BrigOperandRegister *mem_base_reg |
436 = (const BrigOperandRegister *) m_parent.get_brig_operand_entry | 415 = (const BrigOperandRegister *) m_parent.get_brig_operand_entry |
437 (addr_operand.reg); | 416 (addr_operand.reg); |
438 tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg); | 417 tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg); |
439 var_offset = convert_to_pointer (ptr_type_node, base_reg_var); | 418 tree as_uint = build_reinterpret_to_uint (base_reg_var); |
419 var_offset = convert_to_pointer (ptr_type_node, as_uint); | |
440 | 420 |
441 gcc_assert (var_offset != NULL_TREE); | 421 gcc_assert (var_offset != NULL_TREE); |
442 } | 422 } |
443 /* The pointer type we use to access the memory. Should be of the | 423 /* The pointer type we use to access the memory. Should be of the |
444 width of the load/store instruction, not the target/data | 424 width of the load/store instruction, not the target/data |
525 | 505 |
526 uint32_t operand_offset | 506 uint32_t operand_offset |
527 = ((const uint32_t *) &operand_entries->bytes)[operand_index]; | 507 = ((const uint32_t *) &operand_entries->bytes)[operand_index]; |
528 const BrigBase *operand_data | 508 const BrigBase *operand_data |
529 = m_parent.get_brig_operand_entry (operand_offset); | 509 = m_parent.get_brig_operand_entry (operand_offset); |
530 return build_tree_operand (*brig_inst, *operand_data, operand_type); | 510 |
511 bool inputp = !gccbrig_hsa_opcode_op_output_p (brig_inst->opcode, | |
512 operand_index); | |
513 return build_tree_operand (*brig_inst, *operand_data, operand_type, inputp); | |
531 } | 514 } |
532 | 515 |
533 /* Builds a single (scalar) constant initialized element of type | 516 /* Builds a single (scalar) constant initialized element of type |
534 ELEMENT_TYPE from the buffer pointed to by NEXT_DATA. */ | 517 ELEMENT_TYPE from the buffer pointed to by NEXT_DATA. */ |
535 | 518 |
639 { | 622 { |
640 if (VECTOR_TYPE_P (tree_element_type)) | 623 if (VECTOR_TYPE_P (tree_element_type)) |
641 { | 624 { |
642 /* In case of vector type elements (or sole vectors), | 625 /* In case of vector type elements (or sole vectors), |
643 create a vector ctor. */ | 626 create a vector ctor. */ |
644 size_t element_count = TYPE_VECTOR_SUBPARTS (tree_element_type); | 627 size_t element_count |
628 = gccbrig_type_vector_subparts (tree_element_type); | |
645 if (bytes_left < scalar_element_size * element_count) | 629 if (bytes_left < scalar_element_size * element_count) |
646 fatal_error (UNKNOWN_LOCATION, | 630 fatal_error (UNKNOWN_LOCATION, |
647 "Not enough bytes left for the initializer " | 631 "Not enough bytes left for the initializer " |
648 "(%lu need %lu).", (unsigned long) bytes_left, | 632 "(%lu need %lu).", (unsigned long) bytes_left, |
649 (unsigned long) (scalar_element_size | 633 (unsigned long) (scalar_element_size |
699 } | 683 } |
700 else | 684 else |
701 return gccbrig_tree_type_for_hsa_type (brig_type); | 685 return gccbrig_tree_type_for_hsa_type (brig_type); |
702 } | 686 } |
703 | 687 |
704 /* In case the HSA instruction must be implemented using a builtin, | |
705 this function is called to get the correct builtin function. | |
706 TYPE is the instruction tree type, BRIG_OPCODE the opcode of the | |
707 brig instruction and BRIG_TYPE the brig instruction's type. */ | |
708 | |
709 tree | |
710 brig_code_entry_handler::get_builtin_for_hsa_opcode | |
711 (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const | |
712 { | |
713 tree builtin = NULL_TREE; | |
714 tree builtin_type = type; | |
715 | |
716 /* For vector types, first find the scalar version of the builtin. */ | |
717 if (type != NULL_TREE && VECTOR_TYPE_P (type)) | |
718 builtin_type = TREE_TYPE (type); | |
719 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; | |
720 | |
721 /* Some BRIG opcodes can use the same builtins for unsigned and | |
722 signed types. Force these cases to unsigned types. */ | |
723 | |
724 if (brig_opcode == BRIG_OPCODE_BORROW | |
725 || brig_opcode == BRIG_OPCODE_CARRY | |
726 || brig_opcode == BRIG_OPCODE_LASTBIT | |
727 || brig_opcode == BRIG_OPCODE_BITINSERT) | |
728 { | |
729 if (brig_type == BRIG_TYPE_S32) | |
730 brig_type = BRIG_TYPE_U32; | |
731 else if (brig_type == BRIG_TYPE_S64) | |
732 brig_type = BRIG_TYPE_U64; | |
733 } | |
734 | |
735 switch (brig_opcode) | |
736 { | |
737 case BRIG_OPCODE_FLOOR: | |
738 builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR); | |
739 break; | |
740 case BRIG_OPCODE_CEIL: | |
741 builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL); | |
742 break; | |
743 case BRIG_OPCODE_SQRT: | |
744 case BRIG_OPCODE_NSQRT: | |
745 builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT); | |
746 break; | |
747 case BRIG_OPCODE_RINT: | |
748 builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT); | |
749 break; | |
750 case BRIG_OPCODE_TRUNC: | |
751 builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC); | |
752 break; | |
753 case BRIG_OPCODE_COPYSIGN: | |
754 builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN); | |
755 break; | |
756 case BRIG_OPCODE_NSIN: | |
757 builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN); | |
758 break; | |
759 case BRIG_OPCODE_NLOG2: | |
760 builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2); | |
761 break; | |
762 case BRIG_OPCODE_NEXP2: | |
763 builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2); | |
764 break; | |
765 case BRIG_OPCODE_NFMA: | |
766 builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA); | |
767 break; | |
768 case BRIG_OPCODE_NCOS: | |
769 builtin = mathfn_built_in (builtin_type, BUILT_IN_COS); | |
770 break; | |
771 case BRIG_OPCODE_POPCOUNT: | |
772 /* Popcount should be typed by its argument type (the return value | |
773 is always u32). Let's use a b64 version for also for b32 for now. */ | |
774 return builtin_decl_explicit (BUILT_IN_POPCOUNTL); | |
775 case BRIG_OPCODE_BORROW: | |
776 /* Borrow uses the same builtin for unsigned and signed types. */ | |
777 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) | |
778 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32); | |
779 else | |
780 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64); | |
781 case BRIG_OPCODE_CARRY: | |
782 /* Carry also uses the same builtin for unsigned and signed types. */ | |
783 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) | |
784 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32); | |
785 else | |
786 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64); | |
787 default: | |
788 | |
789 /* Use our builtin index for finding a proper builtin for the BRIG | |
790 opcode and BRIG type. This takes care most of the builtin cases, | |
791 the special cases are handled in the separate 'case' statements | |
792 above. */ | |
793 builtin_map::const_iterator i | |
794 = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type)); | |
795 if (i != s_custom_builtins.end ()) | |
796 return (*i).second; | |
797 | |
798 if (brig_inner_type != brig_type) | |
799 { | |
800 /* Try to find a scalar built-in we could use. */ | |
801 i = s_custom_builtins.find | |
802 (std::make_pair (brig_opcode, brig_inner_type)); | |
803 if (i != s_custom_builtins.end ()) | |
804 return (*i).second; | |
805 } | |
806 | |
807 /* In case this is an fp16 operation that is promoted to fp32, | |
808 try to find a fp32 scalar built-in. */ | |
809 if (brig_inner_type == BRIG_TYPE_F16) | |
810 { | |
811 i = s_custom_builtins.find | |
812 (std::make_pair (brig_opcode, BRIG_TYPE_F32)); | |
813 if (i != s_custom_builtins.end ()) | |
814 return (*i).second; | |
815 } | |
816 gcc_unreachable (); | |
817 } | |
818 | |
819 if (VECTOR_TYPE_P (type) && builtin != NULL_TREE) | |
820 { | |
821 /* Try to find a vectorized version of the built-in. | |
822 TODO: properly assert that builtin is a mathfn builtin? */ | |
823 tree vec_builtin | |
824 = targetm.vectorize.builtin_vectorized_function | |
825 (builtin_mathfn_code (builtin), type, type); | |
826 if (vec_builtin != NULL_TREE) | |
827 return vec_builtin; | |
828 else | |
829 return builtin; | |
830 } | |
831 if (builtin == NULL_TREE) | |
832 gcc_unreachable (); | |
833 return builtin; | |
834 } | |
835 | |
836 /* Return the correct GENERIC type for storing comparison results | 688 /* Return the correct GENERIC type for storing comparison results |
837 of operand with the type given in SOURCE_TYPE. */ | 689 of operand with the type given in SOURCE_TYPE. */ |
838 | 690 |
839 tree | 691 tree |
840 brig_code_entry_handler::get_comparison_result_type (tree source_type) | 692 brig_code_entry_handler::get_comparison_result_type (tree source_type) |
842 if (VECTOR_TYPE_P (source_type)) | 694 if (VECTOR_TYPE_P (source_type)) |
843 { | 695 { |
844 size_t element_size = int_size_in_bytes (TREE_TYPE (source_type)); | 696 size_t element_size = int_size_in_bytes (TREE_TYPE (source_type)); |
845 return build_vector_type | 697 return build_vector_type |
846 (build_nonstandard_boolean_type (element_size * BITS_PER_UNIT), | 698 (build_nonstandard_boolean_type (element_size * BITS_PER_UNIT), |
847 TYPE_VECTOR_SUBPARTS (source_type)); | 699 gccbrig_type_vector_subparts (source_type)); |
848 } | 700 } |
849 else | 701 else |
850 return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1); | 702 return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1); |
851 } | |
852 | |
853 /* Returns true in case the given opcode needs to know about work-item context | |
854 data. In such case the context data is passed as a pointer to a work-item | |
855 context object, as the last argument in the builtin call. */ | |
856 | |
857 bool | |
858 brig_code_entry_handler::needs_workitem_context_data | |
859 (BrigOpcode16_t brig_opcode) const | |
860 { | |
861 switch (brig_opcode) | |
862 { | |
863 case BRIG_OPCODE_WORKITEMABSID: | |
864 case BRIG_OPCODE_WORKITEMFLATABSID: | |
865 case BRIG_OPCODE_WORKITEMFLATID: | |
866 case BRIG_OPCODE_CURRENTWORKITEMFLATID: | |
867 case BRIG_OPCODE_WORKITEMID: | |
868 case BRIG_OPCODE_WORKGROUPID: | |
869 case BRIG_OPCODE_WORKGROUPSIZE: | |
870 case BRIG_OPCODE_CURRENTWORKGROUPSIZE: | |
871 case BRIG_OPCODE_GRIDGROUPS: | |
872 case BRIG_OPCODE_GRIDSIZE: | |
873 case BRIG_OPCODE_DIM: | |
874 case BRIG_OPCODE_PACKETID: | |
875 case BRIG_OPCODE_PACKETCOMPLETIONSIG: | |
876 case BRIG_OPCODE_BARRIER: | |
877 case BRIG_OPCODE_WAVEBARRIER: | |
878 case BRIG_OPCODE_ARRIVEFBAR: | |
879 case BRIG_OPCODE_INITFBAR: | |
880 case BRIG_OPCODE_JOINFBAR: | |
881 case BRIG_OPCODE_LEAVEFBAR: | |
882 case BRIG_OPCODE_RELEASEFBAR: | |
883 case BRIG_OPCODE_WAITFBAR: | |
884 case BRIG_OPCODE_CUID: | |
885 case BRIG_OPCODE_MAXCUID: | |
886 case BRIG_OPCODE_DEBUGTRAP: | |
887 case BRIG_OPCODE_GROUPBASEPTR: | |
888 case BRIG_OPCODE_KERNARGBASEPTR: | |
889 case BRIG_OPCODE_ALLOCA: | |
890 return true; | |
891 default: | |
892 return false; | |
893 }; | |
894 } | |
895 | |
896 /* Returns true in case the given opcode that would normally be generated | |
897 as a builtin call can be expanded to tree nodes. */ | |
898 | |
899 bool | |
900 brig_code_entry_handler::can_expand_builtin (BrigOpcode16_t brig_opcode) const | |
901 { | |
902 switch (brig_opcode) | |
903 { | |
904 case BRIG_OPCODE_WORKITEMFLATABSID: | |
905 case BRIG_OPCODE_WORKITEMFLATID: | |
906 case BRIG_OPCODE_WORKITEMABSID: | |
907 case BRIG_OPCODE_WORKGROUPSIZE: | |
908 case BRIG_OPCODE_CURRENTWORKGROUPSIZE: | |
909 /* TODO: expand more builtins. */ | |
910 return true; | |
911 default: | |
912 return false; | |
913 }; | |
914 } | |
915 | |
916 /* Try to expand the given builtin call to reuse a previously generated | |
917 variable, if possible. If not, just call the given builtin. | |
918 BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type, | |
919 ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's | |
920 input operands. */ | |
921 | |
922 tree | |
923 brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode, | |
924 BrigType16_t brig_type, | |
925 tree arith_type, | |
926 tree_stl_vec &operands) | |
927 { | |
928 if (m_parent.m_cf->m_is_kernel && can_expand_builtin (brig_opcode)) | |
929 return expand_builtin (brig_opcode, operands); | |
930 | |
931 tree built_in | |
932 = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type); | |
933 | |
934 if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in))) | |
935 && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type) | |
936 && brig_opcode != BRIG_OPCODE_LERP | |
937 && brig_opcode != BRIG_OPCODE_PACKCVT | |
938 && brig_opcode != BRIG_OPCODE_SAD | |
939 && brig_opcode != BRIG_OPCODE_SADHI) | |
940 { | |
941 /* Call the scalar built-in for all elements in the vector. */ | |
942 tree_stl_vec operand0_elements; | |
943 if (operands.size () > 0) | |
944 unpack (operands[0], operand0_elements); | |
945 | |
946 tree_stl_vec operand1_elements; | |
947 if (operands.size () > 1) | |
948 unpack (operands[1], operand1_elements); | |
949 | |
950 tree_stl_vec result_elements; | |
951 | |
952 for (size_t i = 0; i < TYPE_VECTOR_SUBPARTS (arith_type); ++i) | |
953 { | |
954 tree_stl_vec call_operands; | |
955 if (operand0_elements.size () > 0) | |
956 call_operands.push_back (operand0_elements.at (i)); | |
957 | |
958 if (operand1_elements.size () > 0) | |
959 call_operands.push_back (operand1_elements.at (i)); | |
960 | |
961 result_elements.push_back | |
962 (expand_or_call_builtin (brig_opcode, brig_type, | |
963 TREE_TYPE (arith_type), | |
964 call_operands)); | |
965 } | |
966 return pack (result_elements); | |
967 } | |
968 | |
969 tree_stl_vec call_operands; | |
970 tree_stl_vec operand_types; | |
971 | |
972 tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in)); | |
973 | |
974 for (size_t i = 0; i < operands.size (); ++i) | |
975 { | |
976 tree operand_type = TREE_VALUE (arg_type_chain); | |
977 call_operands.push_back (convert (operand_type, operands[i])); | |
978 operand_types.push_back (operand_type); | |
979 arg_type_chain = TREE_CHAIN (arg_type_chain); | |
980 } | |
981 | |
982 if (needs_workitem_context_data (brig_opcode)) | |
983 { | |
984 call_operands.push_back (m_parent.m_cf->m_context_arg); | |
985 operand_types.push_back (ptr_type_node); | |
986 m_parent.m_cf->m_has_unexpanded_dp_builtins = true; | |
987 } | |
988 | |
989 size_t operand_count = call_operands.size (); | |
990 | |
991 call_operands.resize (4, NULL_TREE); | |
992 operand_types.resize (4, NULL_TREE); | |
993 for (size_t i = 0; i < operand_count; ++i) | |
994 call_operands.at (i) = build_reinterpret_cast (operand_types.at (i), | |
995 call_operands.at (i)); | |
996 | |
997 tree fnptr = build_fold_addr_expr (built_in); | |
998 return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr, | |
999 operand_count, &call_operands[0]); | |
1000 } | |
1001 | |
1002 /* Instead of calling a built-in, reuse a previously returned value known to | |
1003 be still valid. This is beneficial especially for the work-item | |
1004 identification related builtins as not having them as calls can lead to | |
1005 more easily vectorizable parallel loops for multi work-item work-groups. | |
1006 BRIG_OPCODE identifies the builtin and OPERANDS store the operands. */ | |
1007 | |
1008 tree | |
1009 brig_code_entry_handler::expand_builtin (BrigOpcode16_t brig_opcode, | |
1010 tree_stl_vec &operands) | |
1011 { | |
1012 tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0)); | |
1013 | |
1014 tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1)); | |
1015 | |
1016 tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); | |
1017 | |
1018 if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID) | |
1019 { | |
1020 tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0); | |
1021 id0 = convert (uint64_type_node, id0); | |
1022 | |
1023 tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1); | |
1024 id1 = convert (uint64_type_node, id1); | |
1025 | |
1026 tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2); | |
1027 id2 = convert (uint64_type_node, id2); | |
1028 | |
1029 tree max0 = convert (uint64_type_node, | |
1030 m_parent.m_cf->m_grid_size_vars[0]); | |
1031 tree max1 = convert (uint64_type_node, | |
1032 m_parent.m_cf->m_grid_size_vars[1]); | |
1033 | |
1034 tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0); | |
1035 id2_x_max0_x_max1 | |
1036 = build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1); | |
1037 | |
1038 tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0); | |
1039 | |
1040 tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0); | |
1041 sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1); | |
1042 | |
1043 return add_temp_var ("workitemflatabsid", sum); | |
1044 } | |
1045 else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID) | |
1046 { | |
1047 HOST_WIDE_INT dim = int_constant_value (operands[0]); | |
1048 | |
1049 tree local_id_var = m_parent.m_cf->m_local_id_vars[dim]; | |
1050 tree wg_id_var = m_parent.m_cf->m_wg_id_vars[dim]; | |
1051 tree wg_size_var = m_parent.m_cf->m_wg_size_vars[dim]; | |
1052 tree grid_size_var = m_parent.m_cf->m_grid_size_vars[dim]; | |
1053 | |
1054 tree wg_id_x_wg_size = build2 (MULT_EXPR, uint32_type_node, | |
1055 convert (uint32_type_node, wg_id_var), | |
1056 convert (uint32_type_node, wg_size_var)); | |
1057 tree sum | |
1058 = build2 (PLUS_EXPR, uint32_type_node, wg_id_x_wg_size, local_id_var); | |
1059 | |
1060 /* We need a modulo here because of work-groups which have dimensions | |
1061 larger than the grid size :( TO CHECK: is this really allowed in the | |
1062 specs? */ | |
1063 tree modulo | |
1064 = build2 (TRUNC_MOD_EXPR, uint32_type_node, sum, grid_size_var); | |
1065 | |
1066 return add_temp_var (std::string ("workitemabsid_") | |
1067 + (char) ((int) 'x' + dim), | |
1068 modulo); | |
1069 } | |
1070 else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID) | |
1071 { | |
1072 tree z_x_wgsx_wgsy | |
1073 = build2 (MULT_EXPR, uint32_type_node, | |
1074 m_parent.m_cf->m_local_id_vars[2], | |
1075 m_parent.m_cf->m_wg_size_vars[0]); | |
1076 z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy, | |
1077 m_parent.m_cf->m_wg_size_vars[1]); | |
1078 | |
1079 tree y_x_wgsx | |
1080 = build2 (MULT_EXPR, uint32_type_node, | |
1081 m_parent.m_cf->m_local_id_vars[1], | |
1082 m_parent.m_cf->m_wg_size_vars[0]); | |
1083 | |
1084 tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy); | |
1085 sum = build2 (PLUS_EXPR, uint32_type_node, | |
1086 m_parent.m_cf->m_local_id_vars[0], | |
1087 sum); | |
1088 return add_temp_var ("workitemflatid", sum); | |
1089 } | |
1090 else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE) | |
1091 { | |
1092 HOST_WIDE_INT dim = int_constant_value (operands[0]); | |
1093 return m_parent.m_cf->m_wg_size_vars[dim]; | |
1094 } | |
1095 else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE) | |
1096 { | |
1097 HOST_WIDE_INT dim = int_constant_value (operands[0]); | |
1098 return m_parent.m_cf->m_cur_wg_size_vars[dim]; | |
1099 } | |
1100 else | |
1101 gcc_unreachable (); | |
1102 | |
1103 return NULL_TREE; | |
1104 } | |
1105 | |
1106 /* Appends and returns a new temp variable and an accompanying assignment | |
1107 statement that stores the value of the given EXPR and has the given NAME. */ | |
1108 | |
1109 tree | |
1110 brig_code_entry_handler::add_temp_var (std::string name, tree expr) | |
1111 { | |
1112 tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ()); | |
1113 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr); | |
1114 m_parent.m_cf->append_statement (assign); | |
1115 return temp_var; | |
1116 } | 703 } |
1117 | 704 |
1118 /* Creates a FP32 to FP16 conversion call, assuming the source and destination | 705 /* Creates a FP32 to FP16 conversion call, assuming the source and destination |
1119 are FP32 type variables. */ | 706 are FP32 type variables. */ |
1120 | 707 |
1138 performs half to float conversions, constant to correct type variable, | 725 performs half to float conversions, constant to correct type variable, |
1139 and flush to zero (if applicable). */ | 726 and flush to zero (if applicable). */ |
1140 | 727 |
1141 tree_stl_vec | 728 tree_stl_vec |
1142 brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst) | 729 brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst) |
730 { | |
731 return build_or_analyze_operands (brig_inst, false); | |
732 } | |
733 | |
734 void | |
735 brig_code_entry_handler::analyze_operands (const BrigInstBase &brig_inst) | |
736 { | |
737 build_or_analyze_operands (brig_inst, true); | |
738 } | |
739 | |
740 /* Implements both the build_operands () and analyze_operands () call | |
741 so changes go in tandem. Performs build_operands () when ANALYZE | |
742 is false. Otherwise, only analyze operands and return empty | |
743 list. | |
744 | |
745 If analyzing record each HSA register operand with the | |
746 corresponding resolved operand tree type to | |
747 brig_to_generic::m_fn_regs_use_index. */ | |
748 | |
749 tree_stl_vec | |
750 brig_code_entry_handler:: | |
751 build_or_analyze_operands (const BrigInstBase &brig_inst, bool analyze) | |
1143 { | 752 { |
1144 /* Flush to zero. */ | 753 /* Flush to zero. */ |
1145 bool ftz = false; | 754 bool ftz = false; |
1146 const BrigBase *base = &brig_inst.base; | 755 const BrigBase *base = &brig_inst.base; |
1147 | 756 |
1306 } | 915 } |
1307 else if (half_to_float) | 916 else if (half_to_float) |
1308 /* Treat the operands as the storage type at this point. */ | 917 /* Treat the operands as the storage type at this point. */ |
1309 operand_type = half_storage_type; | 918 operand_type = half_storage_type; |
1310 | 919 |
920 if (analyze) | |
921 { | |
922 if (operand_data->kind == BRIG_KIND_OPERAND_REGISTER) | |
923 { | |
924 const BrigOperandRegister &brig_reg | |
925 = (const BrigOperandRegister &) *operand_data; | |
926 m_parent.add_reg_used_as_type (brig_reg, operand_type); | |
927 } | |
928 continue; | |
929 } | |
930 | |
1311 tree operand = build_tree_operand (brig_inst, *operand_data, operand_type, | 931 tree operand = build_tree_operand (brig_inst, *operand_data, operand_type, |
1312 !is_output); | 932 !is_output); |
1313 | |
1314 gcc_assert (operand); | 933 gcc_assert (operand); |
1315 | 934 |
1316 /* Cast/convert the inputs to correct types as expected by the GENERIC | 935 /* Cast/convert the inputs to correct types as expected by the GENERIC |
1317 opcode instruction. */ | 936 opcode instruction. */ |
1318 if (!is_output) | 937 if (!is_output) |
1319 { | 938 { |
1320 if (half_to_float) | 939 if (half_to_float) |
1321 operand = build_h2f_conversion | 940 operand = build_h2f_conversion |
1322 (build_reinterpret_cast (half_storage_type, operand)); | 941 (build_resize_convert_view (half_storage_type, operand)); |
1323 else if (TREE_CODE (operand) != LABEL_DECL | 942 else if (TREE_CODE (operand) != LABEL_DECL |
1324 && TREE_CODE (operand) != TREE_VEC | 943 && TREE_CODE (operand) != TREE_VEC |
1325 && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS | 944 && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS |
1326 && !VECTOR_TYPE_P (TREE_TYPE (operand))) | 945 && operand_data->kind != BRIG_KIND_OPERAND_OPERAND_LIST) |
1327 { | 946 { |
1328 size_t reg_width = int_size_in_bytes (TREE_TYPE (operand)); | 947 operand = build_resize_convert_view (operand_type, operand); |
1329 size_t instr_width = int_size_in_bytes (operand_type); | |
1330 if (reg_width == instr_width) | |
1331 operand = build_reinterpret_cast (operand_type, operand); | |
1332 else if (reg_width > instr_width) | |
1333 { | |
1334 /* Clip the operand because the instruction's bitwidth | |
1335 is smaller than the HSAIL reg width. */ | |
1336 if (INTEGRAL_TYPE_P (operand_type)) | |
1337 operand | |
1338 = convert_to_integer (signed_or_unsigned_type_for | |
1339 (TYPE_UNSIGNED (operand_type), | |
1340 operand_type), operand); | |
1341 else | |
1342 operand = build_reinterpret_cast (operand_type, operand); | |
1343 } | |
1344 else if (reg_width < instr_width) | |
1345 /* At least shift amount operands can be read from smaller | |
1346 registers than the data operands. */ | |
1347 operand = convert (operand_type, operand); | |
1348 } | 948 } |
1349 else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE) | 949 else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE) |
1350 /* Force the operand type to be treated as the raw type. */ | 950 /* Force the operand type to be treated as the raw type. */ |
1351 operand = build_reinterpret_cast (operand_type, operand); | 951 operand = build_resize_convert_view (operand_type, operand); |
1352 | 952 |
1353 if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1) | 953 if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1) |
1354 { | 954 { |
1355 /* gcc expects the lower bit to be 1 (or all ones in case of | 955 /* gcc expects the lower bit to be 1 (or all ones in case of |
1356 vectors) while CMOV assumes false iff 0. Convert the input | 956 vectors) while CMOV assumes false iff 0. Convert the input |
1377 | 977 |
1378 tree | 978 tree |
1379 brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst, | 979 brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst, |
1380 tree output, tree inst_expr) | 980 tree output, tree inst_expr) |
1381 { | 981 { |
1382 /* The destination type might be different from the output register | 982 /* The result/input type might be different from the output register |
1383 variable type (which is always an unsigned integer type). */ | 983 variable type (can be any type; see get_m_var_declfor_reg @ |
984 brig-function.cc). */ | |
1384 tree output_type = TREE_TYPE (output); | 985 tree output_type = TREE_TYPE (output); |
1385 tree input_type = TREE_TYPE (inst_expr); | |
1386 bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 | 986 bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 |
1387 && brig_inst.base.kind != BRIG_KIND_INST_MEM | 987 && brig_inst.base.kind != BRIG_KIND_INST_MEM |
1388 && !gccbrig_is_bit_operation (brig_inst.opcode); | 988 && !gccbrig_is_bit_operation (brig_inst.opcode); |
1389 | 989 |
1390 /* Flush to zero. */ | 990 /* Flush to zero. */ |
1391 bool ftz = false; | 991 bool ftz = false; |
1392 const BrigBase *base = &brig_inst.base; | 992 const BrigBase *base = &brig_inst.base; |
1393 | 993 |
994 if (m_parent.m_cf->is_id_val (inst_expr)) | |
995 inst_expr = m_parent.m_cf->id_val (inst_expr); | |
996 | |
997 tree input_type = TREE_TYPE (inst_expr); | |
998 | |
999 m_parent.m_cf->add_reg_var_update (output, inst_expr); | |
1000 | |
1394 if (base->kind == BRIG_KIND_INST_MOD) | 1001 if (base->kind == BRIG_KIND_INST_MOD) |
1395 { | 1002 { |
1396 const BrigInstMod *mod = (const BrigInstMod *) base; | 1003 const BrigInstMod *mod = (const BrigInstMod *) base; |
1397 ftz = mod->modifier & BRIG_ALU_FTZ; | 1004 ftz = mod->modifier & BRIG_ALU_FTZ; |
1398 } | 1005 } |
1411 if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) | 1018 if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) |
1412 || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16)) | 1019 || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16)) |
1413 { | 1020 { |
1414 /* Ensure we don't duplicate the arithmetics to the arguments of the bit | 1021 /* Ensure we don't duplicate the arithmetics to the arguments of the bit |
1415 field reference operators. */ | 1022 field reference operators. */ |
1416 inst_expr = add_temp_var ("before_ftz", inst_expr); | 1023 inst_expr = m_parent.m_cf->add_temp_var ("before_ftz", inst_expr); |
1417 inst_expr = flush_to_zero (is_fp16) (*this, inst_expr); | 1024 inst_expr = flush_to_zero (is_fp16) (*this, inst_expr); |
1418 } | 1025 } |
1419 | 1026 |
1420 if (is_fp16) | 1027 if (is_fp16) |
1421 { | 1028 { |
1422 inst_expr = add_temp_var ("before_f2h", inst_expr); | 1029 inst_expr = m_parent.m_cf->add_temp_var ("before_f2h", inst_expr); |
1423 tree f2h_output = build_f2h_conversion (inst_expr); | 1030 tree f2h_output = build_f2h_conversion (inst_expr); |
1424 tree conv_int = convert_to_integer (output_type, f2h_output); | 1031 tree conv = build_resize_convert_view (output_type, f2h_output); |
1425 tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int); | 1032 tree assign = build2 (MODIFY_EXPR, output_type, output, conv); |
1426 m_parent.m_cf->append_statement (assign); | 1033 m_parent.m_cf->append_statement (assign); |
1427 return assign; | 1034 return assign; |
1428 } | 1035 } |
1429 else if (VECTOR_TYPE_P (TREE_TYPE (output))) | 1036 else if (VECTOR_TYPE_P (output_type) && TREE_CODE (output) == CONSTRUCTOR) |
1430 { | 1037 { |
1431 /* Expand/unpack the input value to the given vector elements. */ | 1038 /* Expand/unpack the input value to the given vector elements. */ |
1432 size_t i; | 1039 size_t i; |
1433 tree input = inst_expr; | 1040 tree input = inst_expr; |
1434 tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | 1041 tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); |
1452 /* All we do here is to bitcast the result and store it to the | 1059 /* All we do here is to bitcast the result and store it to the |
1453 'register' (variable). Mainly need to take care of differing | 1060 'register' (variable). Mainly need to take care of differing |
1454 bitwidths. */ | 1061 bitwidths. */ |
1455 size_t src_width = int_size_in_bytes (input_type); | 1062 size_t src_width = int_size_in_bytes (input_type); |
1456 size_t dst_width = int_size_in_bytes (output_type); | 1063 size_t dst_width = int_size_in_bytes (output_type); |
1457 | 1064 tree input = inst_expr; |
1458 if (src_width == dst_width) | 1065 /* Integer results are extended to the target register width, using |
1459 { | 1066 the same sign as the inst_expr. */ |
1460 /* A simple bitcast should do. */ | 1067 if (INTEGRAL_TYPE_P (TREE_TYPE (input)) && src_width != dst_width) |
1461 tree bitcast = build_reinterpret_cast (output_type, inst_expr); | 1068 { |
1462 tree assign = build2 (MODIFY_EXPR, output_type, output, bitcast); | 1069 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (input)); |
1463 m_parent.m_cf->append_statement (assign); | 1070 tree resized_type |
1464 return assign; | 1071 = build_nonstandard_integer_type (dst_width * BITS_PER_UNIT, |
1465 } | 1072 unsigned_p); |
1466 else | 1073 input = convert_to_integer (resized_type, input); |
1467 { | 1074 } |
1468 tree conv_int = convert_to_integer (output_type, inst_expr); | 1075 input = build_resize_convert_view (output_type, input); |
1469 tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int); | 1076 tree assign = build2 (MODIFY_EXPR, output_type, output, input); |
1470 m_parent.m_cf->append_statement (assign); | 1077 m_parent.m_cf->append_statement (assign); |
1471 return assign; | 1078 return assign; |
1472 } | |
1473 } | 1079 } |
1474 return NULL_TREE; | 1080 return NULL_TREE; |
1475 } | 1081 } |
1476 | 1082 |
1477 /* Appends a GENERIC statement (STMT) to the currently constructed function. */ | 1083 /* Appends a GENERIC statement (STMT) to the currently constructed function. */ |
1478 | 1084 |
1479 void | 1085 void |
1480 brig_code_entry_handler::append_statement (tree stmt) | 1086 brig_code_entry_handler::append_statement (tree stmt) |
1481 { | 1087 { |
1482 m_parent.m_cf->append_statement (stmt); | 1088 m_parent.m_cf->append_statement (stmt); |
1483 } | |
1484 | |
1485 /* Unpacks the elements of the vector in VALUE to scalars (bit field | |
1486 references) in ELEMENTS. */ | |
1487 | |
1488 void | |
1489 brig_code_entry_handler::unpack (tree value, tree_stl_vec &elements) | |
1490 { | |
1491 size_t vec_size = int_size_in_bytes (TREE_TYPE (value)); | |
1492 size_t element_size | |
1493 = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT; | |
1494 size_t element_count | |
1495 = vec_size * BITS_PER_UNIT / element_size; | |
1496 | |
1497 tree input_element_type = TREE_TYPE (TREE_TYPE (value)); | |
1498 | |
1499 value = add_temp_var ("unpack_input", value); | |
1500 | |
1501 for (size_t i = 0; i < element_count; ++i) | |
1502 { | |
1503 tree element | |
1504 = build3 (BIT_FIELD_REF, input_element_type, value, | |
1505 TYPE_SIZE (input_element_type), | |
1506 bitsize_int(i * element_size)); | |
1507 | |
1508 element = add_temp_var ("scalar", element); | |
1509 elements.push_back (element); | |
1510 } | |
1511 } | |
1512 | |
1513 /* Pack the elements of the scalars in ELEMENTS to the returned vector. */ | |
1514 | |
1515 tree | |
1516 brig_code_entry_handler::pack (tree_stl_vec &elements) | |
1517 { | |
1518 size_t element_count = elements.size (); | |
1519 | |
1520 gcc_assert (element_count > 1); | |
1521 | |
1522 tree output_element_type = TREE_TYPE (elements.at (0)); | |
1523 | |
1524 vec<constructor_elt, va_gc> *constructor_vals = NULL; | |
1525 for (size_t i = 0; i < element_count; ++i) | |
1526 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i)); | |
1527 | |
1528 tree vec_type = build_vector_type (output_element_type, element_count); | |
1529 | |
1530 /* build_constructor creates a vector type which is not a vector_cst | |
1531 that requires compile time constant elements. */ | |
1532 tree vec = build_constructor (vec_type, constructor_vals); | |
1533 | |
1534 /* Add a temp variable for readability. */ | |
1535 tree tmp_var = create_tmp_var (vec_type, "vec_out"); | |
1536 tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); | |
1537 m_parent.m_cf->append_statement (vec_tmp_assign); | |
1538 return tmp_var; | |
1539 } | 1089 } |
1540 | 1090 |
1541 /* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */ | 1091 /* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */ |
1542 | 1092 |
1543 tree | 1093 tree |
1670 tree | 1220 tree |
1671 float_to_half::visit_element (brig_code_entry_handler &caller, tree operand) | 1221 float_to_half::visit_element (brig_code_entry_handler &caller, tree operand) |
1672 { | 1222 { |
1673 tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16); | 1223 tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16); |
1674 | 1224 |
1675 tree casted_operand = build_reinterpret_cast (uint32_type_node, operand); | 1225 tree casted_operand = build_resize_convert_view (uint32_type_node, operand); |
1676 | 1226 |
1677 tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node, | 1227 tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node, |
1678 casted_operand); | 1228 casted_operand); |
1679 tree output | 1229 tree output |
1680 = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out"); | 1230 = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out"); |
1699 tree const_fp32_type | 1249 tree const_fp32_type |
1700 = build_type_variant (brig_to_generic::s_fp32_type, 1, 0); | 1250 = build_type_variant (brig_to_generic::s_fp32_type, 1, 0); |
1701 | 1251 |
1702 tree output = create_tmp_var (const_fp32_type, "fp32out"); | 1252 tree output = create_tmp_var (const_fp32_type, "fp32out"); |
1703 tree casted_result | 1253 tree casted_result |
1704 = build_reinterpret_cast (brig_to_generic::s_fp32_type, call); | 1254 = build_resize_convert_view (brig_to_generic::s_fp32_type, call); |
1705 | 1255 |
1706 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result); | 1256 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result); |
1707 | 1257 |
1708 caller.append_statement (assign); | 1258 caller.append_statement (assign); |
1709 | 1259 |
1751 tree n = node; | 1301 tree n = node; |
1752 if (TREE_CODE (n) == VIEW_CONVERT_EXPR) | 1302 if (TREE_CODE (n) == VIEW_CONVERT_EXPR) |
1753 n = TREE_OPERAND (n, 0); | 1303 n = TREE_OPERAND (n, 0); |
1754 return int_cst_value (n); | 1304 return int_cst_value (n); |
1755 } | 1305 } |
1756 |