comparison gcc/brig/brigfrontend/brig-code-entry-handler.cc @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
comparison
equal deleted inserted replaced
111:04ced10e8804 131:84e7813d76e9
1 /* brig-code-entry-handler.cc -- a gccbrig base class 1 /* brig-code-entry-handler.cc -- a gccbrig base class
2 Copyright (C) 2016-2017 Free Software Foundation, Inc. 2 Copyright (C) 2016-2018 Free Software Foundation, Inc.
3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> 3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
4 for General Processor Tech. 4 for General Processor Tech.
5 5
6 This file is part of GCC. 6 This file is part of GCC.
7 7
39 #include "builtins.h" 39 #include "builtins.h"
40 #include "phsa.h" 40 #include "phsa.h"
41 #include "brig-builtins.h" 41 #include "brig-builtins.h"
42 #include "fold-const.h" 42 #include "fold-const.h"
43 43
44 brig_code_entry_handler::builtin_map brig_code_entry_handler::s_custom_builtins;
45
46 brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent) 44 brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent)
47 : brig_entry_handler (parent) 45 : brig_entry_handler (parent)
48 { 46 {
49 if (s_custom_builtins.size () > 0) return;
50
51 /* Populate the builtin index. */
52 #undef DEF_HSAIL_ATOMIC_BUILTIN
53 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
54 #undef DEF_HSAIL_INTR_BUILTIN
55 #undef DEF_HSAIL_SAT_BUILTIN
56 #undef DEF_HSAIL_BUILTIN
57 #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
58 s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \
59 = builtin_decl_explicit (ENUM);
60
61 #include "brig-builtins.def"
62 } 47 }
63 48
64 /* Build a tree operand which is a reference to a piece of code. REF is the 49 /* Build a tree operand which is a reference to a piece of code. REF is the
65 original reference as a BRIG object. */ 50 original reference as a BRIG object. */
66 51
135 120
136 /* In case a vector is used an input, cast the elements to 121 /* In case a vector is used an input, cast the elements to
137 correct size here so we don't need a separate unpack/pack for it. 122 correct size here so we don't need a separate unpack/pack for it.
138 fp16-fp32 conversion is done in build_operands (). */ 123 fp16-fp32 conversion is done in build_operands (). */
139 if (is_input && TREE_TYPE (element) != operand_type) 124 if (is_input && TREE_TYPE (element) != operand_type)
140 { 125 element = build_resize_convert_view (operand_type, element);
141 if (int_size_in_bytes (TREE_TYPE (element))
142 == int_size_in_bytes (operand_type)
143 && !INTEGRAL_TYPE_P (operand_type))
144 element = build1 (VIEW_CONVERT_EXPR, operand_type, element);
145 else
146 element = convert (operand_type, element);
147 }
148 126
149 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element); 127 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element);
150 ++operand_ptr; 128 ++operand_ptr;
151 bytes -= 4; 129 bytes -= 4;
152 } 130 }
306 tree_stl_vec uint32_2 284 tree_stl_vec uint32_2
307 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); 285 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
308 286
309 tree local_size 287 tree local_size
310 = build2 (MULT_EXPR, uint32_type_node, 288 = build2 (MULT_EXPR, uint32_type_node,
311 expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, 289 m_parent.m_cf->expand_or_call_builtin
312 BRIG_TYPE_U32, 290 (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
313 uint32_type_node, uint32_0), 291 uint32_type_node, uint32_0),
314 expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, 292 m_parent.m_cf->expand_or_call_builtin
315 BRIG_TYPE_U32, 293 (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
316 uint32_type_node, uint32_1)); 294 uint32_type_node, uint32_1));
317 295
318 local_size 296 local_size
319 = build2 (MULT_EXPR, uint32_type_node, 297 = build2 (MULT_EXPR, uint32_type_node,
320 expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, 298 m_parent.m_cf->expand_or_call_builtin
321 BRIG_TYPE_U32, 299 (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
322 uint32_type_node, uint32_2), 300 uint32_type_node, uint32_2),
323 local_size); 301 local_size);
324 302
325 tree var_region 303 tree var_region
326 = build2 (MULT_EXPR, uint32_type_node, 304 = build2 (MULT_EXPR, uint32_type_node,
327 build_int_cst (uint32_type_node, offset), local_size); 305 build_int_cst (uint32_type_node, offset), local_size);
329 tree_stl_vec operands; 307 tree_stl_vec operands;
330 tree pos 308 tree pos
331 = build2 (MULT_EXPR, uint32_type_node, 309 = build2 (MULT_EXPR, uint32_type_node,
332 build_int_cst (uint32_type_node, 310 build_int_cst (uint32_type_node,
333 m_parent.private_variable_size (var_name)), 311 m_parent.private_variable_size (var_name)),
334 expand_or_call_builtin (BRIG_OPCODE_WORKITEMFLATID, 312 m_parent.m_cf->expand_or_call_builtin
335 BRIG_TYPE_U32, 313 (BRIG_OPCODE_WORKITEMFLATID, BRIG_TYPE_U32,
336 uint32_type_node, operands)); 314 uint32_type_node, operands));
337 315
338 tree var_offset 316 tree var_offset
339 = build2 (PLUS_EXPR, uint32_type_node, var_region, pos); 317 = build2 (PLUS_EXPR, uint32_type_node, var_region, pos);
340 318
341 /* In case of LDA this is returned directly as an integer value. 319 /* In case of LDA this is returned directly as an integer value.
342 For other mem-related instructions, we will convert this segment 320 For other mem-related instructions, we will convert this segment
343 offset to a flat address by adding it as an offset to a (private 321 offset to a flat address by adding it as an offset to a (private
344 or group) base pointer later on. Same applies to group_var_offset. */ 322 or group) base pointer later on. Same applies to group_var_offset. */
345 symbol_base 323 symbol_base
346 = add_temp_var ("priv_var_offset", 324 = m_parent.m_cf->add_temp_var ("priv_var_offset",
347 convert (size_type_node, var_offset)); 325 convert (size_type_node,
326 var_offset));
348 } 327 }
349 else if (segment == BRIG_SEGMENT_ARG) 328 else if (segment == BRIG_SEGMENT_ARG)
350 { 329 {
351 tree arg_var_decl; 330 tree arg_var_decl;
352 if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol) 331 if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol)
366 segment), the reference is to an array object and 345 segment), the reference is to an array object and
367 in the callee, the array object has been passed as a pointer 346 in the callee, the array object has been passed as a pointer
368 to the array object. */ 347 to the array object. */
369 348
370 if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl))) 349 if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl)))
371 symbol_base = build_reinterpret_cast (ptype, arg_var_decl); 350 symbol_base = build_resize_convert_view (ptype, arg_var_decl);
372 else 351 else
373 { 352 {
374 /* In case we are referring to an array (the argument in 353 /* In case we are referring to an array (the argument in
375 call site), use its element zero as the base address. */ 354 call site), use its element zero as the base address. */
376 tree element_zero 355 tree element_zero
434 { 413 {
435 const BrigOperandRegister *mem_base_reg 414 const BrigOperandRegister *mem_base_reg
436 = (const BrigOperandRegister *) m_parent.get_brig_operand_entry 415 = (const BrigOperandRegister *) m_parent.get_brig_operand_entry
437 (addr_operand.reg); 416 (addr_operand.reg);
438 tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg); 417 tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg);
439 var_offset = convert_to_pointer (ptr_type_node, base_reg_var); 418 tree as_uint = build_reinterpret_to_uint (base_reg_var);
419 var_offset = convert_to_pointer (ptr_type_node, as_uint);
440 420
441 gcc_assert (var_offset != NULL_TREE); 421 gcc_assert (var_offset != NULL_TREE);
442 } 422 }
443 /* The pointer type we use to access the memory. Should be of the 423 /* The pointer type we use to access the memory. Should be of the
444 width of the load/store instruction, not the target/data 424 width of the load/store instruction, not the target/data
525 505
526 uint32_t operand_offset 506 uint32_t operand_offset
527 = ((const uint32_t *) &operand_entries->bytes)[operand_index]; 507 = ((const uint32_t *) &operand_entries->bytes)[operand_index];
528 const BrigBase *operand_data 508 const BrigBase *operand_data
529 = m_parent.get_brig_operand_entry (operand_offset); 509 = m_parent.get_brig_operand_entry (operand_offset);
530 return build_tree_operand (*brig_inst, *operand_data, operand_type); 510
511 bool inputp = !gccbrig_hsa_opcode_op_output_p (brig_inst->opcode,
512 operand_index);
513 return build_tree_operand (*brig_inst, *operand_data, operand_type, inputp);
531 } 514 }
532 515
533 /* Builds a single (scalar) constant initialized element of type 516 /* Builds a single (scalar) constant initialized element of type
534 ELEMENT_TYPE from the buffer pointed to by NEXT_DATA. */ 517 ELEMENT_TYPE from the buffer pointed to by NEXT_DATA. */
535 518
639 { 622 {
640 if (VECTOR_TYPE_P (tree_element_type)) 623 if (VECTOR_TYPE_P (tree_element_type))
641 { 624 {
642 /* In case of vector type elements (or sole vectors), 625 /* In case of vector type elements (or sole vectors),
643 create a vector ctor. */ 626 create a vector ctor. */
644 size_t element_count = TYPE_VECTOR_SUBPARTS (tree_element_type); 627 size_t element_count
628 = gccbrig_type_vector_subparts (tree_element_type);
645 if (bytes_left < scalar_element_size * element_count) 629 if (bytes_left < scalar_element_size * element_count)
646 fatal_error (UNKNOWN_LOCATION, 630 fatal_error (UNKNOWN_LOCATION,
647 "Not enough bytes left for the initializer " 631 "Not enough bytes left for the initializer "
648 "(%lu need %lu).", (unsigned long) bytes_left, 632 "(%lu need %lu).", (unsigned long) bytes_left,
649 (unsigned long) (scalar_element_size 633 (unsigned long) (scalar_element_size
699 } 683 }
700 else 684 else
701 return gccbrig_tree_type_for_hsa_type (brig_type); 685 return gccbrig_tree_type_for_hsa_type (brig_type);
702 } 686 }
703 687
704 /* In case the HSA instruction must be implemented using a builtin, this function is called to get the correct builtin function.
705 TYPE is the instruction tree type, BRIG_OPCODE the opcode of the brig instruction and BRIG_TYPE the brig instruction's type.
706 Returns a mathfn builtin for the math opcodes (or the target's vectorized version of it when TYPE is a vector and the target provides one), a fixed builtin for POPCOUNT, BORROW and CARRY, and otherwise the matching entry of s_custom_builtins.
707 Ends in gcc_unreachable () when no builtin is found. */
708
709 tree
710 brig_code_entry_handler::get_builtin_for_hsa_opcode
711 (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
712 {
713 tree builtin = NULL_TREE;
714 tree builtin_type = type;
715
716 /* For vector types, first find the scalar version of the builtin. */
717 if (type != NULL_TREE && VECTOR_TYPE_P (type))
718 builtin_type = TREE_TYPE (type);
719 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; /* Computed before BRIG_TYPE is possibly remapped to unsigned below. */
720
721 /* Some BRIG opcodes can use the same builtins for unsigned and
722 signed types. Force these cases to unsigned types. Only S32 and S64 are remapped. */
723
724 if (brig_opcode == BRIG_OPCODE_BORROW
725 || brig_opcode == BRIG_OPCODE_CARRY
726 || brig_opcode == BRIG_OPCODE_LASTBIT
727 || brig_opcode == BRIG_OPCODE_BITINSERT)
728 {
729 if (brig_type == BRIG_TYPE_S32)
730 brig_type = BRIG_TYPE_U32;
731 else if (brig_type == BRIG_TYPE_S64)
732 brig_type = BRIG_TYPE_U64;
733 }
734
735 switch (brig_opcode)
736 {
737 case BRIG_OPCODE_FLOOR:
738 builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
739 break;
740 case BRIG_OPCODE_CEIL:
741 builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
742 break;
743 case BRIG_OPCODE_SQRT:
744 case BRIG_OPCODE_NSQRT:
745 builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
746 break;
747 case BRIG_OPCODE_RINT:
748 builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
749 break;
750 case BRIG_OPCODE_TRUNC:
751 builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
752 break;
753 case BRIG_OPCODE_COPYSIGN:
754 builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
755 break;
756 case BRIG_OPCODE_NSIN:
757 builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
758 break;
759 case BRIG_OPCODE_NLOG2:
760 builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
761 break;
762 case BRIG_OPCODE_NEXP2:
763 builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
764 break;
765 case BRIG_OPCODE_NFMA:
766 builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
767 break;
768 case BRIG_OPCODE_NCOS:
769 builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
770 break;
771 case BRIG_OPCODE_POPCOUNT:
772 /* Popcount should be typed by its argument type (the return value
773 is always u32). Let's use the b64 version also for b32 for now. */
774 return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
775 case BRIG_OPCODE_BORROW:
776 /* Borrow uses the same builtin for unsigned and signed types. Every type that is not S32/U32 gets the U64 builtin. */
777 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
778 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
779 else
780 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
781 case BRIG_OPCODE_CARRY:
782 /* Carry also uses the same builtin for unsigned and signed types. Every type that is not S32/U32 gets the U64 builtin. */
783 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
784 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
785 else
786 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
787 default:
788
789 /* Use our builtin index for finding a proper builtin for the BRIG
790 opcode and BRIG type. This takes care of most of the builtin cases;
791 the special cases are handled in the separate 'case' statements
792 above. The lookup order is: exact BRIG_TYPE, then BRIG_INNER_TYPE, then F32 for F16. */
793 builtin_map::const_iterator i
794 = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
795 if (i != s_custom_builtins.end ())
796 return (*i).second;
797
798 if (brig_inner_type != brig_type)
799 {
800 /* Try to find a scalar built-in we could use. */
801 i = s_custom_builtins.find
802 (std::make_pair (brig_opcode, brig_inner_type));
803 if (i != s_custom_builtins.end ())
804 return (*i).second;
805 }
806
807 /* In case this is an fp16 operation that is promoted to fp32,
808 try to find a fp32 scalar built-in. */
809 if (brig_inner_type == BRIG_TYPE_F16)
810 {
811 i = s_custom_builtins.find
812 (std::make_pair (brig_opcode, BRIG_TYPE_F32));
813 if (i != s_custom_builtins.end ())
814 return (*i).second;
815 }
816 gcc_unreachable (); /* None of the s_custom_builtins lookups above matched. */
817 }
818
819 if (VECTOR_TYPE_P (type) && builtin != NULL_TREE) /* NOTE(review): unlike the check at the top, TYPE is not tested against NULL_TREE here -- confirm it cannot be null on this path. */
820 {
821 /* Try to find a vectorized version of the built-in.
822 TODO: properly assert that builtin is a mathfn builtin? */
823 tree vec_builtin
824 = targetm.vectorize.builtin_vectorized_function
825 (builtin_mathfn_code (builtin), type, type);
826 if (vec_builtin != NULL_TREE)
827 return vec_builtin;
828 else
829 return builtin;
830 }
831 if (builtin == NULL_TREE)
832 gcc_unreachable (); /* Only the mathfn cases break out of the switch, so mathfn_built_in () returned NULL_TREE. */
833 return builtin;
834 }
835
836 /* Return the correct GENERIC type for storing comparison results 688 /* Return the correct GENERIC type for storing comparison results
837 of operand with the type given in SOURCE_TYPE. */ 689 of operand with the type given in SOURCE_TYPE. */
838 690
839 tree 691 tree
840 brig_code_entry_handler::get_comparison_result_type (tree source_type) 692 brig_code_entry_handler::get_comparison_result_type (tree source_type)
842 if (VECTOR_TYPE_P (source_type)) 694 if (VECTOR_TYPE_P (source_type))
843 { 695 {
844 size_t element_size = int_size_in_bytes (TREE_TYPE (source_type)); 696 size_t element_size = int_size_in_bytes (TREE_TYPE (source_type));
845 return build_vector_type 697 return build_vector_type
846 (build_nonstandard_boolean_type (element_size * BITS_PER_UNIT), 698 (build_nonstandard_boolean_type (element_size * BITS_PER_UNIT),
847 TYPE_VECTOR_SUBPARTS (source_type)); 699 gccbrig_type_vector_subparts (source_type));
848 } 700 }
849 else 701 else
850 return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1); 702 return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1);
851 }
852
853 /* Returns true in case the given opcode needs to know about work-item context
854 data. In such case the context data is passed as a pointer to a work-item
855 context object, as the last argument in the builtin call. Returns false for every opcode not listed in the switch below. */
856
857 bool
858 brig_code_entry_handler::needs_workitem_context_data
859 (BrigOpcode16_t brig_opcode) const
860 {
861 switch (brig_opcode)
862 {
863 case BRIG_OPCODE_WORKITEMABSID:
864 case BRIG_OPCODE_WORKITEMFLATABSID:
865 case BRIG_OPCODE_WORKITEMFLATID:
866 case BRIG_OPCODE_CURRENTWORKITEMFLATID:
867 case BRIG_OPCODE_WORKITEMID:
868 case BRIG_OPCODE_WORKGROUPID:
869 case BRIG_OPCODE_WORKGROUPSIZE:
870 case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
871 case BRIG_OPCODE_GRIDGROUPS:
872 case BRIG_OPCODE_GRIDSIZE:
873 case BRIG_OPCODE_DIM:
874 case BRIG_OPCODE_PACKETID:
875 case BRIG_OPCODE_PACKETCOMPLETIONSIG:
876 case BRIG_OPCODE_BARRIER:
877 case BRIG_OPCODE_WAVEBARRIER:
878 case BRIG_OPCODE_ARRIVEFBAR:
879 case BRIG_OPCODE_INITFBAR:
880 case BRIG_OPCODE_JOINFBAR:
881 case BRIG_OPCODE_LEAVEFBAR:
882 case BRIG_OPCODE_RELEASEFBAR:
883 case BRIG_OPCODE_WAITFBAR:
884 case BRIG_OPCODE_CUID:
885 case BRIG_OPCODE_MAXCUID:
886 case BRIG_OPCODE_DEBUGTRAP:
887 case BRIG_OPCODE_GROUPBASEPTR:
888 case BRIG_OPCODE_KERNARGBASEPTR:
889 case BRIG_OPCODE_ALLOCA:
890 return true; /* All of the opcodes listed above share this result. */
891 default:
892 return false;
893 }; /* The ';' after the switch is an empty statement. */
894 }
895
896 /* Returns true in case the given opcode that would normally be generated
897 as a builtin call can be expanded to tree nodes. The opcodes accepted here are the ones expand_builtin () has a branch for. */
898
899 bool
900 brig_code_entry_handler::can_expand_builtin (BrigOpcode16_t brig_opcode) const
901 {
902 switch (brig_opcode)
903 {
904 case BRIG_OPCODE_WORKITEMFLATABSID:
905 case BRIG_OPCODE_WORKITEMFLATID:
906 case BRIG_OPCODE_WORKITEMABSID:
907 case BRIG_OPCODE_WORKGROUPSIZE:
908 case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
909 /* TODO: expand more builtins. */
910 return true;
911 default:
912 return false; /* Every other opcode is emitted as a builtin call. */
913 }; /* The ';' after the switch is an empty statement. */
914 }
915
916 /* Try to expand the given builtin call to reuse a previously generated
917 variable, if possible. If not, just call the given builtin.
918 BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
919 ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
920 input operands. Returns the expanded value, the packed per-element results, or the built call expression. */
921
922 tree
923 brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
924 BrigType16_t brig_type,
925 tree arith_type,
926 tree_stl_vec &operands)
927 {
928 if (m_parent.m_cf->m_is_kernel && can_expand_builtin (brig_opcode)) /* Expansion is only attempted when m_is_kernel is set. */
929 return expand_builtin (brig_opcode, operands);
930
931 tree built_in
932 = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);
933
934 if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
935 && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type) /* The builtin's return type is not a vector although ARITH_TYPE is. */
936 && brig_opcode != BRIG_OPCODE_LERP
937 && brig_opcode != BRIG_OPCODE_PACKCVT
938 && brig_opcode != BRIG_OPCODE_SAD
939 && brig_opcode != BRIG_OPCODE_SADHI)
940 {
941 /* Call the scalar built-in for all elements in the vector. Only the first two operands are unpacked and forwarded. */
942 tree_stl_vec operand0_elements;
943 if (operands.size () > 0)
944 unpack (operands[0], operand0_elements);
945
946 tree_stl_vec operand1_elements;
947 if (operands.size () > 1)
948 unpack (operands[1], operand1_elements);
949
950 tree_stl_vec result_elements;
951
952 for (size_t i = 0; i < TYPE_VECTOR_SUBPARTS (arith_type); ++i)
953 {
954 tree_stl_vec call_operands;
955 if (operand0_elements.size () > 0)
956 call_operands.push_back (operand0_elements.at (i));
957
958 if (operand1_elements.size () > 0)
959 call_operands.push_back (operand1_elements.at (i));
960
961 result_elements.push_back
962 (expand_or_call_builtin (brig_opcode, brig_type,
963 TREE_TYPE (arith_type),
964 call_operands)); /* Recurse with the element type of ARITH_TYPE. */
965 }
966 return pack (result_elements);
967 }
968
969 tree_stl_vec call_operands;
970 tree_stl_vec operand_types;
971
972 tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));
973
974 for (size_t i = 0; i < operands.size (); ++i)
975 {
976 tree operand_type = TREE_VALUE (arg_type_chain); /* Type of the i-th entry in the builtin's argument type chain. */
977 call_operands.push_back (convert (operand_type, operands[i]));
978 operand_types.push_back (operand_type);
979 arg_type_chain = TREE_CHAIN (arg_type_chain);
980 }
981
982 if (needs_workitem_context_data (brig_opcode))
983 {
984 call_operands.push_back (m_parent.m_cf->m_context_arg); /* The context argument is appended after the regular operands. */
985 operand_types.push_back (ptr_type_node);
986 m_parent.m_cf->m_has_unexpanded_dp_builtins = true;
987 }
988
989 size_t operand_count = call_operands.size ();
990
991 call_operands.resize (4, NULL_TREE); /* NOTE(review): assumes at most four call operands; a larger OPERAND_COUNT would be truncated here -- confirm. */
992 operand_types.resize (4, NULL_TREE);
993 for (size_t i = 0; i < operand_count; ++i)
994 call_operands.at (i) = build_reinterpret_cast (operand_types.at (i),
995 call_operands.at (i));
996
997 tree fnptr = build_fold_addr_expr (built_in);
998 return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
999 operand_count, &call_operands[0]); /* Only the first OPERAND_COUNT entries are passed to the call. */
1000 }
1001
1002 /* Instead of calling a built-in, reuse a previously returned value known to
1003 be still valid. This is beneficial especially for the work-item
1004 identification related builtins as not having them as calls can lead to
1005 more easily vectorizable parallel loops for multi work-item work-groups.
1006 BRIG_OPCODE identifies the builtin and OPERANDS stores the operands. Opcodes without a branch below end in gcc_unreachable (). */
1007
1008 tree
1009 brig_code_entry_handler::expand_builtin (BrigOpcode16_t brig_opcode,
1010 tree_stl_vec &operands)
1011 {
1012 tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0)); /* One-element operand lists holding the constants 0, 1 and 2. */
1013
1014 tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
1015
1016 tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
1017
1018 if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID) /* Builds id0 + id1 * max0 + id2 * max0 * max1 in uint64. */
1019 {
1020 tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
1021 id0 = convert (uint64_type_node, id0);
1022
1023 tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
1024 id1 = convert (uint64_type_node, id1);
1025
1026 tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
1027 id2 = convert (uint64_type_node, id2);
1028
1029 tree max0 = convert (uint64_type_node,
1030 m_parent.m_cf->m_grid_size_vars[0]);
1031 tree max1 = convert (uint64_type_node,
1032 m_parent.m_cf->m_grid_size_vars[1]);
1033
1034 tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
1035 id2_x_max0_x_max1
1036 = build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);
1037
1038 tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);
1039
1040 tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
1041 sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);
1042
1043 return add_temp_var ("workitemflatabsid", sum);
1044 }
1045 else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID) /* Builds (wg_id * wg_size + local_id) % grid_size for the dimension given in OPERANDS[0]. */
1046 {
1047 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1048
1049 tree local_id_var = m_parent.m_cf->m_local_id_vars[dim];
1050 tree wg_id_var = m_parent.m_cf->m_wg_id_vars[dim];
1051 tree wg_size_var = m_parent.m_cf->m_wg_size_vars[dim];
1052 tree grid_size_var = m_parent.m_cf->m_grid_size_vars[dim];
1053
1054 tree wg_id_x_wg_size = build2 (MULT_EXPR, uint32_type_node,
1055 convert (uint32_type_node, wg_id_var),
1056 convert (uint32_type_node, wg_size_var));
1057 tree sum
1058 = build2 (PLUS_EXPR, uint32_type_node, wg_id_x_wg_size, local_id_var);
1059
1060 /* We need a modulo here because of work-groups which have dimensions
1061 larger than the grid size :( TO CHECK: is this really allowed in the
1062 specs? */
1063 tree modulo
1064 = build2 (TRUNC_MOD_EXPR, uint32_type_node, sum, grid_size_var);
1065
1066 return add_temp_var (std::string ("workitemabsid_")
1067 + (char) ((int) 'x' + dim), /* The name suffix is the character 'x' + DIM. */
1068 modulo);
1069 }
1070 else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID) /* Builds local_id[0] + local_id[1] * wg_size[0] + local_id[2] * wg_size[0] * wg_size[1]. */
1071 {
1072 tree z_x_wgsx_wgsy
1073 = build2 (MULT_EXPR, uint32_type_node,
1074 m_parent.m_cf->m_local_id_vars[2],
1075 m_parent.m_cf->m_wg_size_vars[0]);
1076 z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
1077 m_parent.m_cf->m_wg_size_vars[1]);
1078
1079 tree y_x_wgsx
1080 = build2 (MULT_EXPR, uint32_type_node,
1081 m_parent.m_cf->m_local_id_vars[1],
1082 m_parent.m_cf->m_wg_size_vars[0]);
1083
1084 tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
1085 sum = build2 (PLUS_EXPR, uint32_type_node,
1086 m_parent.m_cf->m_local_id_vars[0],
1087 sum);
1088 return add_temp_var ("workitemflatid", sum);
1089 }
1090 else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
1091 {
1092 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1093 return m_parent.m_cf->m_wg_size_vars[dim]; /* The existing variable is returned as is; no temp var is added. */
1094 }
1095 else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
1096 {
1097 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1098 return m_parent.m_cf->m_cur_wg_size_vars[dim]; /* The existing variable is returned as is; no temp var is added. */
1099 }
1100 else
1101 gcc_unreachable ();
1102
1103 return NULL_TREE; /* Only reached after gcc_unreachable () above. */
1104 }
1105
1106 /* Appends and returns a new temp variable and an accompanying assignment
1107 statement that stores the value of the given EXPR and has the given NAME. The variable gets the type of EXPR. */
1108
1109 tree
1110 brig_code_entry_handler::add_temp_var (std::string name, tree expr)
1111 {
1112 tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ());
1113 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr);
1114 m_parent.m_cf->append_statement (assign); /* The assignment is appended through m_parent.m_cf. */
1115 return temp_var;
1116 } 703 }
1117 704
1118 /* Creates a FP32 to FP16 conversion call, assuming the source and destination 705 /* Creates a FP32 to FP16 conversion call, assuming the source and destination
1119 are FP32 type variables. */ 706 are FP32 type variables. */
1120 707
1138 performs half to float conversions, constant to correct type variable, 725 performs half to float conversions, constant to correct type variable,
1139 and flush to zero (if applicable). */ 726 and flush to zero (if applicable). */
1140 727
1141 tree_stl_vec 728 tree_stl_vec
1142 brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst) 729 brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst)
730 {
731 return build_or_analyze_operands (brig_inst, false);
732 }
733
734 void
735 brig_code_entry_handler::analyze_operands (const BrigInstBase &brig_inst)
736 {
737 build_or_analyze_operands (brig_inst, true);
738 }
739
740 /* Implements both the build_operands () and analyze_operands () calls
741 so that changes to them go in tandem. Performs build_operands () when
742 ANALYZE is false. Otherwise, only analyzes the operands and returns an
743 empty list.
744
745 When analyzing, records each HSA register operand together with the
746 corresponding resolved operand tree type in
747 brig_to_generic::m_fn_regs_use_index. */
748
749 tree_stl_vec
750 brig_code_entry_handler::
751 build_or_analyze_operands (const BrigInstBase &brig_inst, bool analyze)
1143 { 752 {
1144 /* Flush to zero. */ 753 /* Flush to zero. */
1145 bool ftz = false; 754 bool ftz = false;
1146 const BrigBase *base = &brig_inst.base; 755 const BrigBase *base = &brig_inst.base;
1147 756
1306 } 915 }
1307 else if (half_to_float) 916 else if (half_to_float)
1308 /* Treat the operands as the storage type at this point. */ 917 /* Treat the operands as the storage type at this point. */
1309 operand_type = half_storage_type; 918 operand_type = half_storage_type;
1310 919
920 if (analyze)
921 {
922 if (operand_data->kind == BRIG_KIND_OPERAND_REGISTER)
923 {
924 const BrigOperandRegister &brig_reg
925 = (const BrigOperandRegister &) *operand_data;
926 m_parent.add_reg_used_as_type (brig_reg, operand_type);
927 }
928 continue;
929 }
930
1311 tree operand = build_tree_operand (brig_inst, *operand_data, operand_type, 931 tree operand = build_tree_operand (brig_inst, *operand_data, operand_type,
1312 !is_output); 932 !is_output);
1313
1314 gcc_assert (operand); 933 gcc_assert (operand);
1315 934
1316 /* Cast/convert the inputs to correct types as expected by the GENERIC 935 /* Cast/convert the inputs to correct types as expected by the GENERIC
1317 opcode instruction. */ 936 opcode instruction. */
1318 if (!is_output) 937 if (!is_output)
1319 { 938 {
1320 if (half_to_float) 939 if (half_to_float)
1321 operand = build_h2f_conversion 940 operand = build_h2f_conversion
1322 (build_reinterpret_cast (half_storage_type, operand)); 941 (build_resize_convert_view (half_storage_type, operand));
1323 else if (TREE_CODE (operand) != LABEL_DECL 942 else if (TREE_CODE (operand) != LABEL_DECL
1324 && TREE_CODE (operand) != TREE_VEC 943 && TREE_CODE (operand) != TREE_VEC
1325 && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS 944 && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS
1326 && !VECTOR_TYPE_P (TREE_TYPE (operand))) 945 && operand_data->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
1327 { 946 {
1328 size_t reg_width = int_size_in_bytes (TREE_TYPE (operand)); 947 operand = build_resize_convert_view (operand_type, operand);
1329 size_t instr_width = int_size_in_bytes (operand_type);
1330 if (reg_width == instr_width)
1331 operand = build_reinterpret_cast (operand_type, operand);
1332 else if (reg_width > instr_width)
1333 {
1334 /* Clip the operand because the instruction's bitwidth
1335 is smaller than the HSAIL reg width. */
1336 if (INTEGRAL_TYPE_P (operand_type))
1337 operand
1338 = convert_to_integer (signed_or_unsigned_type_for
1339 (TYPE_UNSIGNED (operand_type),
1340 operand_type), operand);
1341 else
1342 operand = build_reinterpret_cast (operand_type, operand);
1343 }
1344 else if (reg_width < instr_width)
1345 /* At least shift amount operands can be read from smaller
1346 registers than the data operands. */
1347 operand = convert (operand_type, operand);
1348 } 948 }
1349 else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE) 949 else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
1350 /* Force the operand type to be treated as the raw type. */ 950 /* Force the operand type to be treated as the raw type. */
1351 operand = build_reinterpret_cast (operand_type, operand); 951 operand = build_resize_convert_view (operand_type, operand);
1352 952
1353 if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1) 953 if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1)
1354 { 954 {
1355 /* gcc expects the lower bit to be 1 (or all ones in case of 955 /* gcc expects the lower bit to be 1 (or all ones in case of
1356 vectors) while CMOV assumes false iff 0. Convert the input 956 vectors) while CMOV assumes false iff 0. Convert the input
1377 977
1378 tree 978 tree
1379 brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst, 979 brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
1380 tree output, tree inst_expr) 980 tree output, tree inst_expr)
1381 { 981 {
1382 /* The destination type might be different from the output register 982 /* The result/input type might be different from the output register
1383 variable type (which is always an unsigned integer type). */ 983 variable type (can be any type; see get_m_var_declfor_reg @
984 brig-function.cc). */
1384 tree output_type = TREE_TYPE (output); 985 tree output_type = TREE_TYPE (output);
1385 tree input_type = TREE_TYPE (inst_expr);
1386 bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 986 bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
1387 && brig_inst.base.kind != BRIG_KIND_INST_MEM 987 && brig_inst.base.kind != BRIG_KIND_INST_MEM
1388 && !gccbrig_is_bit_operation (brig_inst.opcode); 988 && !gccbrig_is_bit_operation (brig_inst.opcode);
1389 989
1390 /* Flush to zero. */ 990 /* Flush to zero. */
1391 bool ftz = false; 991 bool ftz = false;
1392 const BrigBase *base = &brig_inst.base; 992 const BrigBase *base = &brig_inst.base;
1393 993
994 if (m_parent.m_cf->is_id_val (inst_expr))
995 inst_expr = m_parent.m_cf->id_val (inst_expr);
996
997 tree input_type = TREE_TYPE (inst_expr);
998
999 m_parent.m_cf->add_reg_var_update (output, inst_expr);
1000
1394 if (base->kind == BRIG_KIND_INST_MOD) 1001 if (base->kind == BRIG_KIND_INST_MOD)
1395 { 1002 {
1396 const BrigInstMod *mod = (const BrigInstMod *) base; 1003 const BrigInstMod *mod = (const BrigInstMod *) base;
1397 ftz = mod->modifier & BRIG_ALU_FTZ; 1004 ftz = mod->modifier & BRIG_ALU_FTZ;
1398 } 1005 }
1411 if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) 1018 if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr))
1412 || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16)) 1019 || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16))
1413 { 1020 {
1414 /* Ensure we don't duplicate the arithmetics to the arguments of the bit 1021 /* Ensure we don't duplicate the arithmetics to the arguments of the bit
1415 field reference operators. */ 1022 field reference operators. */
1416 inst_expr = add_temp_var ("before_ftz", inst_expr); 1023 inst_expr = m_parent.m_cf->add_temp_var ("before_ftz", inst_expr);
1417 inst_expr = flush_to_zero (is_fp16) (*this, inst_expr); 1024 inst_expr = flush_to_zero (is_fp16) (*this, inst_expr);
1418 } 1025 }
1419 1026
1420 if (is_fp16) 1027 if (is_fp16)
1421 { 1028 {
1422 inst_expr = add_temp_var ("before_f2h", inst_expr); 1029 inst_expr = m_parent.m_cf->add_temp_var ("before_f2h", inst_expr);
1423 tree f2h_output = build_f2h_conversion (inst_expr); 1030 tree f2h_output = build_f2h_conversion (inst_expr);
1424 tree conv_int = convert_to_integer (output_type, f2h_output); 1031 tree conv = build_resize_convert_view (output_type, f2h_output);
1425 tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int); 1032 tree assign = build2 (MODIFY_EXPR, output_type, output, conv);
1426 m_parent.m_cf->append_statement (assign); 1033 m_parent.m_cf->append_statement (assign);
1427 return assign; 1034 return assign;
1428 } 1035 }
1429 else if (VECTOR_TYPE_P (TREE_TYPE (output))) 1036 else if (VECTOR_TYPE_P (output_type) && TREE_CODE (output) == CONSTRUCTOR)
1430 { 1037 {
1431 /* Expand/unpack the input value to the given vector elements. */ 1038 /* Expand/unpack the input value to the given vector elements. */
1432 size_t i; 1039 size_t i;
1433 tree input = inst_expr; 1040 tree input = inst_expr;
1434 tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); 1041 tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
1452 /* All we do here is to bitcast the result and store it to the 1059 /* All we do here is to bitcast the result and store it to the
1453 'register' (variable). Mainly need to take care of differing 1060 'register' (variable). Mainly need to take care of differing
1454 bitwidths. */ 1061 bitwidths. */
1455 size_t src_width = int_size_in_bytes (input_type); 1062 size_t src_width = int_size_in_bytes (input_type);
1456 size_t dst_width = int_size_in_bytes (output_type); 1063 size_t dst_width = int_size_in_bytes (output_type);
1457 1064 tree input = inst_expr;
1458 if (src_width == dst_width) 1065 /* Integer results are extended to the target register width, using
1459 { 1066 the same sign as the inst_expr. */
1460 /* A simple bitcast should do. */ 1067 if (INTEGRAL_TYPE_P (TREE_TYPE (input)) && src_width != dst_width)
1461 tree bitcast = build_reinterpret_cast (output_type, inst_expr); 1068 {
1462 tree assign = build2 (MODIFY_EXPR, output_type, output, bitcast); 1069 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (input));
1463 m_parent.m_cf->append_statement (assign); 1070 tree resized_type
1464 return assign; 1071 = build_nonstandard_integer_type (dst_width * BITS_PER_UNIT,
1465 } 1072 unsigned_p);
1466 else 1073 input = convert_to_integer (resized_type, input);
1467 { 1074 }
1468 tree conv_int = convert_to_integer (output_type, inst_expr); 1075 input = build_resize_convert_view (output_type, input);
1469 tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int); 1076 tree assign = build2 (MODIFY_EXPR, output_type, output, input);
1470 m_parent.m_cf->append_statement (assign); 1077 m_parent.m_cf->append_statement (assign);
1471 return assign; 1078 return assign;
1472 }
1473 } 1079 }
1474 return NULL_TREE; 1080 return NULL_TREE;
1475 } 1081 }
1476 1082
1477 /* Appends a GENERIC statement (STMT) to the currently constructed function. */ 1083 /* Appends a GENERIC statement (STMT) to the currently constructed function. */
1478 1084
1479 void 1085 void
1480 brig_code_entry_handler::append_statement (tree stmt) 1086 brig_code_entry_handler::append_statement (tree stmt)
1481 { 1087 {
1482 m_parent.m_cf->append_statement (stmt); 1088 m_parent.m_cf->append_statement (stmt);
1483 }
1484
1485 /* Unpacks the elements of the vector in VALUE to scalars (bit field
1486 references) in ELEMENTS. */
1487
1488 void
1489 brig_code_entry_handler::unpack (tree value, tree_stl_vec &elements)
1490 {
1491 size_t vec_size = int_size_in_bytes (TREE_TYPE (value));
1492 size_t element_size
1493 = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT;
1494 size_t element_count
1495 = vec_size * BITS_PER_UNIT / element_size;
1496
1497 tree input_element_type = TREE_TYPE (TREE_TYPE (value));
1498
1499 value = add_temp_var ("unpack_input", value);
1500
1501 for (size_t i = 0; i < element_count; ++i)
1502 {
1503 tree element
1504 = build3 (BIT_FIELD_REF, input_element_type, value,
1505 TYPE_SIZE (input_element_type),
1506 bitsize_int(i * element_size));
1507
1508 element = add_temp_var ("scalar", element);
1509 elements.push_back (element);
1510 }
1511 }
1512
1513 /* Pack the elements of the scalars in ELEMENTS to the returned vector. */
1514
1515 tree
1516 brig_code_entry_handler::pack (tree_stl_vec &elements)
1517 {
1518 size_t element_count = elements.size ();
1519
1520 gcc_assert (element_count > 1);
1521
1522 tree output_element_type = TREE_TYPE (elements.at (0));
1523
1524 vec<constructor_elt, va_gc> *constructor_vals = NULL;
1525 for (size_t i = 0; i < element_count; ++i)
1526 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i));
1527
1528 tree vec_type = build_vector_type (output_element_type, element_count);
1529
1530 /* build_constructor creates a vector type which is not a vector_cst
1531 that requires compile time constant elements. */
1532 tree vec = build_constructor (vec_type, constructor_vals);
1533
1534 /* Add a temp variable for readability. */
1535 tree tmp_var = create_tmp_var (vec_type, "vec_out");
1536 tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1537 m_parent.m_cf->append_statement (vec_tmp_assign);
1538 return tmp_var;
1539 } 1089 }
1540 1090
1541 /* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */ 1091 /* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */
1542 1092
1543 tree 1093 tree
1670 tree 1220 tree
1671 float_to_half::visit_element (brig_code_entry_handler &caller, tree operand) 1221 float_to_half::visit_element (brig_code_entry_handler &caller, tree operand)
1672 { 1222 {
1673 tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16); 1223 tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16);
1674 1224
1675 tree casted_operand = build_reinterpret_cast (uint32_type_node, operand); 1225 tree casted_operand = build_resize_convert_view (uint32_type_node, operand);
1676 1226
1677 tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node, 1227 tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node,
1678 casted_operand); 1228 casted_operand);
1679 tree output 1229 tree output
1680 = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out"); 1230 = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out");
1699 tree const_fp32_type 1249 tree const_fp32_type
1700 = build_type_variant (brig_to_generic::s_fp32_type, 1, 0); 1250 = build_type_variant (brig_to_generic::s_fp32_type, 1, 0);
1701 1251
1702 tree output = create_tmp_var (const_fp32_type, "fp32out"); 1252 tree output = create_tmp_var (const_fp32_type, "fp32out");
1703 tree casted_result 1253 tree casted_result
1704 = build_reinterpret_cast (brig_to_generic::s_fp32_type, call); 1254 = build_resize_convert_view (brig_to_generic::s_fp32_type, call);
1705 1255
1706 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result); 1256 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result);
1707 1257
1708 caller.append_statement (assign); 1258 caller.append_statement (assign);
1709 1259
1751 tree n = node; 1301 tree n = node;
1752 if (TREE_CODE (n) == VIEW_CONVERT_EXPR) 1302 if (TREE_CODE (n) == VIEW_CONVERT_EXPR)
1753 n = TREE_OPERAND (n, 0); 1303 n = TREE_OPERAND (n, 0);
1754 return int_cst_value (n); 1304 return int_cst_value (n);
1755 } 1305 }
1756