diff gcc/config/arm/arm-builtins.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
line wrap: on
line diff
--- a/gcc/config/arm/arm-builtins.c	Thu Oct 25 07:37:49 2018 +0900
+++ b/gcc/config/arm/arm-builtins.c	Thu Feb 13 11:34:05 2020 +0900
@@ -1,5 +1,5 @@
 /* Description of builtins used by the ARM backend.
-   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   Copyright (C) 2014-2020 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
@@ -41,6 +41,7 @@
 #include "langhooks.h"
 #include "case-cfn-macros.h"
 #include "sbitmap.h"
+#include "stringpool.h"
 
 #define SIMD_MAX_BUILTIN_ARGS 7
 
@@ -82,7 +83,13 @@
   /* A void pointer.  */
   qualifier_void_pointer = 0x800,
   /* A const void pointer.  */
-  qualifier_const_void_pointer = 0x802
+  qualifier_const_void_pointer = 0x802,
+  /* Lane indices selected in pairs - must be within range of previous
+     argument = a vector.  */
+  qualifier_lane_pair_index = 0x1000,
+  /* Lane indices selected in quadtuplets - must be within range of previous
+     argument = a vector.  */
+  qualifier_lane_quadtup_index = 0x2000
 };
 
 /*  The qualifier_internal allows generation of a unary builtin from
@@ -118,12 +125,33 @@
       qualifier_unsigned };
 #define UTERNOP_QUALIFIERS (arm_unsigned_uternop_qualifiers)
 
+/* T (T, unsigned T, T).  */
+static enum arm_type_qualifiers
+arm_usternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned,
+      qualifier_none };
+#define USTERNOP_QUALIFIERS (arm_usternop_qualifiers)
+
 /* T (T, immediate).  */
 static enum arm_type_qualifiers
 arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_immediate };
 #define BINOP_IMM_QUALIFIERS (arm_binop_imm_qualifiers)
 
+/* T (T, unsigned immediate).  */
+static enum arm_type_qualifiers
+arm_sat_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none, qualifier_unsigned_immediate };
+#define SAT_BINOP_UNSIGNED_IMM_QUALIFIERS \
+  (arm_sat_binop_imm_qualifiers)
+
+/* unsigned T (T, unsigned immediate).  */
+static enum arm_type_qualifiers
+arm_unsigned_sat_binop_unsigned_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none, qualifier_unsigned_immediate };
+#define UNSIGNED_SAT_BINOP_UNSIGNED_IMM_QUALIFIERS \
+  (arm_unsigned_sat_binop_unsigned_imm_qualifiers)
+
 /* T (T, lane index).  */
 static enum arm_type_qualifiers
 arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -144,6 +172,13 @@
       qualifier_none, qualifier_lane_index };
 #define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers)
 
+/* T (T, T, T, lane pair index).  */
+static enum arm_type_qualifiers
+arm_mac_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_none, qualifier_lane_pair_index };
+#define MAC_LANE_PAIR_QUALIFIERS (arm_mac_lane_pair_qualifiers)
+
 /* unsigned T (unsigned T, unsigned T, unsigend T, lane index).  */
 static enum arm_type_qualifiers
 arm_umac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -151,6 +186,20 @@
       qualifier_unsigned, qualifier_lane_index };
 #define UMAC_LANE_QUALIFIERS (arm_umac_lane_qualifiers)
 
+/* T (T, unsigned T, T, lane index).  */
+static enum arm_type_qualifiers
+arm_usmac_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned,
+      qualifier_none, qualifier_lane_quadtup_index };
+#define USMAC_LANE_QUADTUP_QUALIFIERS (arm_usmac_lane_quadtup_qualifiers)
+
+/* T (T, T, unsigend T, lane index).  */
+static enum arm_type_qualifiers
+arm_sumac_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_unsigned, qualifier_lane_quadtup_index };
+#define SUMAC_LANE_QUADTUP_QUALIFIERS (arm_sumac_lane_quadtup_qualifiers)
+
 /* T (T, T, immediate).  */
 static enum arm_type_qualifiers
 arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -275,15 +324,29 @@
       qualifier_none, qualifier_struct_load_store_lane_index };
 #define STORE1LANE_QUALIFIERS (arm_storestruct_lane_qualifiers)
 
+   /* int (void).  */
+static enum arm_type_qualifiers
+arm_sat_occurred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_void };
+#define SAT_OCCURRED_QUALIFIERS (arm_sat_occurred_qualifiers)
+
+   /* void (int).  */
+static enum arm_type_qualifiers
+arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_void, qualifier_none };
+#define SET_SAT_QUALIFIERS (arm_set_sat_qualifiers)
+
 #define v8qi_UP  E_V8QImode
 #define v4hi_UP  E_V4HImode
 #define v4hf_UP  E_V4HFmode
+#define v4bf_UP  E_V4BFmode
 #define v2si_UP  E_V2SImode
 #define v2sf_UP  E_V2SFmode
 #define di_UP    E_DImode
 #define v16qi_UP E_V16QImode
 #define v8hi_UP  E_V8HImode
 #define v8hf_UP  E_V8HFmode
+#define v8bf_UP  E_V8BFmode
 #define v4si_UP  E_V4SImode
 #define v4sf_UP  E_V4SFmode
 #define v2di_UP  E_V2DImode
@@ -291,9 +354,10 @@
 #define ei_UP	 E_EImode
 #define oi_UP	 E_OImode
 #define hf_UP	 E_HFmode
+#define bf_UP    E_BFmode
 #define si_UP	 E_SImode
 #define void_UP	 E_VOIDmode
-
+#define sf_UP	 E_SFmode
 #define UP(X) X##_UP
 
 typedef struct {
@@ -366,7 +430,7 @@
 #undef CF
 #undef VAR1
 #define VAR1(T, N, A) \
-  {#N, UP (A), CODE_FOR_##N, 0, T##_QUALIFIERS},
+  {#N, UP (A), CODE_FOR_arm_##N, 0, T##_QUALIFIERS},
 
 static arm_builtin_datum acle_builtin_data[] =
 {
@@ -664,6 +728,7 @@
   ARM_BUILTIN_##N,
 
   ARM_BUILTIN_ACLE_BASE,
+  ARM_BUILTIN_SAT_IMM_CHECK = ARM_BUILTIN_ACLE_BASE,
 
 #include "arm_acle_builtins.def"
 
@@ -768,6 +833,11 @@
 
 /* The user-visible __fp16 type.  */
 tree arm_fp16_type_node = NULL_TREE;
+
+/* Back-end node type for brain float (bfloat) types.  */
+tree arm_bf16_type_node = NULL_TREE;
+tree arm_bf16_ptr_type_node = NULL_TREE;
+
 static tree arm_simd_intOI_type_node = NULL_TREE;
 static tree arm_simd_intEI_type_node = NULL_TREE;
 static tree arm_simd_intCI_type_node = NULL_TREE;
@@ -818,7 +888,7 @@
 arm_mangle_builtin_type (const_tree type)
 {
   const char *mangle;
-  /* Walk through all the AArch64 builtins types tables to filter out the
+  /* Walk through all the Arm builtins types tables to filter out the
      incoming type.  */
   if ((mangle = arm_mangle_builtin_vector_type (type))
       || (mangle = arm_mangle_builtin_scalar_type (type)))
@@ -859,6 +929,8 @@
       return float_type_node;
     case E_DFmode:
       return double_type_node;
+    case E_BFmode:
+      return arm_bf16_type_node;
     default:
       gcc_unreachable ();
     }
@@ -964,6 +1036,10 @@
   arm_simd_types[Float32x2_t].eltype = float_type_node;
   arm_simd_types[Float32x4_t].eltype = float_type_node;
 
+  /* Init Bfloat vector types with underlying __bf16 scalar type.  */
+  arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node;
+  arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node;
+
   for (i = 0; i < nelts; i++)
     {
       tree eltype = arm_simd_types[i].eltype;
@@ -1149,6 +1225,19 @@
   arm_builtin_decls[fcode] = fndecl;
 }
 
+/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
+static void
+arm_init_bf16_types (void)
+{
+  arm_bf16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (arm_bf16_type_node) = 16;
+  SET_TYPE_MODE (arm_bf16_type_node, BFmode);
+  layout_type (arm_bf16_type_node);
+
+  lang_hooks.types.register_builtin_type (arm_bf16_type_node, "__bf16");
+  arm_bf16_ptr_type_node = build_pointer_type (arm_bf16_type_node);
+}
+
 /* Set up ACLE builtins, even builtins for instructions that are not
    in the current target ISA to allow the user to compile particular modules
    with different target specific options that differ from the command line
@@ -1159,6 +1248,16 @@
 {
   unsigned int i, fcode = ARM_BUILTIN_ACLE_PATTERN_START;
 
+  tree sat_check_fpr = build_function_type_list (void_type_node,
+						 intSI_type_node,
+						 intSI_type_node,
+						 intSI_type_node,
+						 NULL);
+  arm_builtin_decls[ARM_BUILTIN_SAT_IMM_CHECK]
+    = add_builtin_function ("__builtin_sat_imm_check", sat_check_fpr,
+			    ARM_BUILTIN_SAT_IMM_CHECK, BUILT_IN_MD,
+			    NULL, NULL_TREE);
+
   for (i = 0; i < ARRAY_SIZE (acle_builtin_data); i++, fcode++)
     {
       arm_builtin_datum *d = &acle_builtin_data[i];
@@ -1907,6 +2006,8 @@
      arm_init_neon_builtins which uses it.  */
   arm_init_fp16_builtins ();
 
+  arm_init_bf16_types ();
+
   if (TARGET_MAYBE_HARD_FLOAT)
     {
       arm_init_neon_builtins ();
@@ -1983,25 +2084,12 @@
   rtx op0 = expand_normal (arg0);
   rtx op1 = expand_normal (arg1);
   rtx op2 = expand_normal (arg2);
-  rtx op3 = NULL_RTX;
-
-  /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
-     lane operand depending on endianness.  */
-  bool builtin_sha1cpm_p = false;
-
-  if (insn_data[icode].n_operands == 5)
-    {
-      gcc_assert (icode == CODE_FOR_crypto_sha1c
-                  || icode == CODE_FOR_crypto_sha1p
-                  || icode == CODE_FOR_crypto_sha1m);
-      builtin_sha1cpm_p = true;
-    }
+
   machine_mode tmode = insn_data[icode].operand[0].mode;
   machine_mode mode0 = insn_data[icode].operand[1].mode;
   machine_mode mode1 = insn_data[icode].operand[2].mode;
   machine_mode mode2 = insn_data[icode].operand[3].mode;
 
-
   if (VECTOR_MODE_P (mode0))
     op0 = safe_vector_operand (op0, mode0);
   if (VECTOR_MODE_P (mode1))
@@ -2024,13 +2112,8 @@
     op1 = copy_to_mode_reg (mode1, op1);
   if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
     op2 = copy_to_mode_reg (mode2, op2);
-  if (builtin_sha1cpm_p)
-    op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
-
-  if (builtin_sha1cpm_p)
-    pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
-  else
-    pat = GEN_FCN (icode) (target, op0, op1, op2);
+
+  pat = GEN_FCN (icode) (target, op0, op1, op2);
   if (! pat)
     return 0;
   emit_insn (pat);
@@ -2086,16 +2169,8 @@
   rtx pat;
   tree arg0 = CALL_EXPR_ARG (exp, 0);
   rtx op0 = expand_normal (arg0);
-  rtx op1 = NULL_RTX;
   machine_mode tmode = insn_data[icode].operand[0].mode;
   machine_mode mode0 = insn_data[icode].operand[1].mode;
-  bool builtin_sha1h_p = false;
-
-  if (insn_data[icode].n_operands == 3)
-    {
-      gcc_assert (icode == CODE_FOR_crypto_sha1h);
-      builtin_sha1h_p = true;
-    }
 
   if (! target
       || GET_MODE (target) != tmode
@@ -2111,13 +2186,9 @@
       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
 	op0 = copy_to_mode_reg (mode0, op0);
     }
-  if (builtin_sha1h_p)
-    op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
-
-  if (builtin_sha1h_p)
-    pat = GEN_FCN (icode) (target, op0, op1);
-  else
-    pat = GEN_FCN (icode) (target, op0);
+
+  pat = GEN_FCN (icode) (target, op0);
+
   if (! pat)
     return 0;
   emit_insn (pat);
@@ -2129,6 +2200,8 @@
   ARG_BUILTIN_CONSTANT,
   ARG_BUILTIN_LANE_INDEX,
   ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX,
+  ARG_BUILTIN_LANE_PAIR_INDEX,
+  ARG_BUILTIN_LANE_QUADTUP_INDEX,
   ARG_BUILTIN_NEON_MEMORY,
   ARG_BUILTIN_MEMORY,
   ARG_BUILTIN_STOP
@@ -2266,9 +2339,35 @@
 		  machine_mode vmode = mode[argc - 1];
 		  neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode), exp);
 		}
-	      /* If the lane index isn't a constant then the next
-		 case will error.  */
-	      /* Fall through.  */
+	      /* If the lane index isn't a constant then error out.  */
+	      goto constant_arg;
+
+	    case ARG_BUILTIN_LANE_PAIR_INDEX:
+	      /* Previous argument must be a vector, which this indexes. The
+		 indexing will always select i and i+1 out of the vector, which
+		 puts a limit on i.  */
+	      gcc_assert (argc > 0);
+	      if (CONST_INT_P (op[argc]))
+		{
+		  machine_mode vmode = mode[argc - 1];
+		  neon_lane_bounds (op[argc], 0,
+				    GET_MODE_NUNITS (vmode) / 2, exp);
+		}
+	      /* If the lane index isn't a constant then error out.  */
+	      goto constant_arg;
+
+	    case ARG_BUILTIN_LANE_QUADTUP_INDEX:
+	      /* Previous argument must be a vector, which this indexes.  */
+	      gcc_assert (argc > 0);
+	      if (CONST_INT_P (op[argc]))
+		{
+		  machine_mode vmode = mode[argc - 1];
+		  neon_lane_bounds (op[argc], 0,
+				    GET_MODE_NUNITS (vmode) / 4, exp);
+		}
+	      /* If the lane index isn't a constant then error out.  */
+	      goto constant_arg;
+
 	    case ARG_BUILTIN_CONSTANT:
 constant_arg:
 	      if (!(*insn_data[icode].operand[opno].predicate)
@@ -2313,6 +2412,9 @@
   if (have_retval)
     switch (argc)
       {
+      case 0:
+	pat = GEN_FCN (icode) (target);
+	break;
       case 1:
 	pat = GEN_FCN (icode) (target, op[0]);
 	break;
@@ -2427,6 +2529,10 @@
 
       if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
 	args[k] = ARG_BUILTIN_LANE_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
+	args[k] = ARG_BUILTIN_LANE_PAIR_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
+	args[k] = ARG_BUILTIN_LANE_QUADTUP_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
 	args[k] = ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
@@ -2469,7 +2575,26 @@
 static rtx
 arm_expand_acle_builtin (int fcode, tree exp, rtx target)
 {
-
+  if (fcode == ARM_BUILTIN_SAT_IMM_CHECK)
+    {
+      /* Check the saturation immediate bounds.  */
+
+      rtx min_sat = expand_normal (CALL_EXPR_ARG (exp, 1));
+      rtx max_sat = expand_normal (CALL_EXPR_ARG (exp, 2));
+      gcc_assert (CONST_INT_P (min_sat));
+      gcc_assert (CONST_INT_P (max_sat));
+      rtx sat_imm = expand_normal (CALL_EXPR_ARG (exp, 0));
+      if (CONST_INT_P (sat_imm))
+	{
+	  if (!IN_RANGE (sat_imm, min_sat, max_sat))
+	    error ("%Ksaturation bit range must be in the range [%wd, %wd]",
+		   exp, UINTVAL (min_sat), UINTVAL (max_sat));
+	}
+      else
+	error ("%Ksaturation bit range must be a constant immediate", exp);
+      /* Don't generate any RTL.  */
+      return const0_rtx;
+    }
   arm_builtin_datum *d
     = &acle_builtin_data[fcode - ARM_BUILTIN_ACLE_PATTERN_START];
 
@@ -2488,7 +2613,7 @@
     {
       fatal_error (input_location,
 		   "You must enable NEON instructions"
-		   " (e.g. -mfloat-abi=softfp -mfpu=neon)"
+		   " (e.g. %<-mfloat-abi=softfp%> %<-mfpu=neon%>)"
 		   " to use these intrinsics.");
       return const0_rtx;
     }
@@ -2559,7 +2684,7 @@
   rtx               op1;
   rtx               op2;
   rtx               pat;
-  unsigned int      fcode = DECL_FUNCTION_CODE (fndecl);
+  unsigned int      fcode = DECL_MD_FUNCTION_CODE (fndecl);
   size_t            i;
   machine_mode tmode;
   machine_mode mode0;
@@ -2586,7 +2711,8 @@
     {
       fatal_error (input_location,
 		   "You must enable crypto instructions"
-		   " (e.g. include -mfloat-abi=softfp -mfpu=crypto-neon...)"
+		   " (e.g. include %<-mfloat-abi=softfp%> "
+		   "%<-mfpu=crypto-neon%>)"
 		   " to use these intrinsics.");
       return const0_rtx;
     }
@@ -3298,4 +3424,29 @@
 			    reload_fenv, restore_fnenv), update_call);
 }
 
+/* Implement TARGET_CHECK_BUILTIN_CALL.  Record a read of the Q bit through
+   intrinsics in the machine function.  */
+bool
+arm_check_builtin_call (location_t , vec<location_t> , tree fndecl,
+			tree, unsigned int, tree *)
+{
+  int fcode = DECL_MD_FUNCTION_CODE (fndecl);
+  if (fcode == ARM_BUILTIN_saturation_occurred
+      || fcode == ARM_BUILTIN_set_saturation)
+    {
+      if (cfun && cfun->decl)
+	DECL_ATTRIBUTES (cfun->decl)
+	  = tree_cons (get_identifier ("acle qbit"), NULL_TREE,
+		       DECL_ATTRIBUTES (cfun->decl));
+    }
+  if (fcode == ARM_BUILTIN_sel)
+    {
+      if (cfun && cfun->decl)
+	DECL_ATTRIBUTES (cfun->decl)
+	  = tree_cons (get_identifier ("acle gebits"), NULL_TREE,
+		       DECL_ATTRIBUTES (cfun->decl));
+    }
+  return true;
+}
+
 #include "gt-arm-builtins.h"