comparison gcc/simplify-rtx.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
comparing 131:84e7813d76e9 with 145:1830386684a0

(In the comparison below, each row gives the line number and text from the old revision first, followed by the line number and text from the new revision; rows with only one side present were deleted from or inserted into the file.)
1 /* RTL simplification functions for GNU compiler. 1 /* RTL simplification functions for GNU compiler.
2 Copyright (C) 1987-2018 Free Software Foundation, Inc. 2 Copyright (C) 1987-2020 Free Software Foundation, Inc.
3 3
4 This file is part of GCC. 4 This file is part of GCC.
5 5
6 GCC is free software; you can redistribute it and/or modify it under 6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free 7 the terms of the GNU General Public License as published by the Free
33 #include "diagnostic-core.h" 33 #include "diagnostic-core.h"
34 #include "varasm.h" 34 #include "varasm.h"
35 #include "flags.h" 35 #include "flags.h"
36 #include "selftest.h" 36 #include "selftest.h"
37 #include "selftest-rtl.h" 37 #include "selftest-rtl.h"
38 #include "rtx-vector-builder.h"
38 39
39 /* Simplification and canonicalization of RTL. */ 40 /* Simplification and canonicalization of RTL. */
40 41
41 /* Much code operates on (low, high) pairs; the low value is an 42 /* Much code operates on (low, high) pairs; the low value is an
42 unsigned wide int, the high value a signed wide int. We 43 unsigned wide int, the high value a signed wide int. We
43 occasionally need to sign extend from low to high as if low were a 44 occasionally need to sign extend from low to high as if low were a
44 signed wide int. */ 45 signed wide int. */
45 #define HWI_SIGN_EXTEND(low) \ 46 #define HWI_SIGN_EXTEND(low) \
46 ((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0) 47 ((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0)
47 48
48 static rtx neg_const_int (machine_mode, const_rtx);
49 static bool plus_minus_operand_p (const_rtx); 49 static bool plus_minus_operand_p (const_rtx);
50 static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx); 50 static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx);
51 static rtx simplify_associative_operation (enum rtx_code, machine_mode, 51 static rtx simplify_associative_operation (enum rtx_code, machine_mode,
52 rtx, rtx); 52 rtx, rtx);
53 static rtx simplify_relational_operation_1 (enum rtx_code, machine_mode, 53 static rtx simplify_relational_operation_1 (enum rtx_code, machine_mode,
54 machine_mode, rtx, rtx); 54 machine_mode, rtx, rtx);
55 static rtx simplify_unary_operation_1 (enum rtx_code, machine_mode, rtx); 55 static rtx simplify_unary_operation_1 (enum rtx_code, machine_mode, rtx);
56 static rtx simplify_binary_operation_1 (enum rtx_code, machine_mode, 56 static rtx simplify_binary_operation_1 (enum rtx_code, machine_mode,
57 rtx, rtx, rtx, rtx); 57 rtx, rtx, rtx, rtx);
58 58
59 /* Negate a CONST_INT rtx. */ 59 /* Negate I, which satisfies poly_int_rtx_p. MODE is the mode of I. */
60
60 static rtx 61 static rtx
61 neg_const_int (machine_mode mode, const_rtx i) 62 neg_poly_int_rtx (machine_mode mode, const_rtx i)
62 { 63 {
63 unsigned HOST_WIDE_INT val = -UINTVAL (i); 64 return immed_wide_int_const (-wi::to_poly_wide (i, mode), mode);
64
65 if (!HWI_COMPUTABLE_MODE_P (mode)
66 && val == UINTVAL (i))
67 return simplify_const_unary_operation (NEG, mode, CONST_CAST_RTX (i),
68 mode);
69 return gen_int_mode (val, mode);
70 } 65 }
71 66
72 /* Test whether expression, X, is an immediate constant that represents 67 /* Test whether expression, X, is an immediate constant that represents
73 the most significant bit of machine mode MODE. */ 68 the most significant bit of machine mode MODE. */
74 69
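Note: the hunk above replaces neg_const_int with neg_poly_int_rtx, which negates the constant with wide-int arithmetic at the precision of MODE. The following is a minimal standalone sketch, not GCC code, of what negation modulo the mode width means; neg_in_mode and its width handling are illustrative assumptions only.

// Hypothetical illustration: negate VAL as an N-bit two's-complement value
// and sign-extend the result back to 64 bits.
#include <cstdint>
#include <cassert>

int64_t neg_in_mode (int64_t val, unsigned bits)
{
  uint64_t mask = (bits == 64) ? ~0ull : ((1ull << bits) - 1);
  uint64_t r = (-(uint64_t) val) & mask;      // negate, then truncate to BITS
  if (bits < 64 && (r >> (bits - 1)) & 1)     // sign-extend the truncated value
    r |= ~mask;
  return (int64_t) r;
}

int main ()
{
  assert (neg_in_mode (5, 32) == -5);
  // The most negative SImode value wraps back to itself when negated.
  assert (neg_in_mode (INT32_MIN, 32) == INT32_MIN);
  return 0;
}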
1502 && GET_CODE (XEXP (op, 0)) == ASHIFT 1497 && GET_CODE (XEXP (op, 0)) == ASHIFT
1503 && is_a <scalar_int_mode> (mode, &int_mode) 1498 && is_a <scalar_int_mode> (mode, &int_mode)
1504 && CONST_INT_P (XEXP (op, 1)) 1499 && CONST_INT_P (XEXP (op, 1))
1505 && XEXP (XEXP (op, 0), 1) == XEXP (op, 1) 1500 && XEXP (XEXP (op, 0), 1) == XEXP (op, 1)
1506 && (op_mode = as_a <scalar_int_mode> (GET_MODE (op)), 1501 && (op_mode = as_a <scalar_int_mode> (GET_MODE (op)),
1507 GET_MODE_BITSIZE (op_mode) > INTVAL (XEXP (op, 1)))) 1502 GET_MODE_PRECISION (op_mode) > INTVAL (XEXP (op, 1))))
1508 { 1503 {
1509 scalar_int_mode tmode; 1504 scalar_int_mode tmode;
1510 gcc_assert (GET_MODE_BITSIZE (int_mode) 1505 gcc_assert (GET_MODE_PRECISION (int_mode)
1511 > GET_MODE_BITSIZE (op_mode)); 1506 > GET_MODE_PRECISION (op_mode));
1512 if (int_mode_for_size (GET_MODE_BITSIZE (op_mode) 1507 if (int_mode_for_size (GET_MODE_PRECISION (op_mode)
1513 - INTVAL (XEXP (op, 1)), 1).exists (&tmode)) 1508 - INTVAL (XEXP (op, 1)), 1).exists (&tmode))
1514 { 1509 {
1515 rtx inner = 1510 rtx inner =
1516 rtl_hooks.gen_lowpart_no_emit (tmode, XEXP (XEXP (op, 0), 0)); 1511 rtl_hooks.gen_lowpart_no_emit (tmode, XEXP (XEXP (op, 0), 0));
1517 if (inner) 1512 if (inner)
1733 gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER 1728 gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER
1734 (GET_MODE (op))); 1729 (GET_MODE (op)));
1735 } 1730 }
1736 if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op)) 1731 if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op))
1737 return gen_const_vec_duplicate (mode, op); 1732 return gen_const_vec_duplicate (mode, op);
1738 unsigned int n_elts;
1739 if (GET_CODE (op) == CONST_VECTOR 1733 if (GET_CODE (op) == CONST_VECTOR
1740 && GET_MODE_NUNITS (mode).is_constant (&n_elts)) 1734 && (CONST_VECTOR_DUPLICATE_P (op)
1741 { 1735 || CONST_VECTOR_NUNITS (op).is_constant ()))
1742 /* This must be constant if we're duplicating it to a constant 1736 {
1743 number of elements. */ 1737 unsigned int npatterns = (CONST_VECTOR_DUPLICATE_P (op)
1744 unsigned int in_n_elts = CONST_VECTOR_NUNITS (op).to_constant (); 1738 ? CONST_VECTOR_NPATTERNS (op)
1745 gcc_assert (in_n_elts < n_elts); 1739 : CONST_VECTOR_NUNITS (op).to_constant ());
1746 gcc_assert ((n_elts % in_n_elts) == 0); 1740 gcc_assert (multiple_p (GET_MODE_NUNITS (mode), npatterns));
1747 rtvec v = rtvec_alloc (n_elts); 1741 rtx_vector_builder builder (mode, npatterns, 1);
1748 for (unsigned i = 0; i < n_elts; i++) 1742 for (unsigned i = 0; i < npatterns; i++)
1749 RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts); 1743 builder.quick_push (CONST_VECTOR_ELT (op, i));
1750 return gen_rtx_CONST_VECTOR (mode, v); 1744 return builder.build ();
1751 } 1745 }
1752 } 1746 }
1753 1747
1754 if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) 1748 if (VECTOR_MODE_P (mode)
1755 { 1749 && GET_CODE (op) == CONST_VECTOR
1756 unsigned int n_elts; 1750 && known_eq (GET_MODE_NUNITS (mode), CONST_VECTOR_NUNITS (op)))
1757 if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts)) 1751 {
1758 return NULL_RTX; 1752 gcc_assert (GET_MODE (op) == op_mode);
1759 1753
1760 machine_mode opmode = GET_MODE (op); 1754 rtx_vector_builder builder;
1761 gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts)); 1755 if (!builder.new_unary_operation (mode, op, false))
1762 gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts)); 1756 return 0;
1763 1757
1764 rtvec v = rtvec_alloc (n_elts); 1758 unsigned int count = builder.encoded_nelts ();
1765 unsigned int i; 1759 for (unsigned int i = 0; i < count; i++)
1766
1767 for (i = 0; i < n_elts; i++)
1768 { 1760 {
1769 rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode), 1761 rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
1770 CONST_VECTOR_ELT (op, i), 1762 CONST_VECTOR_ELT (op, i),
1771 GET_MODE_INNER (opmode)); 1763 GET_MODE_INNER (op_mode));
1772 if (!x || !valid_for_const_vector_p (mode, x)) 1764 if (!x || !valid_for_const_vector_p (mode, x))
1773 return 0; 1765 return 0;
1774 RTVEC_ELT (v, i) = x; 1766 builder.quick_push (x);
1775 } 1767 }
1776 return gen_rtx_CONST_VECTOR (mode, v); 1768 return builder.build ();
1777 } 1769 }
1778 1770
1779 /* The order of these tests is critical so that, for example, we don't 1771 /* The order of these tests is critical so that, for example, we don't
1780 check the wrong mode (input vs. output) for a conversion operation, 1772 check the wrong mode (input vs. output) for a conversion operation,
1781 such as FIX. At some point, this should be simplified. */ 1773 such as FIX. At some point, this should be simplified. */
1830 } 1822 }
1831 1823
1832 if (CONST_SCALAR_INT_P (op) && is_a <scalar_int_mode> (mode, &result_mode)) 1824 if (CONST_SCALAR_INT_P (op) && is_a <scalar_int_mode> (mode, &result_mode))
1833 { 1825 {
1834 unsigned int width = GET_MODE_PRECISION (result_mode); 1826 unsigned int width = GET_MODE_PRECISION (result_mode);
1827 if (width > MAX_BITSIZE_MODE_ANY_INT)
1828 return 0;
1829
1835 wide_int result; 1830 wide_int result;
1836 scalar_int_mode imode = (op_mode == VOIDmode 1831 scalar_int_mode imode = (op_mode == VOIDmode
1837 ? result_mode 1832 ? result_mode
1838 : as_a <scalar_int_mode> (op_mode)); 1833 : as_a <scalar_int_mode> (op_mode));
1839 rtx_mode_t op0 = rtx_mode_t (op, imode); 1834 rtx_mode_t op0 = rtx_mode_t (op, imode);
1974 else if (CONST_DOUBLE_AS_FLOAT_P (op) 1969 else if (CONST_DOUBLE_AS_FLOAT_P (op)
1975 && SCALAR_FLOAT_MODE_P (GET_MODE (op)) 1970 && SCALAR_FLOAT_MODE_P (GET_MODE (op))
1976 && is_int_mode (mode, &result_mode)) 1971 && is_int_mode (mode, &result_mode))
1977 { 1972 {
1978 unsigned int width = GET_MODE_PRECISION (result_mode); 1973 unsigned int width = GET_MODE_PRECISION (result_mode);
1974 if (width > MAX_BITSIZE_MODE_ANY_INT)
1975 return 0;
1976
1979 /* Although the overflow semantics of RTL's FIX and UNSIGNED_FIX 1977 /* Although the overflow semantics of RTL's FIX and UNSIGNED_FIX
1980 operators are intentionally left unspecified (to ease implementation 1978 operators are intentionally left unspecified (to ease implementation
1981 by target backends), for consistency, this routine implements the 1979 by target backends), for consistency, this routine implements the
1982 same semantics for constant folding as used by the middle-end. */ 1980 same semantics for constant folding as used by the middle-end. */
1983 1981
2131 } 2129 }
2132 2130
2133 return 0; 2131 return 0;
2134 } 2132 }
2135 2133
2134 /* Return a mask describing the COMPARISON. */
2135 static int
2136 comparison_to_mask (enum rtx_code comparison)
2137 {
2138 switch (comparison)
2139 {
2140 case LT:
2141 return 8;
2142 case GT:
2143 return 4;
2144 case EQ:
2145 return 2;
2146 case UNORDERED:
2147 return 1;
2148
2149 case LTGT:
2150 return 12;
2151 case LE:
2152 return 10;
2153 case GE:
2154 return 6;
2155 case UNLT:
2156 return 9;
2157 case UNGT:
2158 return 5;
2159 case UNEQ:
2160 return 3;
2161
2162 case ORDERED:
2163 return 14;
2164 case NE:
2165 return 13;
2166 case UNLE:
2167 return 11;
2168 case UNGE:
2169 return 7;
2170
2171 default:
2172 gcc_unreachable ();
2173 }
2174 }
2175
2176 /* Return a comparison corresponding to the MASK. */
2177 static enum rtx_code
2178 mask_to_comparison (int mask)
2179 {
2180 switch (mask)
2181 {
2182 case 8:
2183 return LT;
2184 case 4:
2185 return GT;
2186 case 2:
2187 return EQ;
2188 case 1:
2189 return UNORDERED;
2190
2191 case 12:
2192 return LTGT;
2193 case 10:
2194 return LE;
2195 case 6:
2196 return GE;
2197 case 9:
2198 return UNLT;
2199 case 5:
2200 return UNGT;
2201 case 3:
2202 return UNEQ;
2203
2204 case 14:
2205 return ORDERED;
2206 case 13:
2207 return NE;
2208 case 11:
2209 return UNLE;
2210 case 7:
2211 return UNGE;
2212
2213 default:
2214 gcc_unreachable ();
2215 }
2216 }
2217
2218 /* Simplify a logical operation CODE with result mode MODE, operating on OP0
2219 and OP1, which should be both relational operations. Return 0 if no such
2220 simplification is possible. */
2221 rtx
2222 simplify_logical_relational_operation (enum rtx_code code, machine_mode mode,
2223 rtx op0, rtx op1)
2224 {
2225 /* We only handle IOR of two relational operations. */
2226 if (code != IOR)
2227 return 0;
2228
2229 if (!(COMPARISON_P (op0) && COMPARISON_P (op1)))
2230 return 0;
2231
2232 if (!(rtx_equal_p (XEXP (op0, 0), XEXP (op1, 0))
2233 && rtx_equal_p (XEXP (op0, 1), XEXP (op1, 1))))
2234 return 0;
2235
2236 enum rtx_code code0 = GET_CODE (op0);
2237 enum rtx_code code1 = GET_CODE (op1);
2238
2239 /* We don't handle unsigned comparisons currently. */
2240 if (code0 == LTU || code0 == GTU || code0 == LEU || code0 == GEU)
2241 return 0;
2242 if (code1 == LTU || code1 == GTU || code1 == LEU || code1 == GEU)
2243 return 0;
2244
2245 int mask0 = comparison_to_mask (code0);
2246 int mask1 = comparison_to_mask (code1);
2247
2248 int mask = mask0 | mask1;
2249
2250 if (mask == 15)
2251 return const_true_rtx;
2252
2253 code = mask_to_comparison (mask);
2254
2255 op0 = XEXP (op1, 0);
2256 op1 = XEXP (op1, 1);
2257
2258 return simplify_gen_relational (code, mode, VOIDmode, op0, op1);
2259 }
2136 2260
2137 /* Simplify a binary operation CODE with result mode MODE, operating on OP0 2261 /* Simplify a binary operation CODE with result mode MODE, operating on OP0
2138 and OP1. Return 0 if no simplification is possible. 2262 and OP1. Return 0 if no simplification is possible.
2139 2263
2140 Don't use this for relational operations such as EQ or LT. 2264 Don't use this for relational operations such as EQ or LT.
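Note: the new simplify_logical_relational_operation combines two comparisons of the same operands by OR-ing the 4-bit masks defined in comparison_to_mask (bit 3 = LT, bit 2 = GT, bit 1 = EQ, bit 0 = UNORDERED). A standalone sketch of that encoding follows; it is not GCC code, and the enum names merely mirror the RTL comparison codes.

// Hypothetical illustration of the mask table used above: IOR of two
// comparisons of the same operands is the bitwise OR of their masks.
#include <cassert>

enum cmp { LT = 8, GT = 4, EQ = 2, UNORDERED = 1,
           LTGT = LT | GT, LE = LT | EQ, GE = GT | EQ,
           UNLT = LT | UNORDERED, UNGT = GT | UNORDERED, UNEQ = EQ | UNORDERED,
           ORDERED = LT | GT | EQ, NE = LT | GT | UNORDERED,
           UNLE = LT | EQ | UNORDERED, UNGE = GT | EQ | UNORDERED,
           ALWAYS = 15 };

int main ()
{
  assert ((LT | EQ) == LE);        // (x < y) || (x == y)  ->  x <= y
  assert ((LT | GT) == LTGT);      // ordered and not equal
  assert ((LE | UNGE) == ALWAYS);  // all four outcomes covered -> const_true_rtx
  return 0;
}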
2547 || GET_CODE (op0) == LABEL_REF) 2671 || GET_CODE (op0) == LABEL_REF)
2548 && poly_int_rtx_p (op1, &offset)) 2672 && poly_int_rtx_p (op1, &offset))
2549 return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode)); 2673 return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode));
2550 2674
2551 /* Don't let a relocatable value get a negative coeff. */ 2675 /* Don't let a relocatable value get a negative coeff. */
2552 if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode) 2676 if (poly_int_rtx_p (op1) && GET_MODE (op0) != VOIDmode)
2553 return simplify_gen_binary (PLUS, mode, 2677 return simplify_gen_binary (PLUS, mode,
2554 op0, 2678 op0,
2555 neg_const_int (mode, op1)); 2679 neg_poly_int_rtx (mode, op1));
2556 2680
2557 /* (x - (x & y)) -> (x & ~y) */ 2681 /* (x - (x & y)) -> (x & ~y) */
2558 if (INTEGRAL_MODE_P (mode) && GET_CODE (op1) == AND) 2682 if (INTEGRAL_MODE_P (mode) && GET_CODE (op1) == AND)
2559 { 2683 {
2560 if (rtx_equal_p (op0, XEXP (op1, 0))) 2684 if (rtx_equal_p (op0, XEXP (op1, 0)))
2855 plus_constant (mode, XEXP (op0, 0), 2979 plus_constant (mode, XEXP (op0, 0),
2856 mask), 2980 mask),
2857 XEXP (op0, 1)); 2981 XEXP (op0, 1));
2858 } 2982 }
2859 2983
2984 /* The following happens with bitfield merging.
2985 (X & C) | ((X | Y) & ~C) -> X | (Y & ~C) */
2986 if (GET_CODE (op0) == AND
2987 && GET_CODE (op1) == AND
2988 && CONST_INT_P (XEXP (op0, 1))
2989 && CONST_INT_P (XEXP (op1, 1))
2990 && (INTVAL (XEXP (op0, 1))
2991 == ~INTVAL (XEXP (op1, 1))))
2992 {
2993 /* The IOR may be on both sides. */
2994 rtx top0 = NULL_RTX, top1 = NULL_RTX;
2995 if (GET_CODE (XEXP (op1, 0)) == IOR)
2996 top0 = op0, top1 = op1;
2997 else if (GET_CODE (XEXP (op0, 0)) == IOR)
2998 top0 = op1, top1 = op0;
2999 if (top0 && top1)
3000 {
3001 /* X may be on either side of the inner IOR. */
3002 rtx tem = NULL_RTX;
3003 if (rtx_equal_p (XEXP (top0, 0),
3004 XEXP (XEXP (top1, 0), 0)))
3005 tem = XEXP (XEXP (top1, 0), 1);
3006 else if (rtx_equal_p (XEXP (top0, 0),
3007 XEXP (XEXP (top1, 0), 1)))
3008 tem = XEXP (XEXP (top1, 0), 0);
3009 if (tem)
3010 return simplify_gen_binary (IOR, mode, XEXP (top0, 0),
3011 simplify_gen_binary
3012 (AND, mode, tem, XEXP (top1, 1)));
3013 }
3014 }
3015
2860 tem = simplify_byte_swapping_operation (code, mode, op0, op1); 3016 tem = simplify_byte_swapping_operation (code, mode, op0, op1);
2861 if (tem) 3017 if (tem)
2862 return tem; 3018 return tem;
2863 3019
2864 tem = simplify_associative_operation (code, mode, op0, op1); 3020 tem = simplify_associative_operation (code, mode, op0, op1);
3021 if (tem)
3022 return tem;
3023
3024 tem = simplify_logical_relational_operation (code, mode, op0, op1);
2865 if (tem) 3025 if (tem)
2866 return tem; 3026 return tem;
2867 break; 3027 break;
2868 3028
2869 case XOR: 3029 case XOR:
3485 == GET_MODE_BITSIZE (inner_mode) - GET_MODE_BITSIZE (int_mode)) 3645 == GET_MODE_BITSIZE (inner_mode) - GET_MODE_BITSIZE (int_mode))
3486 && subreg_lowpart_p (op0)) 3646 && subreg_lowpart_p (op0))
3487 { 3647 {
3488 rtx tmp = gen_int_shift_amount 3648 rtx tmp = gen_int_shift_amount
3489 (inner_mode, INTVAL (XEXP (SUBREG_REG (op0), 1)) + INTVAL (op1)); 3649 (inner_mode, INTVAL (XEXP (SUBREG_REG (op0), 1)) + INTVAL (op1));
3490 tmp = simplify_gen_binary (code, inner_mode, 3650
3491 XEXP (SUBREG_REG (op0), 0), 3651 /* Combine would usually zero out the value when combining two
3492 tmp); 3652 local shifts and the range becomes larger or equal to the mode.
3653 However since we fold away one of the shifts here combine won't
3654 see it so we should immediately zero the result if it's out of
3655 range. */
3656 if (code == LSHIFTRT
3657 && INTVAL (tmp) >= GET_MODE_BITSIZE (inner_mode))
3658 tmp = const0_rtx;
3659 else
3660 tmp = simplify_gen_binary (code,
3661 inner_mode,
3662 XEXP (SUBREG_REG (op0), 0),
3663 tmp);
3664
3493 return lowpart_subreg (int_mode, tmp, inner_mode); 3665 return lowpart_subreg (int_mode, tmp, inner_mode);
3494 } 3666 }
3495 3667
3496 if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1)) 3668 if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
3497 { 3669 {
4025 } 4197 }
4026 4198
4027 return 0; 4199 return 0;
4028 } 4200 }
4029 4201
4202 /* Return true if binary operation OP distributes over addition in operand
4203 OPNO, with the other operand being held constant. OPNO counts from 1. */
4204
4205 static bool
4206 distributes_over_addition_p (rtx_code op, int opno)
4207 {
4208 switch (op)
4209 {
4210 case PLUS:
4211 case MINUS:
4212 case MULT:
4213 return true;
4214
4215 case ASHIFT:
4216 return opno == 1;
4217
4218 default:
4219 return false;
4220 }
4221 }
4222
4030 rtx 4223 rtx
4031 simplify_const_binary_operation (enum rtx_code code, machine_mode mode, 4224 simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
4032 rtx op0, rtx op1) 4225 rtx op0, rtx op1)
4033 { 4226 {
4034 if (VECTOR_MODE_P (mode) 4227 if (VECTOR_MODE_P (mode)
4035 && code != VEC_CONCAT 4228 && code != VEC_CONCAT
4036 && GET_CODE (op0) == CONST_VECTOR 4229 && GET_CODE (op0) == CONST_VECTOR
4037 && GET_CODE (op1) == CONST_VECTOR) 4230 && GET_CODE (op1) == CONST_VECTOR)
4038 { 4231 {
4039 unsigned int n_elts; 4232 bool step_ok_p;
4040 if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts)) 4233 if (CONST_VECTOR_STEPPED_P (op0)
4041 return NULL_RTX; 4234 && CONST_VECTOR_STEPPED_P (op1))
4042 4235 /* We can operate directly on the encoding if:
4043 gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1))); 4236
4044 gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode))); 4237 a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
4045 rtvec v = rtvec_alloc (n_elts); 4238 implies
4046 unsigned int i; 4239 (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
4047 4240
4048 for (i = 0; i < n_elts; i++) 4241 Addition and subtraction are the supported operators
4242 for which this is true. */
4243 step_ok_p = (code == PLUS || code == MINUS);
4244 else if (CONST_VECTOR_STEPPED_P (op0))
4245 /* We can operate directly on stepped encodings if:
4246
4247 a3 - a2 == a2 - a1
4248 implies:
4249 (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
4250
4251 which is true if (x -> x op c) distributes over addition. */
4252 step_ok_p = distributes_over_addition_p (code, 1);
4253 else
4254 /* Similarly in reverse. */
4255 step_ok_p = distributes_over_addition_p (code, 2);
4256 rtx_vector_builder builder;
4257 if (!builder.new_binary_operation (mode, op0, op1, step_ok_p))
4258 return 0;
4259
4260 unsigned int count = builder.encoded_nelts ();
4261 for (unsigned int i = 0; i < count; i++)
4049 { 4262 {
4050 rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode), 4263 rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode),
4051 CONST_VECTOR_ELT (op0, i), 4264 CONST_VECTOR_ELT (op0, i),
4052 CONST_VECTOR_ELT (op1, i)); 4265 CONST_VECTOR_ELT (op1, i));
4053 if (!x || !valid_for_const_vector_p (mode, x)) 4266 if (!x || !valid_for_const_vector_p (mode, x))
4054 return 0; 4267 return 0;
4055 RTVEC_ELT (v, i) = x; 4268 builder.quick_push (x);
4056 } 4269 }
4057 4270 return builder.build ();
4058 return gen_rtx_CONST_VECTOR (mode, v);
4059 } 4271 }
4060 4272
4061 if (VECTOR_MODE_P (mode) 4273 if (VECTOR_MODE_P (mode)
4062 && code == VEC_CONCAT 4274 && code == VEC_CONCAT
4063 && (CONST_SCALAR_INT_P (op0) 4275 && (CONST_SCALAR_INT_P (op0)
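Note: the comments in the hunk above argue that a stepped constant-vector encoding (constant difference between consecutive elements) can be operated on directly when the elementwise operation distributes over addition, which is why distributes_over_addition_p accepts PLUS, MINUS, MULT and ASHIFT (only when the varying value is operand 1). A standalone numeric check of that claim, not GCC code:

// Hypothetical illustration: applying x -> x op c to a stepped sequence
// preserves the constant step for ops that distribute over addition,
// but not for c << x (ASHIFT with the constant as operand 1).
#include <cassert>

bool stepped_p (long a1, long a2, long a3)
{
  return a2 - a1 == a3 - a2;
}

int main ()
{
  long a1 = 1, a2 = 3, a3 = 5;                       // step == 2
  assert (stepped_p (a1, a2, a3));
  assert (stepped_p (a1 << 2, a2 << 2, a3 << 2));    // ASHIFT, opno 1: still stepped
  assert (stepped_p (a1 * 7, a2 * 7, a3 * 7));       // MULT: still stepped
  assert (!stepped_p (1 << a1, 1 << a2, 1 << a3));   // constant << x: not stepped
  return 0;
}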
4226 4438
4227 /* We can fold some multi-word operations. */ 4439 /* We can fold some multi-word operations. */
4228 scalar_int_mode int_mode; 4440 scalar_int_mode int_mode;
4229 if (is_a <scalar_int_mode> (mode, &int_mode) 4441 if (is_a <scalar_int_mode> (mode, &int_mode)
4230 && CONST_SCALAR_INT_P (op0) 4442 && CONST_SCALAR_INT_P (op0)
4231 && CONST_SCALAR_INT_P (op1)) 4443 && CONST_SCALAR_INT_P (op1)
4444 && GET_MODE_PRECISION (int_mode) <= MAX_BITSIZE_MODE_ANY_INT)
4232 { 4445 {
4233 wide_int result; 4446 wide_int result;
4234 wi::overflow_type overflow; 4447 wi::overflow_type overflow;
4235 rtx_mode_t pop0 = rtx_mode_t (op0, int_mode); 4448 rtx_mode_t pop0 = rtx_mode_t (op0, int_mode);
4236 rtx_mode_t pop1 = rtx_mode_t (op1, int_mode); 4449 rtx_mode_t pop1 = rtx_mode_t (op1, int_mode);
4547 changed = 1; 4760 changed = 1;
4548 canonicalized = 1; 4761 canonicalized = 1;
4549 } 4762 }
4550 break; 4763 break;
4551 4764
4552 case CONST_INT: 4765 CASE_CONST_SCALAR_INT:
4766 case CONST_POLY_INT:
4553 n_constants++; 4767 n_constants++;
4554 if (this_neg) 4768 if (this_neg)
4555 { 4769 {
4556 ops[i].op = neg_const_int (mode, this_op); 4770 ops[i].op = neg_poly_int_rtx (mode, this_op);
4557 ops[i].neg = 0; 4771 ops[i].neg = 0;
4558 changed = 1; 4772 changed = 1;
4559 canonicalized = 1; 4773 canonicalized = 1;
4560 } 4774 }
4561 break; 4775 break;
4676 && XEXP (XEXP (tem, 0), 1) == rhs) 4890 && XEXP (XEXP (tem, 0), 1) == rhs)
4677 break; 4891 break;
4678 lneg &= rneg; 4892 lneg &= rneg;
4679 if (GET_CODE (tem) == NEG) 4893 if (GET_CODE (tem) == NEG)
4680 tem = XEXP (tem, 0), lneg = !lneg; 4894 tem = XEXP (tem, 0), lneg = !lneg;
4681 if (CONST_INT_P (tem) && lneg) 4895 if (poly_int_rtx_p (tem) && lneg)
4682 tem = neg_const_int (mode, tem), lneg = 0; 4896 tem = neg_poly_int_rtx (mode, tem), lneg = 0;
4683 4897
4684 ops[i].op = tem; 4898 ops[i].op = tem;
4685 ops[i].neg = lneg; 4899 ops[i].neg = lneg;
4686 ops[j].op = NULL_RTX; 4900 ops[j].op = NULL_RTX;
4687 changed = 1; 4901 changed = 1;
4736 The combination loop should have ensured that there is exactly 4950 The combination loop should have ensured that there is exactly
4737 one CONST_INT, and the sort will have ensured that it is last 4951 one CONST_INT, and the sort will have ensured that it is last
4738 in the array and that any other constant will be next-to-last. */ 4952 in the array and that any other constant will be next-to-last. */
4739 4953
4740 if (n_ops > 1 4954 if (n_ops > 1
4741 && CONST_INT_P (ops[n_ops - 1].op) 4955 && poly_int_rtx_p (ops[n_ops - 1].op)
4742 && CONSTANT_P (ops[n_ops - 2].op)) 4956 && CONSTANT_P (ops[n_ops - 2].op))
4743 { 4957 {
4744 rtx value = ops[n_ops - 1].op; 4958 rtx value = ops[n_ops - 1].op;
4745 if (ops[n_ops - 1].neg ^ ops[n_ops - 2].neg) 4959 if (ops[n_ops - 1].neg ^ ops[n_ops - 2].neg)
4746 value = neg_const_int (mode, value); 4960 value = neg_poly_int_rtx (mode, value);
4747 if (CONST_INT_P (value)) 4961 if (CONST_INT_P (value))
4748 { 4962 {
4749 ops[n_ops - 2].op = plus_constant (mode, ops[n_ops - 2].op, 4963 ops[n_ops - 2].op = plus_constant (mode, ops[n_ops - 2].op,
4750 INTVAL (value)); 4964 INTVAL (value));
4751 n_ops--; 4965 n_ops--;
4840 } 5054 }
4841 #else 5055 #else
4842 return NULL_RTX; 5056 return NULL_RTX;
4843 #endif 5057 #endif
4844 } 5058 }
5059 /* For vector comparison with scalar int result, it is unknown
5060 if the target means here a comparison into an integral bitmask,
5061 or comparison where all comparisons true mean const_true_rtx
5062 whole result, or where any comparisons true mean const_true_rtx
5063 whole result. For const0_rtx all the cases are the same. */
5064 if (VECTOR_MODE_P (cmp_mode)
5065 && SCALAR_INT_MODE_P (mode)
5066 && tem == const_true_rtx)
5067 return NULL_RTX;
4845 5068
4846 return tem; 5069 return tem;
4847 } 5070 }
4848 5071
4849 /* For the following tests, ensure const0_rtx is op1. */ 5072 /* For the following tests, ensure const0_rtx is op1. */
5186 gcc_unreachable (); 5409 gcc_unreachable ();
5187 } 5410 }
5188 } 5411 }
5189 5412
5190 /* Check if the given comparison (done in the given MODE) is actually 5413 /* Check if the given comparison (done in the given MODE) is actually
5191 a tautology or a contradiction. If the mode is VOID_mode, the 5414 a tautology or a contradiction. If the mode is VOIDmode, the
5192 comparison is done in "infinite precision". If no simplification 5415 comparison is done in "infinite precision". If no simplification
5193 is possible, this function returns zero. Otherwise, it returns 5416 is possible, this function returns zero. Otherwise, it returns
5194 either const_true_rtx or const0_rtx. */ 5417 either const_true_rtx or const0_rtx. */
5195 5418
5196 rtx 5419 rtx
5613 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)) 5836 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
5614 { 5837 {
5615 rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); 5838 rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
5616 rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); 5839 rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
5617 if (top0 || top1) 5840 if (top0 || top1)
5618 return simplify_gen_binary (GET_CODE (x), GET_MODE (x), 5841 {
5619 top0 ? top0 : XEXP (x, 0), 5842 if (COMPARISON_P (x))
5620 top1 ? top1 : XEXP (x, 1)); 5843 return simplify_gen_relational (GET_CODE (x), GET_MODE (x),
5844 GET_MODE (XEXP (x, 0)) != VOIDmode
5845 ? GET_MODE (XEXP (x, 0))
5846 : GET_MODE (XEXP (x, 1)),
5847 top0 ? top0 : XEXP (x, 0),
5848 top1 ? top1 : XEXP (x, 1));
5849 else
5850 return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
5851 top0 ? top0 : XEXP (x, 0),
5852 top1 ? top1 : XEXP (x, 1));
5853 }
5621 } 5854 }
5622 if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY 5855 if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
5623 && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) 5856 && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
5624 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) 5857 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
5625 && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) 5858 && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
6029 && !side_effects_p (op2) && !side_effects_p (op1)) 6262 && !side_effects_p (op2) && !side_effects_p (op1))
6030 return op0; 6263 return op0;
6031 6264
6032 if (!side_effects_p (op2)) 6265 if (!side_effects_p (op2))
6033 { 6266 {
6034 rtx top0 = simplify_merge_mask (op0, op2, 0); 6267 rtx top0
6035 rtx top1 = simplify_merge_mask (op1, op2, 1); 6268 = may_trap_p (op0) ? NULL_RTX : simplify_merge_mask (op0, op2, 0);
6269 rtx top1
6270 = may_trap_p (op1) ? NULL_RTX : simplify_merge_mask (op1, op2, 1);
6036 if (top0 || top1) 6271 if (top0 || top1)
6037 return simplify_gen_ternary (code, mode, mode, 6272 return simplify_gen_ternary (code, mode, mode,
6038 top0 ? top0 : op0, 6273 top0 ? top0 : op0,
6039 top1 ? top1 : op1, op2); 6274 top1 ? top1 : op1, op2);
6040 } 6275 }
6046 } 6281 }
6047 6282
6048 return 0; 6283 return 0;
6049 } 6284 }
6050 6285
6051 /* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE 6286 /* Try to calculate NUM_BYTES bytes of the target memory image of X,
6052 or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or 6287 starting at byte FIRST_BYTE. Return true on success and add the
6053 CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR. 6288 bytes to BYTES, such that each byte has BITS_PER_UNIT bits and such
6054 6289 that the bytes follow target memory order. Leave BYTES unmodified
6055 Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values 6290 on failure.
6056 represented as a little-endian array of 'unsigned char', selecting by BYTE, 6291
6057 and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR, 6292 MODE is the mode of X. The caller must reserve NUM_BYTES bytes in
6058 FIRST_ELEM is the number of the first element to extract, otherwise 6293 BYTES before calling this function. */
6059 FIRST_ELEM is ignored. */ 6294
6295 bool
6296 native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
6297 unsigned int first_byte, unsigned int num_bytes)
6298 {
6299 /* Check the mode is sensible. */
6300 gcc_assert (GET_MODE (x) == VOIDmode
6301 ? is_a <scalar_int_mode> (mode)
6302 : mode == GET_MODE (x));
6303
6304 if (GET_CODE (x) == CONST_VECTOR)
6305 {
6306 /* CONST_VECTOR_ELT follows target memory order, so no shuffling
6307 is necessary. The only complication is that MODE_VECTOR_BOOL
6308 vectors can have several elements per byte. */
6309 unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
6310 GET_MODE_NUNITS (mode));
6311 unsigned int elt = first_byte * BITS_PER_UNIT / elt_bits;
6312 if (elt_bits < BITS_PER_UNIT)
6313 {
6314 /* This is the only case in which elements can be smaller than
6315 a byte. */
6316 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
6317 for (unsigned int i = 0; i < num_bytes; ++i)
6318 {
6319 target_unit value = 0;
6320 for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
6321 {
6322 value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
6323 elt += 1;
6324 }
6325 bytes.quick_push (value);
6326 }
6327 return true;
6328 }
6329
6330 unsigned int start = bytes.length ();
6331 unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mode);
6332 /* Make FIRST_BYTE relative to ELT. */
6333 first_byte %= elt_bytes;
6334 while (num_bytes > 0)
6335 {
6336 /* Work out how many bytes we want from element ELT. */
6337 unsigned int chunk_bytes = MIN (num_bytes, elt_bytes - first_byte);
6338 if (!native_encode_rtx (GET_MODE_INNER (mode),
6339 CONST_VECTOR_ELT (x, elt), bytes,
6340 first_byte, chunk_bytes))
6341 {
6342 bytes.truncate (start);
6343 return false;
6344 }
6345 elt += 1;
6346 first_byte = 0;
6347 num_bytes -= chunk_bytes;
6348 }
6349 return true;
6350 }
6351
6352 /* All subsequent cases are limited to scalars. */
6353 scalar_mode smode;
6354 if (!is_a <scalar_mode> (mode, &smode))
6355 return false;
6356
6357 /* Make sure that the region is in range. */
6358 unsigned int end_byte = first_byte + num_bytes;
6359 unsigned int mode_bytes = GET_MODE_SIZE (smode);
6360 gcc_assert (end_byte <= mode_bytes);
6361
6362 if (CONST_SCALAR_INT_P (x))
6363 {
6364 /* The target memory layout is affected by both BYTES_BIG_ENDIAN
6365 and WORDS_BIG_ENDIAN. Use the subreg machinery to get the lsb
6366 position of each byte. */
6367 rtx_mode_t value (x, smode);
6368 wide_int_ref value_wi (value);
6369 for (unsigned int byte = first_byte; byte < end_byte; ++byte)
6370 {
6371 /* Always constant because the inputs are. */
6372 unsigned int lsb
6373 = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
6374 /* Operate directly on the encoding rather than using
6375 wi::extract_uhwi, so that we preserve the sign or zero
6376 extension for modes that are not a whole number of bits in
6377 size. (Zero extension is only used for the combination of
6378 innermode == BImode && STORE_FLAG_VALUE == 1). */
6379 unsigned int elt = lsb / HOST_BITS_PER_WIDE_INT;
6380 unsigned int shift = lsb % HOST_BITS_PER_WIDE_INT;
6381 unsigned HOST_WIDE_INT uhwi = value_wi.elt (elt);
6382 bytes.quick_push (uhwi >> shift);
6383 }
6384 return true;
6385 }
6386
6387 if (CONST_DOUBLE_P (x))
6388 {
6389 /* real_to_target produces an array of integers in target memory order.
6390 All integers before the last one have 32 bits; the last one may
6391 have 32 bits or fewer, depending on whether the mode bitsize
6392 is divisible by 32. Each of these integers is then laid out
6393 in target memory as any other integer would be. */
6394 long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
6395 real_to_target (el32, CONST_DOUBLE_REAL_VALUE (x), smode);
6396
6397 /* The (maximum) number of target bytes per element of el32. */
6398 unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
6399 gcc_assert (bytes_per_el32 != 0);
6400
6401 /* Build up the integers in a similar way to the CONST_SCALAR_INT_P
6402 handling above. */
6403 for (unsigned int byte = first_byte; byte < end_byte; ++byte)
6404 {
6405 unsigned int index = byte / bytes_per_el32;
6406 unsigned int subbyte = byte % bytes_per_el32;
6407 unsigned int int_bytes = MIN (bytes_per_el32,
6408 mode_bytes - index * bytes_per_el32);
6409 /* Always constant because the inputs are. */
6410 unsigned int lsb
6411 = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
6412 bytes.quick_push ((unsigned long) el32[index] >> lsb);
6413 }
6414 return true;
6415 }
6416
6417 if (GET_CODE (x) == CONST_FIXED)
6418 {
6419 for (unsigned int byte = first_byte; byte < end_byte; ++byte)
6420 {
6421 /* Always constant because the inputs are. */
6422 unsigned int lsb
6423 = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
6424 unsigned HOST_WIDE_INT piece = CONST_FIXED_VALUE_LOW (x);
6425 if (lsb >= HOST_BITS_PER_WIDE_INT)
6426 {
6427 lsb -= HOST_BITS_PER_WIDE_INT;
6428 piece = CONST_FIXED_VALUE_HIGH (x);
6429 }
6430 bytes.quick_push (piece >> lsb);
6431 }
6432 return true;
6433 }
6434
6435 return false;
6436 }
6437
6438 /* Read a vector of mode MODE from the target memory image given by BYTES,
6439 starting at byte FIRST_BYTE. The vector is known to be encodable using
6440 NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each,
6441 and BYTES is known to have enough bytes to supply NPATTERNS *
6442 NELTS_PER_PATTERN vector elements. Each element of BYTES contains
6443 BITS_PER_UNIT bits and the bytes are in target memory order.
6444
6445 Return the vector on success, otherwise return NULL_RTX. */
6446
6447 rtx
6448 native_decode_vector_rtx (machine_mode mode, vec<target_unit> bytes,
6449 unsigned int first_byte, unsigned int npatterns,
6450 unsigned int nelts_per_pattern)
6451 {
6452 rtx_vector_builder builder (mode, npatterns, nelts_per_pattern);
6453
6454 unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
6455 GET_MODE_NUNITS (mode));
6456 if (elt_bits < BITS_PER_UNIT)
6457 {
6458 /* This is the only case in which elements can be smaller than a byte.
6459 Element 0 is always in the lsb of the containing byte. */
6460 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
6461 for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
6462 {
6463 unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
6464 unsigned int byte_index = bit_index / BITS_PER_UNIT;
6465 unsigned int lsb = bit_index % BITS_PER_UNIT;
6466 builder.quick_push (bytes[byte_index] & (1 << lsb)
6467 ? CONST1_RTX (BImode)
6468 : CONST0_RTX (BImode));
6469 }
6470 }
6471 else
6472 {
6473 for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
6474 {
6475 rtx x = native_decode_rtx (GET_MODE_INNER (mode), bytes, first_byte);
6476 if (!x)
6477 return NULL_RTX;
6478 builder.quick_push (x);
6479 first_byte += elt_bits / BITS_PER_UNIT;
6480 }
6481 }
6482 return builder.build ();
6483 }
6484
6485 /* Read an rtx of mode MODE from the target memory image given by BYTES,
6486 starting at byte FIRST_BYTE. Each element of BYTES contains BITS_PER_UNIT
6487 bits and the bytes are in target memory order. The image has enough
6488 values to specify all bytes of MODE.
6489
6490 Return the rtx on success, otherwise return NULL_RTX. */
6491
6492 rtx
6493 native_decode_rtx (machine_mode mode, vec<target_unit> bytes,
6494 unsigned int first_byte)
6495 {
6496 if (VECTOR_MODE_P (mode))
6497 {
6498 /* If we know at compile time how many elements there are,
6499 pull each element directly from BYTES. */
6500 unsigned int nelts;
6501 if (GET_MODE_NUNITS (mode).is_constant (&nelts))
6502 return native_decode_vector_rtx (mode, bytes, first_byte, nelts, 1);
6503 return NULL_RTX;
6504 }
6505
6506 scalar_int_mode imode;
6507 if (is_a <scalar_int_mode> (mode, &imode)
6508 && GET_MODE_PRECISION (imode) <= MAX_BITSIZE_MODE_ANY_INT)
6509 {
6510 /* Pull the bytes msb first, so that we can use simple
6511 shift-and-insert wide_int operations. */
6512 unsigned int size = GET_MODE_SIZE (imode);
6513 wide_int result (wi::zero (GET_MODE_PRECISION (imode)));
6514 for (unsigned int i = 0; i < size; ++i)
6515 {
6516 unsigned int lsb = (size - i - 1) * BITS_PER_UNIT;
6517 /* Always constant because the inputs are. */
6518 unsigned int subbyte
6519 = subreg_size_offset_from_lsb (1, size, lsb).to_constant ();
6520 result <<= BITS_PER_UNIT;
6521 result |= bytes[first_byte + subbyte];
6522 }
6523 return immed_wide_int_const (result, imode);
6524 }
6525
6526 scalar_float_mode fmode;
6527 if (is_a <scalar_float_mode> (mode, &fmode))
6528 {
6529 /* We need to build an array of integers in target memory order.
6530 All integers before the last one have 32 bits; the last one may
6531 have 32 bits or fewer, depending on whether the mode bitsize
6532 is divisible by 32. */
6533 long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
6534 unsigned int num_el32 = CEIL (GET_MODE_BITSIZE (fmode), 32);
6535 memset (el32, 0, num_el32 * sizeof (long));
6536
6537 /* The (maximum) number of target bytes per element of el32. */
6538 unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
6539 gcc_assert (bytes_per_el32 != 0);
6540
6541 unsigned int mode_bytes = GET_MODE_SIZE (fmode);
6542 for (unsigned int byte = 0; byte < mode_bytes; ++byte)
6543 {
6544 unsigned int index = byte / bytes_per_el32;
6545 unsigned int subbyte = byte % bytes_per_el32;
6546 unsigned int int_bytes = MIN (bytes_per_el32,
6547 mode_bytes - index * bytes_per_el32);
6548 /* Always constant because the inputs are. */
6549 unsigned int lsb
6550 = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
6551 el32[index] |= (unsigned long) bytes[first_byte + byte] << lsb;
6552 }
6553 REAL_VALUE_TYPE r;
6554 real_from_target (&r, el32, fmode);
6555 return const_double_from_real_value (r, fmode);
6556 }
6557
6558 if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
6559 {
6560 scalar_mode smode = as_a <scalar_mode> (mode);
6561 FIXED_VALUE_TYPE f;
6562 f.data.low = 0;
6563 f.data.high = 0;
6564 f.mode = smode;
6565
6566 unsigned int mode_bytes = GET_MODE_SIZE (smode);
6567 for (unsigned int byte = 0; byte < mode_bytes; ++byte)
6568 {
6569 /* Always constant because the inputs are. */
6570 unsigned int lsb
6571 = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
6572 unsigned HOST_WIDE_INT unit = bytes[first_byte + byte];
6573 if (lsb >= HOST_BITS_PER_WIDE_INT)
6574 f.data.high |= unit << (lsb - HOST_BITS_PER_WIDE_INT);
6575 else
6576 f.data.low |= unit << lsb;
6577 }
6578 return CONST_FIXED_FROM_FIXED_VALUE (f, mode);
6579 }
6580
6581 return NULL_RTX;
6582 }
6583
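Note: native_encode_rtx and native_decode_rtx above convert constants to and from target memory order, computing the lsb position of each byte via the subreg machinery. The following is a much-simplified standalone sketch of the round-trip idea for one 32-bit integer; it is not GCC code, it ignores WORDS_BIG_ENDIAN and sub-byte vector elements, and BIG_ENDIAN_LAYOUT merely stands in for the target's BYTES_BIG_ENDIAN setting.

// Hypothetical illustration: emit bytes in target memory order, then
// rebuild the value with a shift-and-or loop.
#include <cstdint>
#include <vector>
#include <cassert>

static const bool BIG_ENDIAN_LAYOUT = false;   // assumed target byte order

std::vector<uint8_t> encode_u32 (uint32_t value)
{
  std::vector<uint8_t> bytes (4);
  for (unsigned i = 0; i < 4; ++i)
    {
      unsigned lsb = (BIG_ENDIAN_LAYOUT ? 3 - i : i) * 8;  // lsb position of byte i
      bytes[i] = (value >> lsb) & 0xff;
    }
  return bytes;
}

uint32_t decode_u32 (const std::vector<uint8_t> &bytes)
{
  uint32_t value = 0;
  for (unsigned i = 0; i < 4; ++i)
    {
      unsigned lsb = (BIG_ENDIAN_LAYOUT ? 3 - i : i) * 8;
      value |= (uint32_t) bytes[i] << lsb;
    }
  return value;
}

int main ()
{
  uint32_t x = 0x12345678;
  assert (decode_u32 (encode_u32 (x)) == x);   // round trip preserves the value
  return 0;
}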
6584 /* Simplify a byte offset BYTE into CONST_VECTOR X. The main purpose
6585 is to convert a runtime BYTE value into a constant one. */
6586
6587 static poly_uint64
6588 simplify_const_vector_byte_offset (rtx x, poly_uint64 byte)
6589 {
6590 /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */
6591 machine_mode mode = GET_MODE (x);
6592 unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
6593 GET_MODE_NUNITS (mode));
6594 /* The number of bits needed to encode one element from each pattern. */
6595 unsigned int sequence_bits = CONST_VECTOR_NPATTERNS (x) * elt_bits;
6596
6597 /* Identify the start point in terms of a sequence number and a byte offset
6598 within that sequence. */
6599 poly_uint64 first_sequence;
6600 unsigned HOST_WIDE_INT subbit;
6601 if (can_div_trunc_p (byte * BITS_PER_UNIT, sequence_bits,
6602 &first_sequence, &subbit))
6603 {
6604 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
6605 if (nelts_per_pattern == 1)
6606 /* This is a duplicated vector, so the value of FIRST_SEQUENCE
6607 doesn't matter. */
6608 byte = subbit / BITS_PER_UNIT;
6609 else if (nelts_per_pattern == 2 && known_gt (first_sequence, 0U))
6610 {
6611 /* The subreg drops the first element from each pattern and
6612 only uses the second element. Find the first sequence
6613 that starts on a byte boundary. */
6614 subbit += least_common_multiple (sequence_bits, BITS_PER_UNIT);
6615 byte = subbit / BITS_PER_UNIT;
6616 }
6617 }
6618 return byte;
6619 }
6620
6621 /* Subroutine of simplify_subreg in which:
6622
6623 - X is known to be a CONST_VECTOR
6624 - OUTERMODE is known to be a vector mode
6625
6626 Try to handle the subreg by operating on the CONST_VECTOR encoding
6627 rather than on each individual element of the CONST_VECTOR.
6628
6629 Return the simplified subreg on success, otherwise return NULL_RTX. */
6060 6630
6061 static rtx 6631 static rtx
6062 simplify_immed_subreg (fixed_size_mode outermode, rtx op, 6632 simplify_const_vector_subreg (machine_mode outermode, rtx x,
6063 machine_mode innermode, unsigned int byte, 6633 machine_mode innermode, unsigned int first_byte)
6064 unsigned int first_elem, unsigned int inner_bytes)
6065 { 6634 {
6066 enum { 6635 /* Paradoxical subregs of vectors have dubious semantics. */
6067 value_bit = 8, 6636 if (paradoxical_subreg_p (outermode, innermode))
6068 value_mask = (1 << value_bit) - 1 6637 return NULL_RTX;
6069 }; 6638
6070 unsigned char value[MAX_BITSIZE_MODE_ANY_MODE / value_bit]; 6639 /* We can only preserve the semantics of a stepped pattern if the new
6071 int value_start; 6640 vector element is the same as the original one. */
6072 int i; 6641 if (CONST_VECTOR_STEPPED_P (x)
6073 int elem; 6642 && GET_MODE_INNER (outermode) != GET_MODE_INNER (innermode))
6074 6643 return NULL_RTX;
6075 int num_elem; 6644
6076 rtx * elems; 6645 /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */
6077 int elem_bitsize; 6646 unsigned int x_elt_bits
6078 rtx result_s = NULL; 6647 = vector_element_size (GET_MODE_BITSIZE (innermode),
6079 rtvec result_v = NULL; 6648 GET_MODE_NUNITS (innermode));
6080 enum mode_class outer_class; 6649 unsigned int out_elt_bits
6081 scalar_mode outer_submode; 6650 = vector_element_size (GET_MODE_BITSIZE (outermode),
6082 int max_bitsize; 6651 GET_MODE_NUNITS (outermode));
6652
6653 /* The number of bits needed to encode one element from every pattern
6654 of the original vector. */
6655 unsigned int x_sequence_bits = CONST_VECTOR_NPATTERNS (x) * x_elt_bits;
6656
6657 /* The number of bits needed to encode one element from every pattern
6658 of the result. */
6659 unsigned int out_sequence_bits
6660 = least_common_multiple (x_sequence_bits, out_elt_bits);
6661
6662 /* Work out the number of interleaved patterns in the output vector
6663 and the number of encoded elements per pattern. */
6664 unsigned int out_npatterns = out_sequence_bits / out_elt_bits;
6665 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
6666
6667 /* The encoding scheme requires the number of elements to be a multiple
6668 of the number of patterns, so that each pattern appears at least once
6669 and so that the same number of elements appear from each pattern. */
6670 bool ok_p = multiple_p (GET_MODE_NUNITS (outermode), out_npatterns);
6671 unsigned int const_nunits;
6672 if (GET_MODE_NUNITS (outermode).is_constant (&const_nunits)
6673 && (!ok_p || out_npatterns * nelts_per_pattern > const_nunits))
6674 {
6675 /* Either the encoding is invalid, or applying it would give us
6676 more elements than we need. Just encode each element directly. */
6677 out_npatterns = const_nunits;
6678 nelts_per_pattern = 1;
6679 }
6680 else if (!ok_p)
6681 return NULL_RTX;
6682
6683 /* Get enough bytes of X to form the new encoding. */
6684 unsigned int buffer_bits = out_npatterns * nelts_per_pattern * out_elt_bits;
6685 unsigned int buffer_bytes = CEIL (buffer_bits, BITS_PER_UNIT);
6686 auto_vec<target_unit, 128> buffer (buffer_bytes);
6687 if (!native_encode_rtx (innermode, x, buffer, first_byte, buffer_bytes))
6688 return NULL_RTX;
6689
6690 /* Reencode the bytes as OUTERMODE. */
6691 return native_decode_vector_rtx (outermode, buffer, 0, out_npatterns,
6692 nelts_per_pattern);
6693 }
6694
6695 /* Try to simplify a subreg of a constant by encoding the subreg region
6696 as a sequence of target bytes and reading them back in the new mode.
6697 Return the new value on success, otherwise return null.
6698
6699 The subreg has outer mode OUTERMODE, inner mode INNERMODE, inner value X
6700 and byte offset FIRST_BYTE. */
6701
6702 static rtx
6703 simplify_immed_subreg (fixed_size_mode outermode, rtx x,
6704 machine_mode innermode, unsigned int first_byte)
6705 {
6706 unsigned int buffer_bytes = GET_MODE_SIZE (outermode);
6707 auto_vec<target_unit, 128> buffer (buffer_bytes);
6083 6708
6084 /* Some ports misuse CCmode. */ 6709 /* Some ports misuse CCmode. */
6085 if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (op)) 6710 if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (x))
6086 return op; 6711 return x;
6087 6712
6088 /* We have no way to represent a complex constant at the rtl level. */ 6713 /* Paradoxical subregs read undefined values for bytes outside of the
6089 if (COMPLEX_MODE_P (outermode)) 6714 inner value. However, we have traditionally always sign-extended
6090 return NULL_RTX; 6715 integer constants and zero-extended others. */
6091 6716 unsigned int inner_bytes = buffer_bytes;
6092 /* We support any size mode. */ 6717 if (paradoxical_subreg_p (outermode, innermode))
6093 max_bitsize = MAX (GET_MODE_BITSIZE (outermode), 6718 {
6094 inner_bytes * BITS_PER_UNIT); 6719 if (!GET_MODE_SIZE (innermode).is_constant (&inner_bytes))
6095 6720 return NULL_RTX;
6096 /* Unpack the value. */ 6721
6097 6722 target_unit filler = 0;
6098 if (GET_CODE (op) == CONST_VECTOR) 6723 if (CONST_SCALAR_INT_P (x) && wi::neg_p (rtx_mode_t (x, innermode)))
6099 { 6724 filler = -1;
6100 num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)); 6725
6101 elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode); 6726 /* Add any leading bytes due to big-endian layout. The number of
6727 bytes must be constant because both modes have constant size. */
6728 unsigned int leading_bytes
6729 = -byte_lowpart_offset (outermode, innermode).to_constant ();
6730 for (unsigned int i = 0; i < leading_bytes; ++i)
6731 buffer.quick_push (filler);
6732
6733 if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
6734 return NULL_RTX;
6735
6736 /* Add any trailing bytes due to little-endian layout. */
6737 while (buffer.length () < buffer_bytes)
6738 buffer.quick_push (filler);
6102 } 6739 }
6103 else 6740 else
6104 { 6741 {
6105 num_elem = 1; 6742 if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
6106 elem_bitsize = max_bitsize; 6743 return NULL_RTX;
6107 }
6108 /* If this asserts, it is too complicated; reducing value_bit may help. */
6109 gcc_assert (BITS_PER_UNIT % value_bit == 0);
6110 /* I don't know how to handle endianness of sub-units. */
6111 gcc_assert (elem_bitsize % BITS_PER_UNIT == 0);
6112
6113 for (elem = 0; elem < num_elem; elem++)
6114 {
6115 unsigned char * vp;
6116 rtx el = (GET_CODE (op) == CONST_VECTOR
6117 ? CONST_VECTOR_ELT (op, first_elem + elem)
6118 : op);
6119
6120 /* Vectors are kept in target memory order. (This is probably
6121 a mistake.) */
6122 {
6123 unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
6124 unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
6125 / BITS_PER_UNIT);
6126 unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
6127 unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
6128 unsigned bytele = (subword_byte % UNITS_PER_WORD
6129 + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
6130 vp = value + (bytele * BITS_PER_UNIT) / value_bit;
6131 } 6744 }
6132 6745 return native_decode_rtx (outermode, buffer, 0);
6133 switch (GET_CODE (el))
6134 {
6135 case CONST_INT:
6136 for (i = 0;
6137 i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
6138 i += value_bit)
6139 *vp++ = INTVAL (el) >> i;
6140 /* CONST_INTs are always logically sign-extended. */
6141 for (; i < elem_bitsize; i += value_bit)
6142 *vp++ = INTVAL (el) < 0 ? -1 : 0;
6143 break;
6144
6145 case CONST_WIDE_INT:
6146 {
6147 rtx_mode_t val = rtx_mode_t (el, GET_MODE_INNER (innermode));
6148 unsigned char extend = wi::sign_mask (val);
6149 int prec = wi::get_precision (val);
6150
6151 for (i = 0; i < prec && i < elem_bitsize; i += value_bit)
6152 *vp++ = wi::extract_uhwi (val, i, value_bit);
6153 for (; i < elem_bitsize; i += value_bit)
6154 *vp++ = extend;
6155 }
6156 break;
6157
6158 case CONST_DOUBLE:
6159 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (el) == VOIDmode)
6160 {
6161 unsigned char extend = 0;
6162 /* If this triggers, someone should have generated a
6163 CONST_INT instead. */
6164 gcc_assert (elem_bitsize > HOST_BITS_PER_WIDE_INT);
6165
6166 for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
6167 *vp++ = CONST_DOUBLE_LOW (el) >> i;
6168 while (i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize)
6169 {
6170 *vp++
6171 = CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT);
6172 i += value_bit;
6173 }
6174
6175 if (CONST_DOUBLE_HIGH (el) >> (HOST_BITS_PER_WIDE_INT - 1))
6176 extend = -1;
6177 for (; i < elem_bitsize; i += value_bit)
6178 *vp++ = extend;
6179 }
6180 else
6181 {
6182 /* This is big enough for anything on the platform. */
6183 long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32];
6184 scalar_float_mode el_mode;
6185
6186 el_mode = as_a <scalar_float_mode> (GET_MODE (el));
6187 int bitsize = GET_MODE_BITSIZE (el_mode);
6188
6189 gcc_assert (bitsize <= elem_bitsize);
6190 gcc_assert (bitsize % value_bit == 0);
6191
6192 real_to_target (tmp, CONST_DOUBLE_REAL_VALUE (el),
6193 GET_MODE (el));
6194
6195 /* real_to_target produces its result in words affected by
6196 FLOAT_WORDS_BIG_ENDIAN. However, we ignore this,
6197 and use WORDS_BIG_ENDIAN instead; see the documentation
6198 of SUBREG in rtl.texi. */
6199 for (i = 0; i < bitsize; i += value_bit)
6200 {
6201 int ibase;
6202 if (WORDS_BIG_ENDIAN)
6203 ibase = bitsize - 1 - i;
6204 else
6205 ibase = i;
6206 *vp++ = tmp[ibase / 32] >> i % 32;
6207 }
6208
6209 /* It shouldn't matter what's done here, so fill it with
6210 zero. */
6211 for (; i < elem_bitsize; i += value_bit)
6212 *vp++ = 0;
6213 }
6214 break;
6215
6216 case CONST_FIXED:
6217 if (elem_bitsize <= HOST_BITS_PER_WIDE_INT)
6218 {
6219 for (i = 0; i < elem_bitsize; i += value_bit)
6220 *vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
6221 }
6222 else
6223 {
6224 for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
6225 *vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
6226 for (; i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize;
6227 i += value_bit)
6228 *vp++ = CONST_FIXED_VALUE_HIGH (el)
6229 >> (i - HOST_BITS_PER_WIDE_INT);
6230 for (; i < elem_bitsize; i += value_bit)
6231 *vp++ = 0;
6232 }
6233 break;
6234
6235 default:
6236 gcc_unreachable ();
6237 }
6238 }
6239
6240 /* Now, pick the right byte to start with. */
6241 /* Renumber BYTE so that the least-significant byte is byte 0. A special
6242 case is paradoxical SUBREGs, which shouldn't be adjusted since they
6243 will already have offset 0. */
6244 if (inner_bytes >= GET_MODE_SIZE (outermode))
6245 {
6246 unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte;
6247 unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
6248 unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
6249 byte = (subword_byte % UNITS_PER_WORD
6250 + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
6251 }
6252
6253 /* BYTE should still be inside OP. (Note that BYTE is unsigned,
6254 so if it's become negative it will instead be very large.) */
6255 gcc_assert (byte < inner_bytes);
6256
6257 /* Convert from bytes to chunks of size value_bit. */
6258 value_start = byte * (BITS_PER_UNIT / value_bit);
6259
6260 /* Re-pack the value. */
6261 num_elem = GET_MODE_NUNITS (outermode);
6262
6263 if (VECTOR_MODE_P (outermode))
6264 {
6265 result_v = rtvec_alloc (num_elem);
6266 elems = &RTVEC_ELT (result_v, 0);
6267 }
6268 else
6269 elems = &result_s;
6270
6271 outer_submode = GET_MODE_INNER (outermode);
6272 outer_class = GET_MODE_CLASS (outer_submode);
6273 elem_bitsize = GET_MODE_BITSIZE (outer_submode);
6274
6275 gcc_assert (elem_bitsize % value_bit == 0);
6276 gcc_assert (elem_bitsize + value_start * value_bit <= max_bitsize);
6277
6278 for (elem = 0; elem < num_elem; elem++)
6279 {
6280 unsigned char *vp;
6281
6282 /* Vectors are stored in target memory order. (This is probably
6283 a mistake.) */
6284 {
6285 unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
6286 unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
6287 / BITS_PER_UNIT);
6288 unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
6289 unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
6290 unsigned bytele = (subword_byte % UNITS_PER_WORD
6291 + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
6292 vp = value + value_start + (bytele * BITS_PER_UNIT) / value_bit;
6293 }
6294
6295 switch (outer_class)
6296 {
6297 case MODE_INT:
6298 case MODE_PARTIAL_INT:
6299 {
6300 int u;
6301 int base = 0;
6302 int units
6303 = (GET_MODE_BITSIZE (outer_submode) + HOST_BITS_PER_WIDE_INT - 1)
6304 / HOST_BITS_PER_WIDE_INT;
6305 HOST_WIDE_INT tmp[MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT];
6306 wide_int r;
6307
6308 if (GET_MODE_PRECISION (outer_submode) > MAX_BITSIZE_MODE_ANY_INT)
6309 return NULL_RTX;
6310 for (u = 0; u < units; u++)
6311 {
6312 unsigned HOST_WIDE_INT buf = 0;
6313 for (i = 0;
6314 i < HOST_BITS_PER_WIDE_INT && base + i < elem_bitsize;
6315 i += value_bit)
6316 buf |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
6317
6318 tmp[u] = buf;
6319 base += HOST_BITS_PER_WIDE_INT;
6320 }
6321 r = wide_int::from_array (tmp, units,
6322 GET_MODE_PRECISION (outer_submode));
6323 #if TARGET_SUPPORTS_WIDE_INT == 0
6324 /* Make sure r will fit into CONST_INT or CONST_DOUBLE. */
6325 if (wi::min_precision (r, SIGNED) > HOST_BITS_PER_DOUBLE_INT)
6326 return NULL_RTX;
6327 #endif
6328 elems[elem] = immed_wide_int_const (r, outer_submode);
6329 }
6330 break;
6331
6332 case MODE_FLOAT:
6333 case MODE_DECIMAL_FLOAT:
6334 {
6335 REAL_VALUE_TYPE r;
6336 long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32] = { 0 };
6337
6338 /* real_from_target wants its input in words affected by
6339 FLOAT_WORDS_BIG_ENDIAN. However, we ignore this,
6340 and use WORDS_BIG_ENDIAN instead; see the documentation
6341 of SUBREG in rtl.texi. */
6342 for (i = 0; i < elem_bitsize; i += value_bit)
6343 {
6344 int ibase;
6345 if (WORDS_BIG_ENDIAN)
6346 ibase = elem_bitsize - 1 - i;
6347 else
6348 ibase = i;
6349 tmp[ibase / 32] |= (*vp++ & value_mask) << i % 32;
6350 }
6351
6352 real_from_target (&r, tmp, outer_submode);
6353 elems[elem] = const_double_from_real_value (r, outer_submode);
6354 }
6355 break;
6356
6357 case MODE_FRACT:
6358 case MODE_UFRACT:
6359 case MODE_ACCUM:
6360 case MODE_UACCUM:
6361 {
6362 FIXED_VALUE_TYPE f;
6363 f.data.low = 0;
6364 f.data.high = 0;
6365 f.mode = outer_submode;
6366
6367 for (i = 0;
6368 i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
6369 i += value_bit)
6370 f.data.low |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
6371 for (; i < elem_bitsize; i += value_bit)
6372 f.data.high |= ((unsigned HOST_WIDE_INT)(*vp++ & value_mask)
6373 << (i - HOST_BITS_PER_WIDE_INT));
6374
6375 elems[elem] = CONST_FIXED_FROM_FIXED_VALUE (f, outer_submode);
6376 }
6377 break;
6378
6379 default:
6380 gcc_unreachable ();
6381 }
6382 }
6383 if (VECTOR_MODE_P (outermode))
6384 return gen_rtx_CONST_VECTOR (outermode, result_v);
6385 else
6386 return result_s;
6387 } 6746 }
6388 6747
6389 /* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE) 6748 /* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE)
6390 Return 0 if no simplifications are possible. */ 6749 Return 0 if no simplifications are possible. */
6391 rtx 6750 rtx
6410 return NULL_RTX; 6769 return NULL_RTX;
6411 6770
6412 if (outermode == innermode && known_eq (byte, 0U)) 6771 if (outermode == innermode && known_eq (byte, 0U))
6413 return op; 6772 return op;
6414 6773
6774 if (GET_CODE (op) == CONST_VECTOR)
6775 byte = simplify_const_vector_byte_offset (op, byte);
6776
6415 if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode))) 6777 if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode)))
6416 { 6778 {
6417 rtx elt; 6779 rtx elt;
6418 6780
6419 if (VECTOR_MODE_P (outermode) 6781 if (VECTOR_MODE_P (outermode)
6429 if (CONST_SCALAR_INT_P (op) 6791 if (CONST_SCALAR_INT_P (op)
6430 || CONST_DOUBLE_AS_FLOAT_P (op) 6792 || CONST_DOUBLE_AS_FLOAT_P (op)
6431 || CONST_FIXED_P (op) 6793 || CONST_FIXED_P (op)
6432 || GET_CODE (op) == CONST_VECTOR) 6794 || GET_CODE (op) == CONST_VECTOR)
6433 { 6795 {
6434 /* simplify_immed_subreg deconstructs OP into bytes and constructs
6435 the result from bytes, so it only works if the sizes of the modes
6436 and the value of the offset are known at compile time. Cases
6437 that apply to general modes and offsets should be handled here
6438 before calling simplify_immed_subreg. */
6439 fixed_size_mode fs_outermode, fs_innermode;
6440 unsigned HOST_WIDE_INT cbyte; 6796 unsigned HOST_WIDE_INT cbyte;
6441 if (is_a <fixed_size_mode> (outermode, &fs_outermode) 6797 if (byte.is_constant (&cbyte))
6442 && is_a <fixed_size_mode> (innermode, &fs_innermode) 6798 {
6443 && byte.is_constant (&cbyte)) 6799 if (GET_CODE (op) == CONST_VECTOR && VECTOR_MODE_P (outermode))
6444 return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte, 6800 {
6445 0, GET_MODE_SIZE (fs_innermode)); 6801 rtx tmp = simplify_const_vector_subreg (outermode, op,
6446 6802 innermode, cbyte);
6447 /* Handle constant-sized outer modes and variable-sized inner modes. */ 6803 if (tmp)
6448 unsigned HOST_WIDE_INT first_elem; 6804 return tmp;
6449 if (GET_CODE (op) == CONST_VECTOR 6805 }
6450 && is_a <fixed_size_mode> (outermode, &fs_outermode) 6806
6451 && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode), 6807 fixed_size_mode fs_outermode;
6452 &first_elem)) 6808 if (is_a <fixed_size_mode> (outermode, &fs_outermode))
6453 return simplify_immed_subreg (fs_outermode, op, innermode, 0, 6809 return simplify_immed_subreg (fs_outermode, op, innermode, cbyte);
6454 first_elem, 6810 }
6455 GET_MODE_SIZE (fs_outermode));
6456
6457 return NULL_RTX;
6458 } 6811 }
6459 6812
6460 /* Changing mode twice with SUBREG => just change it once, 6813 /* Changing mode twice with SUBREG => just change it once,
6461 or not at all if changing back to the starting mode. 6814 or not at all if changing back to the starting mode.
6462 if (GET_CODE (op) == SUBREG) 6815 if (GET_CODE (op) == SUBREG)
6537 subreg_memory_offset (outermode, 6890 subreg_memory_offset (outermode,
6538 innermode, byte)); 6891 innermode, byte));
6539 6892
6540 /* Propagate original regno. We don't have any way to specify 6893 /* Propagate original regno. We don't have any way to specify
6541 the offset inside original regno, so do so only for lowpart. 6894 the offset inside original regno, so do so only for lowpart.
6542 The information is used only by alias analysis that can not 6895 The information is used only by alias analysis that cannot
6543 grog partial register anyway. */ 6896 grog partial register anyway. */
6544 6897
6545 if (known_eq (subreg_lowpart_offset (outermode, innermode), byte)) 6898 if (known_eq (subreg_lowpart_offset (outermode, innermode), byte))
6546 ORIGINAL_REGNO (x) = ORIGINAL_REGNO (op); 6899 ORIGINAL_REGNO (x) = ORIGINAL_REGNO (op);
6547 return x; 6900 return x;
6651 if (tem) 7004 if (tem)
6652 return tem; 7005 return tem;
6653 } 7006 }
6654 } 7007 }
6655 7008
7009 /* If OP is a vector comparison and the subreg is not changing the
7010 number of elements or the size of the elements, change the result
7011 of the comparison to the new mode. */
7012 if (COMPARISON_P (op)
7013 && VECTOR_MODE_P (outermode)
7014 && VECTOR_MODE_P (innermode)
7015 && known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode))
7016 && known_eq (GET_MODE_UNIT_SIZE (outermode),
7017 GET_MODE_UNIT_SIZE (innermode)))
7018 return simplify_gen_relational (GET_CODE (op), outermode, innermode,
7019 XEXP (op, 0), XEXP (op, 1));
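  /* Editorial example (an assumed illustration, not from the sources): the
     fold above rewrites e.g. (subreg:V4SI (gt:V4SF x y) 0) as
     (gt:V4SI x y); only the mode of the comparison result changes, while
     its element count and element size stay the same.  */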
6656 return NULL_RTX; 7020 return NULL_RTX;
6657 } 7021 }
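/* Editorial usage sketch (not part of the GCC sources): a caller that
   wants the lowpart of a constant can let the routine above do the fold.
   The names below are only an illustration.

     rtx x = gen_int_mode (0x12345678, SImode);
     rtx lo = simplify_subreg (HImode, x, SImode,
			       subreg_lowpart_offset (HImode, SImode));

   When the fold succeeds, LO is (const_int 0x5678); NULL_RTX means no
   simplification was possible.  */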
6658 7022
6659 /* Make a SUBREG operation or equivalent if it folds. */ 7023 /* Make a SUBREG operation or equivalent if it folds. */
6660 7024
6883 if (maybe_ne (nunits, 2U) 7247 if (maybe_ne (nunits, 2U)
6884 && multiple_p (nunits, 2) 7248 && multiple_p (nunits, 2)
6885 && mode_for_vector (inner_mode, 2).exists (&narrower_mode) 7249 && mode_for_vector (inner_mode, 2).exists (&narrower_mode)
6886 && VECTOR_MODE_P (narrower_mode)) 7250 && VECTOR_MODE_P (narrower_mode))
6887 { 7251 {
7252 /* Test VEC_DUPLICATE of a vector. */
7253 rtx_vector_builder nbuilder (narrower_mode, 2, 1);
7254 nbuilder.quick_push (const0_rtx);
7255 nbuilder.quick_push (const1_rtx);
7256 rtx_vector_builder builder (mode, 2, 1);
7257 builder.quick_push (const0_rtx);
7258 builder.quick_push (const1_rtx);
7259 ASSERT_RTX_EQ (builder.build (),
7260 simplify_unary_operation (VEC_DUPLICATE, mode,
7261 nbuilder.build (),
7262 narrower_mode));
7263
6888 /* Test VEC_SELECT of a vector. */ 7264 /* Test VEC_SELECT of a vector. */
6889 rtx vec_par 7265 rtx vec_par
6890 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx)); 7266 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx));
6891 rtx narrower_duplicate 7267 rtx narrower_duplicate
6892 = gen_rtx_VEC_DUPLICATE (narrower_mode, scalar_reg); 7268 = gen_rtx_VEC_DUPLICATE (narrower_mode, scalar_reg);
6955 simplify_binary_operation (MINUS, mode, duplicate, 7331 simplify_binary_operation (MINUS, mode, duplicate,
6956 series_0_m1)); 7332 series_0_m1));
6957 ASSERT_RTX_EQ (series_0_m1, 7333 ASSERT_RTX_EQ (series_0_m1,
6958 simplify_binary_operation (VEC_SERIES, mode, const0_rtx, 7334 simplify_binary_operation (VEC_SERIES, mode, const0_rtx,
6959 constm1_rtx)); 7335 constm1_rtx));
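  /* Editorial note (naming assumed from the surrounding tests): series_M_N
     stands for gen_const_vec_series (mode, M, N), i.e. the constant vector
     { M, M+N, M+2*N, ... }, so series_0_1 is { 0, 1, 2, ... } and
     series_0_m1 is { 0, -1, -2, ... }.  */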
7336
7337 /* Test NEG on constant vector series. */
7338 ASSERT_RTX_EQ (series_0_m1,
7339 simplify_unary_operation (NEG, mode, series_0_1, mode));
7340 ASSERT_RTX_EQ (series_0_1,
7341 simplify_unary_operation (NEG, mode, series_0_m1, mode));
7342
7343 /* Test PLUS and MINUS on constant vector series. */
7344 rtx scalar2 = gen_int_mode (2, inner_mode);
7345 rtx scalar3 = gen_int_mode (3, inner_mode);
7346 rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx);
7347 rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2);
7348 rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3);
7349 ASSERT_RTX_EQ (series_1_1,
7350 simplify_binary_operation (PLUS, mode, series_0_1,
7351 CONST1_RTX (mode)));
7352 ASSERT_RTX_EQ (series_0_m1,
7353 simplify_binary_operation (PLUS, mode, CONST0_RTX (mode),
7354 series_0_m1));
7355 ASSERT_RTX_EQ (series_1_3,
7356 simplify_binary_operation (PLUS, mode, series_1_1,
7357 series_0_2));
7358 ASSERT_RTX_EQ (series_0_1,
7359 simplify_binary_operation (MINUS, mode, series_1_1,
7360 CONST1_RTX (mode)));
7361 ASSERT_RTX_EQ (series_1_1,
7362 simplify_binary_operation (MINUS, mode, CONST1_RTX (mode),
7363 series_0_m1));
7364 ASSERT_RTX_EQ (series_1_1,
7365 simplify_binary_operation (MINUS, mode, series_1_3,
7366 series_0_2));
7367
7368 /* Test MULT between constant vectors. */
7369 rtx vec2 = gen_const_vec_duplicate (mode, scalar2);
7370 rtx vec3 = gen_const_vec_duplicate (mode, scalar3);
7371 rtx scalar9 = gen_int_mode (9, inner_mode);
7372 rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9);
7373 ASSERT_RTX_EQ (series_0_2,
7374 simplify_binary_operation (MULT, mode, series_0_1, vec2));
7375 ASSERT_RTX_EQ (series_3_9,
7376 simplify_binary_operation (MULT, mode, vec3, series_1_3));
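  /* Editorial arithmetic check (not part of the file): series_1_3 is
     { 1, 4, 7, ... }, so vec3 * series_1_3 is { 3, 12, 21, ... }, which is
     series_3_9 = { 3, 3+9, 3+18, ... }; likewise series_0_1 * vec2 is
     { 0, 2, 4, ... } = series_0_2.  */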
7377 if (!GET_MODE_NUNITS (mode).is_constant ())
7378 ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1,
7379 series_0_1));
7380
7381 /* Test ASHIFT between constant vectors. */
7382 ASSERT_RTX_EQ (series_0_2,
7383 simplify_binary_operation (ASHIFT, mode, series_0_1,
7384 CONST1_RTX (mode)));
7385 if (!GET_MODE_NUNITS (mode).is_constant ())
7386 ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode),
7387 series_0_1));
6960 } 7388 }
6961 7389
6962 /* Verify simplify_merge_mask works correctly. */ 7390 /* Verify simplify_merge_mask works correctly. */
6963 7391
6964 static void 7392 static void
7020 /* Called indirectly. */ 7448 /* Called indirectly. */
7021 ASSERT_RTX_EQ (gen_rtx_VEC_MERGE (mode, op0, op3, mask1), 7449 ASSERT_RTX_EQ (gen_rtx_VEC_MERGE (mode, op0, op3, mask1),
7022 simplify_rtx (nvm)); 7450 simplify_rtx (nvm));
7023 } 7451 }
7024 7452
7453 /* Test subregs of integer vector constant X, trying elements in
7454 the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)),
7455 where NELTS is the number of elements in X. Subregs involving
7456 elements [ELT_BIAS, ELT_BIAS + FIRST_VALID) are expected to fail. */
7457
7458 static void
7459 test_vector_subregs_modes (rtx x, poly_uint64 elt_bias = 0,
7460 unsigned int first_valid = 0)
7461 {
7462 machine_mode inner_mode = GET_MODE (x);
7463 scalar_mode int_mode = GET_MODE_INNER (inner_mode);
7464
7465 for (unsigned int modei = 0; modei < NUM_MACHINE_MODES; ++modei)
7466 {
7467 machine_mode outer_mode = (machine_mode) modei;
7468 if (!VECTOR_MODE_P (outer_mode))
7469 continue;
7470
7471 unsigned int outer_nunits;
7472 if (GET_MODE_INNER (outer_mode) == int_mode
7473 && GET_MODE_NUNITS (outer_mode).is_constant (&outer_nunits)
7474 && multiple_p (GET_MODE_NUNITS (inner_mode), outer_nunits))
7475 {
7476 /* Test subregs in which the outer mode is a smaller,
7477 constant-sized vector of the same element type. */
7478 unsigned int limit
7479 = constant_lower_bound (GET_MODE_NUNITS (inner_mode));
7480 for (unsigned int elt = 0; elt < limit; elt += outer_nunits)
7481 {
7482 rtx expected = NULL_RTX;
7483 if (elt >= first_valid)
7484 {
7485 rtx_vector_builder builder (outer_mode, outer_nunits, 1);
7486 for (unsigned int i = 0; i < outer_nunits; ++i)
7487 builder.quick_push (CONST_VECTOR_ELT (x, elt + i));
7488 expected = builder.build ();
7489 }
7490 poly_uint64 byte = (elt_bias + elt) * GET_MODE_SIZE (int_mode);
7491 ASSERT_RTX_EQ (expected,
7492 simplify_subreg (outer_mode, x,
7493 inner_mode, byte));
7494 }
7495 }
7496 else if (known_eq (GET_MODE_SIZE (outer_mode),
7497 GET_MODE_SIZE (inner_mode))
7498 && known_eq (elt_bias, 0U)
7499 && (GET_MODE_CLASS (outer_mode) != MODE_VECTOR_BOOL
7500 || known_eq (GET_MODE_BITSIZE (outer_mode),
7501 GET_MODE_NUNITS (outer_mode)))
7502 && (!FLOAT_MODE_P (outer_mode)
7503 || (FLOAT_MODE_FORMAT (outer_mode)->ieee_bits
7504 == GET_MODE_UNIT_PRECISION (outer_mode)))
7505 && (GET_MODE_SIZE (inner_mode).is_constant ()
7506 || !CONST_VECTOR_STEPPED_P (x)))
7507 {
7508 /* Try converting to OUTER_MODE and back. */
7509 rtx outer_x = simplify_subreg (outer_mode, x, inner_mode, 0);
7510 ASSERT_TRUE (outer_x != NULL_RTX);
7511 ASSERT_RTX_EQ (x, simplify_subreg (inner_mode, outer_x,
7512 outer_mode, 0));
7513 }
7514 }
7515
7516 if (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
7517 {
7518 /* Test each byte in the element range. */
7519 unsigned int limit
7520 = constant_lower_bound (GET_MODE_SIZE (inner_mode));
7521 for (unsigned int i = 0; i < limit; ++i)
7522 {
7523 unsigned int elt = i / GET_MODE_SIZE (int_mode);
7524 rtx expected = NULL_RTX;
7525 if (elt >= first_valid)
7526 {
7527 unsigned int byte_shift = i % GET_MODE_SIZE (int_mode);
7528 if (BYTES_BIG_ENDIAN)
7529 byte_shift = GET_MODE_SIZE (int_mode) - byte_shift - 1;
7530 rtx_mode_t vec_elt (CONST_VECTOR_ELT (x, elt), int_mode);
7531 wide_int shifted_elt
7532 = wi::lrshift (vec_elt, byte_shift * BITS_PER_UNIT);
7533 expected = immed_wide_int_const (shifted_elt, QImode);
7534 }
7535 poly_uint64 byte = elt_bias * GET_MODE_SIZE (int_mode) + i;
7536 ASSERT_RTX_EQ (expected,
7537 simplify_subreg (QImode, x, inner_mode, byte));
7538 }
7539 }
7540 }
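/* Editorial example of the folds exercised above (a little-endian sketch,
   not from the sources): with x = (const_vector:V4SI [1 2 3 4]),
   simplify_subreg (V2SImode, x, V4SImode, 8) yields
   (const_vector:V2SI [3 4]), and simplify_subreg (QImode, x, V4SImode, 4)
   yields (const_int 2), the low byte of element 1.  */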
7541
7542 /* Test constant subregs of integer vector mode INNER_MODE, using 1
7543 element per pattern. */
7544
7545 static void
7546 test_vector_subregs_repeating (machine_mode inner_mode)
7547 {
7548 poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
7549 unsigned int min_nunits = constant_lower_bound (nunits);
7550 scalar_mode int_mode = GET_MODE_INNER (inner_mode);
7551 unsigned int count = gcd (min_nunits, 8);
7552
7553 rtx_vector_builder builder (inner_mode, count, 1);
7554 for (unsigned int i = 0; i < count; ++i)
7555 builder.quick_push (gen_int_mode (8 - i, int_mode));
7556 rtx x = builder.build ();
7557
7558 test_vector_subregs_modes (x);
7559 if (!nunits.is_constant ())
7560 test_vector_subregs_modes (x, nunits - min_nunits);
7561 }
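/* Editorial note: for a fixed-length mode such as V8QI the builder above
   produces { 8, 7, 6, 5, 4, 3, 2, 1 }; for variable-length modes the same
   pattern of count == gcd (min_nunits, 8) elements repeats to fill the
   vector.  */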
7562
7563 /* Test constant subregs of integer vector mode INNER_MODE, using 2
7564 elements per pattern. */
7565
7566 static void
7567 test_vector_subregs_fore_back (machine_mode inner_mode)
7568 {
7569 poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
7570 unsigned int min_nunits = constant_lower_bound (nunits);
7571 scalar_mode int_mode = GET_MODE_INNER (inner_mode);
7572 unsigned int count = gcd (min_nunits, 4);
7573
7574 rtx_vector_builder builder (inner_mode, count, 2);
7575 for (unsigned int i = 0; i < count; ++i)
7576 builder.quick_push (gen_int_mode (i, int_mode));
7577 for (unsigned int i = 0; i < count; ++i)
7578 builder.quick_push (gen_int_mode (-(int) i, int_mode));
7579 rtx x = builder.build ();
7580
7581 test_vector_subregs_modes (x);
7582 if (!nunits.is_constant ())
7583 test_vector_subregs_modes (x, nunits - min_nunits, count);
7584 }
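/* Editorial note: with count == 4 the encoding above is { 0, 1, 2, 3 }
   followed by { 0, -1, -2, -3 }; since each pattern holds two elements,
   any further elements repeat the second ("background") value of the
   corresponding pattern, which is what the fore/back name refers to.  */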
7585
7586 /* Test constant subregs of integer vector mode INNER_MODE, using 3
7587 elements per pattern. */
7588
7589 static void
7590 test_vector_subregs_stepped (machine_mode inner_mode)
7591 {
7592 /* Build { 0, 1, 2, 3, ... }. */
7593 scalar_mode int_mode = GET_MODE_INNER (inner_mode);
7594 rtx_vector_builder builder (inner_mode, 1, 3);
7595 for (unsigned int i = 0; i < 3; ++i)
7596 builder.quick_push (gen_int_mode (i, int_mode));
7597 rtx x = builder.build ();
7598
7599 test_vector_subregs_modes (x);
7600 }
7601
7602 /* Test constant subregs of integer vector mode INNER_MODE. */
7603
7604 static void
7605 test_vector_subregs (machine_mode inner_mode)
7606 {
7607 test_vector_subregs_repeating (inner_mode);
7608 test_vector_subregs_fore_back (inner_mode);
7609 test_vector_subregs_stepped (inner_mode);
7610 }
7611
7025 /* Verify some simplifications involving vectors. */ 7612 /* Verify some simplifications involving vectors. */
7026 7613
7027 static void 7614 static void
7028 test_vector_ops () 7615 test_vector_ops ()
7029 { 7616 {
7034 { 7621 {
7035 rtx scalar_reg = make_test_reg (GET_MODE_INNER (mode)); 7622 rtx scalar_reg = make_test_reg (GET_MODE_INNER (mode));
7036 test_vector_ops_duplicate (mode, scalar_reg); 7623 test_vector_ops_duplicate (mode, scalar_reg);
7037 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 7624 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7038 && maybe_gt (GET_MODE_NUNITS (mode), 2)) 7625 && maybe_gt (GET_MODE_NUNITS (mode), 2))
7039 test_vector_ops_series (mode, scalar_reg); 7626 {
7627 test_vector_ops_series (mode, scalar_reg);
7628 test_vector_subregs (mode);
7629 }
7040 test_vec_merge (mode); 7630 test_vec_merge (mode);
7041 } 7631 }
7042 } 7632 }
7043 } 7633 }
7044 7634