diff gcc/simplify-rtx.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz
date | Thu, 13 Feb 2020 11:34:05 +0900
parents | 84e7813d76e9
children |
--- a/gcc/simplify-rtx.c	Thu Oct 25 07:37:49 2018 +0900
+++ b/gcc/simplify-rtx.c	Thu Feb 13 11:34:05 2020 +0900
@@ -1,5 +1,5 @@
 /* RTL simplification functions for GNU compiler.
-   Copyright (C) 1987-2018 Free Software Foundation, Inc.
+   Copyright (C) 1987-2020 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -35,6 +35,7 @@
 #include "flags.h"
 #include "selftest.h"
 #include "selftest-rtl.h"
+#include "rtx-vector-builder.h"
 
 /* Simplification and canonicalization of RTL.  */
 
@@ -45,7 +46,6 @@
 #define HWI_SIGN_EXTEND(low) \
   ((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0)
 
-static rtx neg_const_int (machine_mode, const_rtx);
 static bool plus_minus_operand_p (const_rtx);
 static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx);
 static rtx simplify_associative_operation (enum rtx_code, machine_mode,
@@ -56,17 +56,12 @@
 static rtx simplify_binary_operation_1 (enum rtx_code, machine_mode,
                                         rtx, rtx, rtx, rtx);
 
-/* Negate a CONST_INT rtx.  */
+/* Negate I, which satisfies poly_int_rtx_p.  MODE is the mode of I.  */
+
 static rtx
-neg_const_int (machine_mode mode, const_rtx i)
+neg_poly_int_rtx (machine_mode mode, const_rtx i)
 {
-  unsigned HOST_WIDE_INT val = -UINTVAL (i);
-
-  if (!HWI_COMPUTABLE_MODE_P (mode)
-      && val == UINTVAL (i))
-    return simplify_const_unary_operation (NEG, mode, CONST_CAST_RTX (i),
-                                           mode);
-  return gen_int_mode (val, mode);
+  return immed_wide_int_const (-wi::to_poly_wide (i, mode), mode);
 }
 
 /* Test whether expression, X, is an immediate constant that represents
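The rewritten helper above is worth a second look: the old neg_const_int had to detect the case where negation wraps back to the same bit pattern, which the wide-int based replacement handles uniformly. A standalone C sketch of that corner case (illustrative only, not part of the patch):

```c
#include <stdint.h>
#include <stdio.h>

/* In two's complement, negating the minimum value wraps around to
   itself.  This is the corner case the old neg_const_int detected
   with "val == UINTVAL (i)" before falling back to the slow path.  */
int main (void)
{
  uint64_t min = (uint64_t) 1 << 63;    /* bit pattern of INT64_MIN */
  uint64_t neg = -min;                  /* unsigned negation wraps */
  printf ("-0x%016llx = 0x%016llx (unchanged: %s)\n",
          (unsigned long long) min, (unsigned long long) neg,
          min == neg ? "yes" : "no");
  return 0;
}
```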
@@ -1504,12 +1499,12 @@
           && CONST_INT_P (XEXP (op, 1))
           && XEXP (XEXP (op, 0), 1) == XEXP (op, 1)
           && (op_mode = as_a <scalar_int_mode> (GET_MODE (op)),
-              GET_MODE_BITSIZE (op_mode) > INTVAL (XEXP (op, 1))))
+              GET_MODE_PRECISION (op_mode) > INTVAL (XEXP (op, 1))))
         {
           scalar_int_mode tmode;
-          gcc_assert (GET_MODE_BITSIZE (int_mode)
-                      > GET_MODE_BITSIZE (op_mode));
-          if (int_mode_for_size (GET_MODE_BITSIZE (op_mode)
+          gcc_assert (GET_MODE_PRECISION (int_mode)
+                      > GET_MODE_PRECISION (op_mode));
+          if (int_mode_for_size (GET_MODE_PRECISION (op_mode)
                                  - INTVAL (XEXP (op, 1)), 1).exists (&tmode))
             {
               rtx inner =
@@ -1735,45 +1730,42 @@
         }
 
       if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op))
         return gen_const_vec_duplicate (mode, op);
-      unsigned int n_elts;
       if (GET_CODE (op) == CONST_VECTOR
-          && GET_MODE_NUNITS (mode).is_constant (&n_elts))
-        {
-          /* This must be constant if we're duplicating it to a constant
-             number of elements.  */
-          unsigned int in_n_elts = CONST_VECTOR_NUNITS (op).to_constant ();
-          gcc_assert (in_n_elts < n_elts);
-          gcc_assert ((n_elts % in_n_elts) == 0);
-          rtvec v = rtvec_alloc (n_elts);
-          for (unsigned i = 0; i < n_elts; i++)
-            RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts);
-          return gen_rtx_CONST_VECTOR (mode, v);
+          && (CONST_VECTOR_DUPLICATE_P (op)
+              || CONST_VECTOR_NUNITS (op).is_constant ()))
+        {
+          unsigned int npatterns = (CONST_VECTOR_DUPLICATE_P (op)
+                                    ? CONST_VECTOR_NPATTERNS (op)
+                                    : CONST_VECTOR_NUNITS (op).to_constant ());
+          gcc_assert (multiple_p (GET_MODE_NUNITS (mode), npatterns));
+          rtx_vector_builder builder (mode, npatterns, 1);
+          for (unsigned i = 0; i < npatterns; i++)
+            builder.quick_push (CONST_VECTOR_ELT (op, i));
+          return builder.build ();
         }
     }
 
-  if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
+  if (VECTOR_MODE_P (mode)
+      && GET_CODE (op) == CONST_VECTOR
+      && known_eq (GET_MODE_NUNITS (mode), CONST_VECTOR_NUNITS (op)))
     {
-      unsigned int n_elts;
-      if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
-        return NULL_RTX;
-
-      machine_mode opmode = GET_MODE (op);
-      gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts));
-      gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts));
-
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
-
-      for (i = 0; i < n_elts; i++)
+      gcc_assert (GET_MODE (op) == op_mode);
+
+      rtx_vector_builder builder;
+      if (!builder.new_unary_operation (mode, op, false))
+        return 0;
+
+      unsigned int count = builder.encoded_nelts ();
+      for (unsigned int i = 0; i < count; i++)
         {
           rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
                                             CONST_VECTOR_ELT (op, i),
-                                            GET_MODE_INNER (opmode));
+                                            GET_MODE_INNER (op_mode));
           if (!x || !valid_for_const_vector_p (mode, x))
             return 0;
-          RTVEC_ELT (v, i) = x;
-        }
-      return gen_rtx_CONST_VECTOR (mode, v);
+          builder.quick_push (x);
+        }
      return builder.build ();
     }
 
   /* The order of these tests is critical so that, for example, we don't
@@ -1832,6 +1824,9 @@
   if (CONST_SCALAR_INT_P (op) && is_a <scalar_int_mode> (mode, &result_mode))
     {
       unsigned int width = GET_MODE_PRECISION (result_mode);
+      if (width > MAX_BITSIZE_MODE_ANY_INT)
+        return 0;
+
       wide_int result;
       scalar_int_mode imode = (op_mode == VOIDmode
                                ? result_mode
@@ -1976,6 +1971,9 @@
       && is_int_mode (mode, &result_mode))
     {
       unsigned int width = GET_MODE_PRECISION (result_mode);
+      if (width > MAX_BITSIZE_MODE_ANY_INT)
+        return 0;
+
       /* Although the overflow semantics of RTL's FIX and UNSIGNED_FIX
          operators are intentionally left unspecified (to ease implementation
          by target backends), for consistency, this routine implements the
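The duplicate-vector path above stores only the leading NPATTERNS elements and lets the encoding imply the rest. A minimal standalone C sketch of that idea (the names here are made up for illustration, not GCC API):

```c
#include <stdio.h>

/* A "duplicated" constant vector can be represented by its NPATTERNS
   leading elements; element i of the full vector is simply pattern
   element i % NPATTERNS.  This mirrors what rtx_vector_builder does
   in the hunk above.  */
static int
encoded_elt (const int *pattern, unsigned npatterns, unsigned i)
{
  return pattern[i % npatterns];
}

int main (void)
{
  int pattern[2] = { 7, 9 };    /* encodes { 7, 9, 7, 9, ... } */
  for (unsigned i = 0; i < 8; i++)
    printf ("%d ", encoded_elt (pattern, 2, i));
  printf ("\n");
  return 0;
}
```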
@@ -2133,6 +2131,132 @@
   return 0;
 }
 
+/* Return a mask describing the COMPARISON.  */
+static int
+comparison_to_mask (enum rtx_code comparison)
+{
+  switch (comparison)
+    {
+    case LT:
+      return 8;
+    case GT:
+      return 4;
+    case EQ:
+      return 2;
+    case UNORDERED:
+      return 1;
+
+    case LTGT:
+      return 12;
+    case LE:
+      return 10;
+    case GE:
+      return 6;
+    case UNLT:
+      return 9;
+    case UNGT:
+      return 5;
+    case UNEQ:
+      return 3;
+
+    case ORDERED:
+      return 14;
+    case NE:
+      return 13;
+    case UNLE:
+      return 11;
+    case UNGE:
+      return 7;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Return a comparison corresponding to the MASK.  */
+static enum rtx_code
+mask_to_comparison (int mask)
+{
+  switch (mask)
+    {
+    case 8:
+      return LT;
+    case 4:
+      return GT;
+    case 2:
+      return EQ;
+    case 1:
+      return UNORDERED;
+
+    case 12:
+      return LTGT;
+    case 10:
+      return LE;
+    case 6:
+      return GE;
+    case 9:
+      return UNLT;
+    case 5:
+      return UNGT;
+    case 3:
+      return UNEQ;
+
+    case 14:
+      return ORDERED;
+    case 13:
+      return NE;
+    case 11:
+      return UNLE;
+    case 7:
+      return UNGE;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Simplify a logical operation CODE with result mode MODE, operating on OP0
+   and OP1, which should be both relational operations.  Return 0 if no such
+   simplification is possible.  */
+rtx
+simplify_logical_relational_operation (enum rtx_code code, machine_mode mode,
+                                       rtx op0, rtx op1)
+{
+  /* We only handle IOR of two relational operations.  */
+  if (code != IOR)
+    return 0;
+
+  if (!(COMPARISON_P (op0) && COMPARISON_P (op1)))
+    return 0;
+
+  if (!(rtx_equal_p (XEXP (op0, 0), XEXP (op1, 0))
+        && rtx_equal_p (XEXP (op0, 1), XEXP (op1, 1))))
+    return 0;
+
+  enum rtx_code code0 = GET_CODE (op0);
+  enum rtx_code code1 = GET_CODE (op1);
+
+  /* We don't handle unsigned comparisons currently.  */
+  if (code0 == LTU || code0 == GTU || code0 == LEU || code0 == GEU)
+    return 0;
+  if (code1 == LTU || code1 == GTU || code1 == LEU || code1 == GEU)
+    return 0;
+
+  int mask0 = comparison_to_mask (code0);
+  int mask1 = comparison_to_mask (code1);
+
+  int mask = mask0 | mask1;
+
+  if (mask == 15)
+    return const_true_rtx;
+
+  code = mask_to_comparison (mask);
+
+  op0 = XEXP (op1, 0);
+  op1 = XEXP (op1, 1);
+
+  return simplify_gen_relational (code, mode, VOIDmode, op0, op1);
+}
 
 /* Simplify a binary operation CODE with result mode MODE, operating on OP0
    and OP1.  Return 0 if no simplification is possible.
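The mask tables above are easier to check with a concrete composition. A standalone C sketch of the scheme (bit values taken directly from comparison_to_mask above; this is an illustration, not GCC code):

```c
#include <stdio.h>

/* Bit 3 = LT, bit 2 = GT, bit 1 = EQ, bit 0 = UNORDERED.  An IOR of
   two comparisons of the same operands is the comparison whose mask
   is the bitwise OR of the two masks; 15 covers every outcome and is
   therefore always true.  */
enum { LT_BIT = 8, GT_BIT = 4, EQ_BIT = 2, UNORDERED_BIT = 1 };

int main (void)
{
  int le = LT_BIT | EQ_BIT;                 /* 10, matches case LE */
  int ne = LT_BIT | GT_BIT | UNORDERED_BIT; /* 13, matches case NE */
  printf ("LE mask = %d, NE mask = %d, LE|NE = %d (always true)\n",
          le, ne, le | ne);                 /* 10 | 13 == 15 */
  return 0;
}
```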
@@ -2549,10 +2673,10 @@
         return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode));
 
       /* Don't let a relocatable value get a negative coeff.  */
-      if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode)
+      if (poly_int_rtx_p (op1) && GET_MODE (op0) != VOIDmode)
         return simplify_gen_binary (PLUS, mode,
                                     op0,
-                                    neg_const_int (mode, op1));
+                                    neg_poly_int_rtx (mode, op1));
 
       /* (x - (x & y)) -> (x & ~y) */
       if (INTEGRAL_MODE_P (mode) && GET_CODE (op1) == AND)
@@ -2857,6 +2981,38 @@
                                         XEXP (op0, 1));
         }
 
+      /* The following happens with bitfield merging.
+         (X & C) | ((X | Y) & ~C) -> X | (Y & ~C) */
+      if (GET_CODE (op0) == AND
+          && GET_CODE (op1) == AND
+          && CONST_INT_P (XEXP (op0, 1))
+          && CONST_INT_P (XEXP (op1, 1))
+          && (INTVAL (XEXP (op0, 1))
+              == ~INTVAL (XEXP (op1, 1))))
+        {
+          /* The IOR may be on both sides.  */
+          rtx top0 = NULL_RTX, top1 = NULL_RTX;
+          if (GET_CODE (XEXP (op1, 0)) == IOR)
+            top0 = op0, top1 = op1;
+          else if (GET_CODE (XEXP (op0, 0)) == IOR)
+            top0 = op1, top1 = op0;
+          if (top0 && top1)
+            {
+              /* X may be on either side of the inner IOR.  */
+              rtx tem = NULL_RTX;
+              if (rtx_equal_p (XEXP (top0, 0),
+                               XEXP (XEXP (top1, 0), 0)))
+                tem = XEXP (XEXP (top1, 0), 1);
+              else if (rtx_equal_p (XEXP (top0, 0),
+                                    XEXP (XEXP (top1, 0), 1)))
+                tem = XEXP (XEXP (top1, 0), 0);
+              if (tem)
+                return simplify_gen_binary (IOR, mode, XEXP (top0, 0),
+                                            simplify_gen_binary
+                                            (AND, mode, tem, XEXP (top1, 1)));
+            }
+        }
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
         return tem;
@@ -2864,6 +3020,10 @@
       tem = simplify_associative_operation (code, mode, op0, op1);
       if (tem)
         return tem;
+
+      tem = simplify_logical_relational_operation (code, mode, op0, op1);
+      if (tem)
+        return tem;
       break;
 
     case XOR:
@@ -3487,9 +3647,21 @@
             {
               rtx tmp = gen_int_shift_amount
                 (inner_mode, INTVAL (XEXP (SUBREG_REG (op0), 1)) + INTVAL (op1));
-              tmp = simplify_gen_binary (code, inner_mode,
-                                         XEXP (SUBREG_REG (op0), 0),
-                                         tmp);
+
+              /* Combine would usually zero out the value when combining two
+                 local shifts and the range becomes larger or equal to the mode.
+                 However since we fold away one of the shifts here combine won't
+                 see it so we should immediately zero the result if it's out of
+                 range.  */
+              if (code == LSHIFTRT
+                  && INTVAL (tmp) >= GET_MODE_BITSIZE (inner_mode))
+                tmp = const0_rtx;
+              else
+                tmp = simplify_gen_binary (code,
+                                           inner_mode,
+                                           XEXP (SUBREG_REG (op0), 0),
+                                           tmp);
+
               return lowpart_subreg (int_mode, tmp, inner_mode);
             }
@@ -4027,6 +4199,27 @@
   return 0;
 }
 
+/* Return true if binary operation OP distributes over addition in operand
+   OPNO, with the other operand being held constant.  OPNO counts from 1.  */
+
+static bool
+distributes_over_addition_p (rtx_code op, int opno)
+{
+  switch (op)
+    {
+    case PLUS:
+    case MINUS:
+    case MULT:
+      return true;
+
+    case ASHIFT:
+      return opno == 1;
+
+    default:
+      return false;
+    }
+}
+
 rtx
 simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
                                  rtx op0, rtx op1)
@@ -4036,26 +4229,45 @@
       && GET_CODE (op0) == CONST_VECTOR
       && GET_CODE (op1) == CONST_VECTOR)
     {
-      unsigned int n_elts;
-      if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts))
-        return NULL_RTX;
-
-      gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1)));
-      gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode)));
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
-
-      for (i = 0; i < n_elts; i++)
+      bool step_ok_p;
+      if (CONST_VECTOR_STEPPED_P (op0)
+          && CONST_VECTOR_STEPPED_P (op1))
+        /* We can operate directly on the encoding if:
+
+             a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
+           implies
+             (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
+
+           Addition and subtraction are the supported operators
+           for which this is true.  */
+        step_ok_p = (code == PLUS || code == MINUS);
+      else if (CONST_VECTOR_STEPPED_P (op0))
+        /* We can operate directly on stepped encodings if:
+
+             a3 - a2 == a2 - a1
+           implies:
+             (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
+
+           which is true if (x -> x op c) distributes over addition.  */
+        step_ok_p = distributes_over_addition_p (code, 1);
+      else
+        /* Similarly in reverse.  */
+        step_ok_p = distributes_over_addition_p (code, 2);
+      rtx_vector_builder builder;
+      if (!builder.new_binary_operation (mode, op0, op1, step_ok_p))
+        return 0;
+
+      unsigned int count = builder.encoded_nelts ();
+      for (unsigned int i = 0; i < count; i++)
         {
           rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode),
                                              CONST_VECTOR_ELT (op0, i),
                                              CONST_VECTOR_ELT (op1, i));
           if (!x || !valid_for_const_vector_p (mode, x))
             return 0;
-          RTVEC_ELT (v, i) = x;
-        }
-
-      return gen_rtx_CONST_VECTOR (mode, v);
+          builder.quick_push (x);
+        }
      return builder.build ();
     }
 
   if (VECTOR_MODE_P (mode)
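distributes_over_addition_p above singles out ASHIFT: only the shifted operand position distributes over addition, not the shift-amount position. A standalone C check of both positions (illustrative only):

```c
#include <stdio.h>

/* With the shifted operand in position 1, (a + b) << c equals
   (a << c) + (b << c), so ASHIFT distributes over addition there.
   With the shift amount in position 2, c << (a + b) does not.  */
int main (void)
{
  int a = 3, b = 5, c = 2;
  printf ("(a + b) << c = %d, (a << c) + (b << c) = %d\n",
          (a + b) << c, (a << c) + (b << c));  /* 32 == 32 */
  printf ("c << (a + b) = %d, (c << a) + (c << b) = %d\n",
          c << (a + b), (c << a) + (c << b));  /* 512 != 80 */
  return 0;
}
```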
@@ -4228,7 +4440,8 @@
   scalar_int_mode int_mode;
   if (is_a <scalar_int_mode> (mode, &int_mode)
       && CONST_SCALAR_INT_P (op0)
-      && CONST_SCALAR_INT_P (op1))
+      && CONST_SCALAR_INT_P (op1)
+      && GET_MODE_PRECISION (int_mode) <= MAX_BITSIZE_MODE_ANY_INT)
     {
       wide_int result;
       wi::overflow_type overflow;
@@ -4549,11 +4762,12 @@
           }
           break;
 
-        case CONST_INT:
+        CASE_CONST_SCALAR_INT:
+        case CONST_POLY_INT:
           n_constants++;
           if (this_neg)
             {
-              ops[i].op = neg_const_int (mode, this_op);
+              ops[i].op = neg_poly_int_rtx (mode, this_op);
               ops[i].neg = 0;
               changed = 1;
               canonicalized = 1;
@@ -4678,8 +4892,8 @@
             lneg &= rneg;
             if (GET_CODE (tem) == NEG)
               tem = XEXP (tem, 0), lneg = !lneg;
-            if (CONST_INT_P (tem) && lneg)
-              tem = neg_const_int (mode, tem), lneg = 0;
+            if (poly_int_rtx_p (tem) && lneg)
+              tem = neg_poly_int_rtx (mode, tem), lneg = 0;
 
             ops[i].op = tem;
             ops[i].neg = lneg;
@@ -4738,12 +4952,12 @@
      in the array and that any other constant will be next-to-last.  */
 
   if (n_ops > 1
-      && CONST_INT_P (ops[n_ops - 1].op)
+      && poly_int_rtx_p (ops[n_ops - 1].op)
       && CONSTANT_P (ops[n_ops - 2].op))
     {
       rtx value = ops[n_ops - 1].op;
       if (ops[n_ops - 1].neg ^ ops[n_ops - 2].neg)
-        value = neg_const_int (mode, value);
+        value = neg_poly_int_rtx (mode, value);
       if (CONST_INT_P (value))
         {
           ops[n_ops - 2].op = plus_constant (mode, ops[n_ops - 2].op,
@@ -4842,6 +5056,15 @@
           return NULL_RTX;
 #endif
         }
+      /* For vector comparison with scalar int result, it is unknown
+         if the target means here a comparison into an integral bitmask,
+         or comparison where all comparisons true mean const_true_rtx
+         whole result, or where any comparisons true mean const_true_rtx
+         whole result.  For const0_rtx all the cases are the same.  */
+      if (VECTOR_MODE_P (cmp_mode)
+          && SCALAR_INT_MODE_P (mode)
+          && tem == const_true_rtx)
+        return NULL_RTX;
 
       return tem;
     }
@@ -5188,7 +5411,7 @@
 }
 
 /* Check if the given comparison (done in the given MODE) is actually
-   a tautology or a contradiction.  If the mode is VOID_mode, the
+   a tautology or a contradiction.  If the mode is VOIDmode, the
    comparison is done in "infinite precision".  If no simplification
    is possible, this function returns zero.  Otherwise, it returns
   either const_true_rtx or const0_rtx.  */
@@ -5615,9 +5838,19 @@
       rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
       rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
       if (top0 || top1)
-        return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
-                                    top0 ? top0 : XEXP (x, 0),
-                                    top1 ? top1 : XEXP (x, 1));
+        {
+          if (COMPARISON_P (x))
+            return simplify_gen_relational (GET_CODE (x), GET_MODE (x),
+                                            GET_MODE (XEXP (x, 0)) != VOIDmode
+                                            ? GET_MODE (XEXP (x, 0))
+                                            : GET_MODE (XEXP (x, 1)),
+                                            top0 ? top0 : XEXP (x, 0),
+                                            top1 ? top1 : XEXP (x, 1));
+          else
+            return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
+                                        top0 ? top0 : XEXP (x, 0),
+                                        top1 ? top1 : XEXP (x, 1));
+        }
     }
   if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
       && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
@@ -6031,8 +6264,10 @@
 
       if (!side_effects_p (op2))
        {
-         rtx top0 = simplify_merge_mask (op0, op2, 0);
-         rtx top1 = simplify_merge_mask (op1, op2, 1);
+         rtx top0
+           = may_trap_p (op0) ? NULL_RTX : simplify_merge_mask (op0, op2, 0);
+         rtx top1
+           = may_trap_p (op1) ? NULL_RTX : simplify_merge_mask (op1, op2, 1);
          if (top0 || top1)
            return simplify_gen_ternary (code, mode, mode,
                                         top0 ? top0 : op0,
                                         top1 ? top1 : op1, op2);
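The simplify_merge_mask changes above all concern (vec_merge ...) rewriting; the new may_trap_p guards keep a trapping operand (say, a division) from being evaluated in a lane the mask had made unreachable. A rough standalone model of vec_merge's per-lane selection, with a set mask bit selecting the first operand (this sketch is not GCC code):

```c
#include <stdio.h>

/* Model of vec_merge: bit i of MASK picks element i from A (bit set)
   or B (bit clear).  */
static void
vec_merge (const int *a, const int *b, unsigned mask, int *out, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (mask >> i) & 1 ? a[i] : b[i];
}

int main (void)
{
  int a[4] = { 1, 2, 3, 4 }, b[4] = { 5, 6, 7, 8 }, out[4];
  vec_merge (a, b, 0x5 /* 0101 */, out, 4);
  for (int i = 0; i < 4; i++)
    printf ("%d ", out[i]);     /* prints 1 6 3 8 */
  printf ("\n");
  return 0;
}
```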
@@ -6048,342 +6283,466 @@
   return 0;
 }
 
-/* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE
-   or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
-   CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
-
-   Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values
-   represented as a little-endian array of 'unsigned char', selecting by BYTE,
-   and then repacking them again for OUTERMODE.  If OP is a CONST_VECTOR,
-   FIRST_ELEM is the number of the first element to extract, otherwise
-   FIRST_ELEM is ignored.  */
-
-static rtx
-simplify_immed_subreg (fixed_size_mode outermode, rtx op,
-                       machine_mode innermode, unsigned int byte,
-                       unsigned int first_elem, unsigned int inner_bytes)
+/* Try to calculate NUM_BYTES bytes of the target memory image of X,
+   starting at byte FIRST_BYTE.  Return true on success and add the
+   bytes to BYTES, such that each byte has BITS_PER_UNIT bits and such
+   that the bytes follow target memory order.  Leave BYTES unmodified
+   on failure.
+
+   MODE is the mode of X.  The caller must reserve NUM_BYTES bytes in
+   BYTES before calling this function.  */
+
+bool
+native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
+                   unsigned int first_byte, unsigned int num_bytes)
 {
-  enum {
-    value_bit = 8,
-    value_mask = (1 << value_bit) - 1
-  };
-  unsigned char value[MAX_BITSIZE_MODE_ANY_MODE / value_bit];
-  int value_start;
-  int i;
-  int elem;
-
-  int num_elem;
-  rtx * elems;
-  int elem_bitsize;
-  rtx result_s = NULL;
-  rtvec result_v = NULL;
-  enum mode_class outer_class;
-  scalar_mode outer_submode;
-  int max_bitsize;
-
-  /* Some ports misuse CCmode.  */
-  if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (op))
-    return op;
-
-  /* We have no way to represent a complex constant at the rtl level.  */
-  if (COMPLEX_MODE_P (outermode))
-    return NULL_RTX;
-
-  /* We support any size mode.  */
-  max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
-                     inner_bytes * BITS_PER_UNIT);
-
-  /* Unpack the value.  */
-
-  if (GET_CODE (op) == CONST_VECTOR)
+  /* Check the mode is sensible.  */
+  gcc_assert (GET_MODE (x) == VOIDmode
+              ? is_a <scalar_int_mode> (mode)
+              : mode == GET_MODE (x));
+
+  if (GET_CODE (x) == CONST_VECTOR)
+    {
+      /* CONST_VECTOR_ELT follows target memory order, so no shuffling
+         is necessary.  The only complication is that MODE_VECTOR_BOOL
+         vectors can have several elements per byte.  */
+      unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
+                                                   GET_MODE_NUNITS (mode));
+      unsigned int elt = first_byte * BITS_PER_UNIT / elt_bits;
+      if (elt_bits < BITS_PER_UNIT)
+        {
+          /* This is the only case in which elements can be smaller than
+             a byte.  */
+          gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+          for (unsigned int i = 0; i < num_bytes; ++i)
+            {
+              target_unit value = 0;
+              for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
+                {
+                  value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
+                  elt += 1;
+                }
+              bytes.quick_push (value);
+            }
+          return true;
+        }
+
+      unsigned int start = bytes.length ();
+      unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mode);
+      /* Make FIRST_BYTE relative to ELT.  */
+      first_byte %= elt_bytes;
+      while (num_bytes > 0)
+        {
+          /* Work out how many bytes we want from element ELT.  */
+          unsigned int chunk_bytes = MIN (num_bytes, elt_bytes - first_byte);
+          if (!native_encode_rtx (GET_MODE_INNER (mode),
+                                  CONST_VECTOR_ELT (x, elt), bytes,
+                                  first_byte, chunk_bytes))
+            {
+              bytes.truncate (start);
+              return false;
+            }
+          elt += 1;
+          first_byte = 0;
+          num_bytes -= chunk_bytes;
+        }
+      return true;
+    }
+
+  /* All subsequent cases are limited to scalars.  */
+  scalar_mode smode;
+  if (!is_a <scalar_mode> (mode, &smode))
+    return false;
+
+  /* Make sure that the region is in range.  */
+  unsigned int end_byte = first_byte + num_bytes;
+  unsigned int mode_bytes = GET_MODE_SIZE (smode);
+  gcc_assert (end_byte <= mode_bytes);
+
+  if (CONST_SCALAR_INT_P (x))
     {
-      num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode));
-      elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode);
+      /* The target memory layout is affected by both BYTES_BIG_ENDIAN
+         and WORDS_BIG_ENDIAN.  Use the subreg machinery to get the lsb
+         position of each byte.  */
+      rtx_mode_t value (x, smode);
+      wide_int_ref value_wi (value);
+      for (unsigned int byte = first_byte; byte < end_byte; ++byte)
+        {
+          /* Always constant because the inputs are.  */
+          unsigned int lsb
+            = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
+          /* Operate directly on the encoding rather than using
+             wi::extract_uhwi, so that we preserve the sign or zero
+             extension for modes that are not a whole number of bits in
+             size.  (Zero extension is only used for the combination of
+             innermode == BImode && STORE_FLAG_VALUE == 1).  */
+          unsigned int elt = lsb / HOST_BITS_PER_WIDE_INT;
+          unsigned int shift = lsb % HOST_BITS_PER_WIDE_INT;
+          unsigned HOST_WIDE_INT uhwi = value_wi.elt (elt);
+          bytes.quick_push (uhwi >> shift);
+        }
+      return true;
+    }
+
+  if (CONST_DOUBLE_P (x))
+    {
+      /* real_to_target produces an array of integers in target memory order.
+         All integers before the last one have 32 bits; the last one may
+         have 32 bits or fewer, depending on whether the mode bitsize
+         is divisible by 32.  Each of these integers is then laid out
+         in target memory as any other integer would be.  */
+      long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
+      real_to_target (el32, CONST_DOUBLE_REAL_VALUE (x), smode);
+
+      /* The (maximum) number of target bytes per element of el32.  */
+      unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
+      gcc_assert (bytes_per_el32 != 0);
+
+      /* Build up the integers in a similar way to the CONST_SCALAR_INT_P
+         handling above.  */
+      for (unsigned int byte = first_byte; byte < end_byte; ++byte)
+        {
+          unsigned int index = byte / bytes_per_el32;
+          unsigned int subbyte = byte % bytes_per_el32;
+          unsigned int int_bytes = MIN (bytes_per_el32,
+                                        mode_bytes - index * bytes_per_el32);
+          /* Always constant because the inputs are.  */
+          unsigned int lsb
+            = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
+          bytes.quick_push ((unsigned long) el32[index] >> lsb);
+        }
+      return true;
+    }
+
+  if (GET_CODE (x) == CONST_FIXED)
+    {
+      for (unsigned int byte = first_byte; byte < end_byte; ++byte)
+        {
+          /* Always constant because the inputs are.  */
+          unsigned int lsb
+            = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
+          unsigned HOST_WIDE_INT piece = CONST_FIXED_VALUE_LOW (x);
+          if (lsb >= HOST_BITS_PER_WIDE_INT)
+            {
+              lsb -= HOST_BITS_PER_WIDE_INT;
+              piece = CONST_FIXED_VALUE_HIGH (x);
+            }
+          bytes.quick_push (piece >> lsb);
+        }
+      return true;
+    }
+
+  return false;
+}
+
+/* Read a vector of mode MODE from the target memory image given by BYTES,
+   starting at byte FIRST_BYTE.  The vector is known to be encodable using
+   NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each,
+   and BYTES is known to have enough bytes to supply NPATTERNS *
+   NELTS_PER_PATTERN vector elements.  Each element of BYTES contains
+   BITS_PER_UNIT bits and the bytes are in target memory order.
+
+   Return the vector on success, otherwise return NULL_RTX.  */
+
+rtx
+native_decode_vector_rtx (machine_mode mode, vec<target_unit> bytes,
+                          unsigned int first_byte, unsigned int npatterns,
+                          unsigned int nelts_per_pattern)
+{
+  rtx_vector_builder builder (mode, npatterns, nelts_per_pattern);
+
+  unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
+                                               GET_MODE_NUNITS (mode));
+  if (elt_bits < BITS_PER_UNIT)
+    {
+      /* This is the only case in which elements can be smaller than a byte.
+         Element 0 is always in the lsb of the containing byte.  */
+      gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+      for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
+        {
+          unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
+          unsigned int byte_index = bit_index / BITS_PER_UNIT;
+          unsigned int lsb = bit_index % BITS_PER_UNIT;
+          builder.quick_push (bytes[byte_index] & (1 << lsb)
+                              ? CONST1_RTX (BImode)
+                              : CONST0_RTX (BImode));
+        }
     }
   else
     {
-      num_elem = 1;
-      elem_bitsize = max_bitsize;
-    }
-  /* If this asserts, it is too complicated; reducing value_bit may help.  */
-  gcc_assert (BITS_PER_UNIT % value_bit == 0);
-  /* I don't know how to handle endianness of sub-units.  */
-  gcc_assert (elem_bitsize % BITS_PER_UNIT == 0);
-
-  for (elem = 0; elem < num_elem; elem++)
-    {
-      unsigned char * vp;
-      rtx el = (GET_CODE (op) == CONST_VECTOR
-                ? CONST_VECTOR_ELT (op, first_elem + elem)
-                : op);
-
-      /* Vectors are kept in target memory order.  (This is probably
-         a mistake.)  */
-      {
-        unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
-        unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
-                          / BITS_PER_UNIT);
-        unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
-        unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
-        unsigned bytele = (subword_byte % UNITS_PER_WORD
-                           + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
-        vp = value + (bytele * BITS_PER_UNIT) / value_bit;
-      }
-
-      switch (GET_CODE (el))
-        {
-        case CONST_INT:
-          for (i = 0;
-               i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
-               i += value_bit)
-            *vp++ = INTVAL (el) >> i;
-          /* CONST_INTs are always logically sign-extended.  */
-          for (; i < elem_bitsize; i += value_bit)
-            *vp++ = INTVAL (el) < 0 ? -1 : 0;
-          break;
-
-        case CONST_WIDE_INT:
-          {
-            rtx_mode_t val = rtx_mode_t (el, GET_MODE_INNER (innermode));
-            unsigned char extend = wi::sign_mask (val);
-            int prec = wi::get_precision (val);
-
-            for (i = 0; i < prec && i < elem_bitsize; i += value_bit)
-              *vp++ = wi::extract_uhwi (val, i, value_bit);
-            for (; i < elem_bitsize; i += value_bit)
-              *vp++ = extend;
-          }
-          break;
-
-        case CONST_DOUBLE:
-          if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (el) == VOIDmode)
-            {
-              unsigned char extend = 0;
-              /* If this triggers, someone should have generated a
-                 CONST_INT instead.  */
-              gcc_assert (elem_bitsize > HOST_BITS_PER_WIDE_INT);
-
-              for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
-                *vp++ = CONST_DOUBLE_LOW (el) >> i;
-              while (i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize)
-                {
-                  *vp++
-                    = CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT);
-                  i += value_bit;
-                }
-
-              if (CONST_DOUBLE_HIGH (el) >> (HOST_BITS_PER_WIDE_INT - 1))
-                extend = -1;
-              for (; i < elem_bitsize; i += value_bit)
-                *vp++ = extend;
-            }
-          else
-            {
-              /* This is big enough for anything on the platform.  */
-              long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32];
-              scalar_float_mode el_mode;
-
-              el_mode = as_a <scalar_float_mode> (GET_MODE (el));
-              int bitsize = GET_MODE_BITSIZE (el_mode);
-
-              gcc_assert (bitsize <= elem_bitsize);
-              gcc_assert (bitsize % value_bit == 0);
-
-              real_to_target (tmp, CONST_DOUBLE_REAL_VALUE (el),
-                              GET_MODE (el));
-
-              /* real_to_target produces its result in words affected by
-                 FLOAT_WORDS_BIG_ENDIAN.  However, we ignore this,
-                 and use WORDS_BIG_ENDIAN instead; see the documentation
-                 of SUBREG in rtl.texi.  */
-              for (i = 0; i < bitsize; i += value_bit)
-                {
-                  int ibase;
-                  if (WORDS_BIG_ENDIAN)
-                    ibase = bitsize - 1 - i;
-                  else
-                    ibase = i;
-                  *vp++ = tmp[ibase / 32] >> i % 32;
-                }
-
-              /* It shouldn't matter what's done here, so fill it with
-                 zero.  */
-              for (; i < elem_bitsize; i += value_bit)
-                *vp++ = 0;
-            }
-          break;
-
-        case CONST_FIXED:
-          if (elem_bitsize <= HOST_BITS_PER_WIDE_INT)
-            {
-              for (i = 0; i < elem_bitsize; i += value_bit)
-                *vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
-            }
-          else
-            {
-              for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
-                *vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
-              for (; i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize;
-                   i += value_bit)
-                *vp++ = CONST_FIXED_VALUE_HIGH (el)
-                  >> (i - HOST_BITS_PER_WIDE_INT);
-              for (; i < elem_bitsize; i += value_bit)
-                *vp++ = 0;
-            }
-          break;
-
-        default:
-          gcc_unreachable ();
+      for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
+        {
+          rtx x = native_decode_rtx (GET_MODE_INNER (mode), bytes, first_byte);
+          if (!x)
+            return NULL_RTX;
+          builder.quick_push (x);
+          first_byte += elt_bits / BITS_PER_UNIT;
         }
     }
-
-  /* Now, pick the right byte to start with.  */
-  /* Renumber BYTE so that the least-significant byte is byte 0.  A special
-     case is paradoxical SUBREGs, which shouldn't be adjusted since they
-     will already have offset 0.  */
-  if (inner_bytes >= GET_MODE_SIZE (outermode))
+  return builder.build ();
+}
+
+/* Read an rtx of mode MODE from the target memory image given by BYTES,
+   starting at byte FIRST_BYTE.  Each element of BYTES contains BITS_PER_UNIT
+   bits and the bytes are in target memory order.  The image has enough
+   values to specify all bytes of MODE.
+
+   Return the rtx on success, otherwise return NULL_RTX.  */
+
+rtx
+native_decode_rtx (machine_mode mode, vec<target_unit> bytes,
+                   unsigned int first_byte)
+{
+  if (VECTOR_MODE_P (mode))
+    {
+      /* If we know at compile time how many elements there are,
+         pull each element directly from BYTES.  */
+      unsigned int nelts;
+      if (GET_MODE_NUNITS (mode).is_constant (&nelts))
+        return native_decode_vector_rtx (mode, bytes, first_byte, nelts, 1);
+      return NULL_RTX;
+    }
+
+  scalar_int_mode imode;
+  if (is_a <scalar_int_mode> (mode, &imode)
+      && GET_MODE_PRECISION (imode) <= MAX_BITSIZE_MODE_ANY_INT)
+    {
+      /* Pull the bytes msb first, so that we can use simple
+         shift-and-insert wide_int operations.  */
+      unsigned int size = GET_MODE_SIZE (imode);
+      wide_int result (wi::zero (GET_MODE_PRECISION (imode)));
+      for (unsigned int i = 0; i < size; ++i)
+        {
+          unsigned int lsb = (size - i - 1) * BITS_PER_UNIT;
+          /* Always constant because the inputs are.  */
+          unsigned int subbyte
+            = subreg_size_offset_from_lsb (1, size, lsb).to_constant ();
+          result <<= BITS_PER_UNIT;
+          result |= bytes[first_byte + subbyte];
+        }
+      return immed_wide_int_const (result, imode);
+    }
+
+  scalar_float_mode fmode;
+  if (is_a <scalar_float_mode> (mode, &fmode))
     {
-      unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte;
-      unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
-      unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
-      byte = (subword_byte % UNITS_PER_WORD
-              + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
+      /* We need to build an array of integers in target memory order.
+         All integers before the last one have 32 bits; the last one may
+         have 32 bits or fewer, depending on whether the mode bitsize
+         is divisible by 32.  */
+      long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
+      unsigned int num_el32 = CEIL (GET_MODE_BITSIZE (fmode), 32);
+      memset (el32, 0, num_el32 * sizeof (long));
+
+      /* The (maximum) number of target bytes per element of el32.  */
+      unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
+      gcc_assert (bytes_per_el32 != 0);
+
+      unsigned int mode_bytes = GET_MODE_SIZE (fmode);
+      for (unsigned int byte = 0; byte < mode_bytes; ++byte)
+        {
+          unsigned int index = byte / bytes_per_el32;
+          unsigned int subbyte = byte % bytes_per_el32;
+          unsigned int int_bytes = MIN (bytes_per_el32,
+                                        mode_bytes - index * bytes_per_el32);
+          /* Always constant because the inputs are.  */
+          unsigned int lsb
+            = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
+          el32[index] |= (unsigned long) bytes[first_byte + byte] << lsb;
+        }
+      REAL_VALUE_TYPE r;
+      real_from_target (&r, el32, fmode);
+      return const_double_from_real_value (r, fmode);
+    }
+
+  if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
+    {
+      scalar_mode smode = as_a <scalar_mode> (mode);
+      FIXED_VALUE_TYPE f;
+      f.data.low = 0;
+      f.data.high = 0;
+      f.mode = smode;
+
+      unsigned int mode_bytes = GET_MODE_SIZE (smode);
+      for (unsigned int byte = 0; byte < mode_bytes; ++byte)
+        {
+          /* Always constant because the inputs are.  */
+          unsigned int lsb
+            = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
+          unsigned HOST_WIDE_INT unit = bytes[first_byte + byte];
+          if (lsb >= HOST_BITS_PER_WIDE_INT)
+            f.data.high |= unit << (lsb - HOST_BITS_PER_WIDE_INT);
+          else
+            f.data.low |= unit << lsb;
+        }
+      return CONST_FIXED_FROM_FIXED_VALUE (f, mode);
     }
 
-  /* BYTE should still be inside OP.  (Note that BYTE is unsigned,
-     so if it's become negative it will instead be very large.)  */
-  gcc_assert (byte < inner_bytes);
-
-  /* Convert from bytes to chunks of size value_bit.  */
-  value_start = byte * (BITS_PER_UNIT / value_bit);
-
-  /* Re-pack the value.  */
-  num_elem = GET_MODE_NUNITS (outermode);
-
-  if (VECTOR_MODE_P (outermode))
+  return NULL_RTX;
+}
+
+/* Simplify a byte offset BYTE into CONST_VECTOR X.  The main purpose
+   is to convert a runtime BYTE value into a constant one.  */
+
+static poly_uint64
+simplify_const_vector_byte_offset (rtx x, poly_uint64 byte)
+{
+  /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes.  */
+  machine_mode mode = GET_MODE (x);
+  unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
+                                               GET_MODE_NUNITS (mode));
+  /* The number of bits needed to encode one element from each pattern.  */
+  unsigned int sequence_bits = CONST_VECTOR_NPATTERNS (x) * elt_bits;
+
+  /* Identify the start point in terms of a sequence number and a byte offset
+     within that sequence.  */
+  poly_uint64 first_sequence;
+  unsigned HOST_WIDE_INT subbit;
+  if (can_div_trunc_p (byte * BITS_PER_UNIT, sequence_bits,
+                       &first_sequence, &subbit))
     {
-      result_v = rtvec_alloc (num_elem);
-      elems = &RTVEC_ELT (result_v, 0);
+      unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
+      if (nelts_per_pattern == 1)
+        /* This is a duplicated vector, so the value of FIRST_SEQUENCE
+           doesn't matter.  */
+        byte = subbit / BITS_PER_UNIT;
+      else if (nelts_per_pattern == 2 && known_gt (first_sequence, 0U))
+        {
+          /* The subreg drops the first element from each pattern and
+             only uses the second element.  Find the first sequence
+             that starts on a byte boundary.  */
+          subbit += least_common_multiple (sequence_bits, BITS_PER_UNIT);
+          byte = subbit / BITS_PER_UNIT;
+        }
+    }
+  return byte;
+}
+
+/* Subroutine of simplify_subreg in which:
+
+   - X is known to be a CONST_VECTOR
+   - OUTERMODE is known to be a vector mode
+
+   Try to handle the subreg by operating on the CONST_VECTOR encoding
+   rather than on each individual element of the CONST_VECTOR.
+
+   Return the simplified subreg on success, otherwise return NULL_RTX.  */
+
+static rtx
+simplify_const_vector_subreg (machine_mode outermode, rtx x,
+                              machine_mode innermode, unsigned int first_byte)
+{
+  /* Paradoxical subregs of vectors have dubious semantics.  */
+  if (paradoxical_subreg_p (outermode, innermode))
+    return NULL_RTX;
+
+  /* We can only preserve the semantics of a stepped pattern if the new
+     vector element is the same as the original one.  */
+  if (CONST_VECTOR_STEPPED_P (x)
+      && GET_MODE_INNER (outermode) != GET_MODE_INNER (innermode))
+    return NULL_RTX;
+
+  /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes.  */
+  unsigned int x_elt_bits
+    = vector_element_size (GET_MODE_BITSIZE (innermode),
+                           GET_MODE_NUNITS (innermode));
+  unsigned int out_elt_bits
+    = vector_element_size (GET_MODE_BITSIZE (outermode),
+                           GET_MODE_NUNITS (outermode));
+
+  /* The number of bits needed to encode one element from every pattern
+     of the original vector.  */
+  unsigned int x_sequence_bits = CONST_VECTOR_NPATTERNS (x) * x_elt_bits;
+
+  /* The number of bits needed to encode one element from every pattern
+     of the result.  */
+  unsigned int out_sequence_bits
+    = least_common_multiple (x_sequence_bits, out_elt_bits);
+
+  /* Work out the number of interleaved patterns in the output vector
+     and the number of encoded elements per pattern.  */
+  unsigned int out_npatterns = out_sequence_bits / out_elt_bits;
+  unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
+
+  /* The encoding scheme requires the number of elements to be a multiple
+     of the number of patterns, so that each pattern appears at least once
+     and so that the same number of elements appear from each pattern.  */
+  bool ok_p = multiple_p (GET_MODE_NUNITS (outermode), out_npatterns);
+  unsigned int const_nunits;
+  if (GET_MODE_NUNITS (outermode).is_constant (&const_nunits)
+      && (!ok_p || out_npatterns * nelts_per_pattern > const_nunits))
+    {
+      /* Either the encoding is invalid, or applying it would give us
+         more elements than we need.  Just encode each element directly.  */
+      out_npatterns = const_nunits;
+      nelts_per_pattern = 1;
+    }
+  else if (!ok_p)
+    return NULL_RTX;
+
+  /* Get enough bytes of X to form the new encoding.  */
+  unsigned int buffer_bits = out_npatterns * nelts_per_pattern * out_elt_bits;
+  unsigned int buffer_bytes = CEIL (buffer_bits, BITS_PER_UNIT);
+  auto_vec<target_unit, 128> buffer (buffer_bytes);
+  if (!native_encode_rtx (innermode, x, buffer, first_byte, buffer_bytes))
+    return NULL_RTX;
+
+  /* Reencode the bytes as OUTERMODE.  */
+  return native_decode_vector_rtx (outermode, buffer, 0, out_npatterns,
+                                   nelts_per_pattern);
+}
+
+/* Try to simplify a subreg of a constant by encoding the subreg region
+   as a sequence of target bytes and reading them back in the new mode.
+   Return the new value on success, otherwise return null.
+
+   The subreg has outer mode OUTERMODE, inner mode INNERMODE, inner value X
+   and byte offset FIRST_BYTE.  */
+
+static rtx
+simplify_immed_subreg (fixed_size_mode outermode, rtx x,
+                       machine_mode innermode, unsigned int first_byte)
+{
+  unsigned int buffer_bytes = GET_MODE_SIZE (outermode);
+  auto_vec<target_unit, 128> buffer (buffer_bytes);
+
+  /* Some ports misuse CCmode.  */
+  if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (x))
+    return x;
+
+  /* Paradoxical subregs read undefined values for bytes outside of the
+     inner value.  However, we have traditionally always sign-extended
+     integer constants and zero-extended others.  */
+  unsigned int inner_bytes = buffer_bytes;
+  if (paradoxical_subreg_p (outermode, innermode))
+    {
+      if (!GET_MODE_SIZE (innermode).is_constant (&inner_bytes))
+        return NULL_RTX;
+
+      target_unit filler = 0;
+      if (CONST_SCALAR_INT_P (x) && wi::neg_p (rtx_mode_t (x, innermode)))
+        filler = -1;
+
+      /* Add any leading bytes due to big-endian layout.  The number of
+         bytes must be constant because both modes have constant size.  */
+      unsigned int leading_bytes
+        = -byte_lowpart_offset (outermode, innermode).to_constant ();
+      for (unsigned int i = 0; i < leading_bytes; ++i)
+        buffer.quick_push (filler);
+
+      if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
+        return NULL_RTX;
+
+      /* Add any trailing bytes due to little-endian layout.  */
+      while (buffer.length () < buffer_bytes)
+        buffer.quick_push (filler);
+    }
   else
-    elems = &result_s;
-
-  outer_submode = GET_MODE_INNER (outermode);
-  outer_class = GET_MODE_CLASS (outer_submode);
-  elem_bitsize = GET_MODE_BITSIZE (outer_submode);
-
-  gcc_assert (elem_bitsize % value_bit == 0);
-  gcc_assert (elem_bitsize + value_start * value_bit <= max_bitsize);
-
-  for (elem = 0; elem < num_elem; elem++)
     {
-      unsigned char *vp;
-
-      /* Vectors are stored in target memory order.  (This is probably
-         a mistake.)  */
-      {
-        unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
-        unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
-                          / BITS_PER_UNIT);
-        unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
-        unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
-        unsigned bytele = (subword_byte % UNITS_PER_WORD
-                           + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
-        vp = value + value_start + (bytele * BITS_PER_UNIT) / value_bit;
+      if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
+        return NULL_RTX;
     }
-
-      switch (outer_class)
-        {
-        case MODE_INT:
-        case MODE_PARTIAL_INT:
-          {
-            int u;
-            int base = 0;
-            int units
-              = (GET_MODE_BITSIZE (outer_submode) + HOST_BITS_PER_WIDE_INT - 1)
-                / HOST_BITS_PER_WIDE_INT;
-            HOST_WIDE_INT tmp[MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT];
-            wide_int r;
-
-            if (GET_MODE_PRECISION (outer_submode) > MAX_BITSIZE_MODE_ANY_INT)
-              return NULL_RTX;
-            for (u = 0; u < units; u++)
-              {
-                unsigned HOST_WIDE_INT buf = 0;
-                for (i = 0;
-                     i < HOST_BITS_PER_WIDE_INT && base + i < elem_bitsize;
-                     i += value_bit)
-                  buf |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
-
-                tmp[u] = buf;
-                base += HOST_BITS_PER_WIDE_INT;
-              }
-            r = wide_int::from_array (tmp, units,
-                                      GET_MODE_PRECISION (outer_submode));
-#if TARGET_SUPPORTS_WIDE_INT == 0
-            /* Make sure r will fit into CONST_INT or CONST_DOUBLE.  */
-            if (wi::min_precision (r, SIGNED) > HOST_BITS_PER_DOUBLE_INT)
-              return NULL_RTX;
-#endif
-            elems[elem] = immed_wide_int_const (r, outer_submode);
-          }
-          break;
-
-        case MODE_FLOAT:
-        case MODE_DECIMAL_FLOAT:
-          {
-            REAL_VALUE_TYPE r;
-            long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32] = { 0 };
-
-            /* real_from_target wants its input in words affected by
-               FLOAT_WORDS_BIG_ENDIAN.  However, we ignore this,
-               and use WORDS_BIG_ENDIAN instead; see the documentation
-               of SUBREG in rtl.texi.  */
-            for (i = 0; i < elem_bitsize; i += value_bit)
-              {
-                int ibase;
-                if (WORDS_BIG_ENDIAN)
-                  ibase = elem_bitsize - 1 - i;
-                else
-                  ibase = i;
-                tmp[ibase / 32] |= (*vp++ & value_mask) << i % 32;
-              }
-
-            real_from_target (&r, tmp, outer_submode);
-            elems[elem] = const_double_from_real_value (r, outer_submode);
-          }
-          break;
-
-        case MODE_FRACT:
-        case MODE_UFRACT:
-        case MODE_ACCUM:
-        case MODE_UACCUM:
-          {
-            FIXED_VALUE_TYPE f;
-            f.data.low = 0;
-            f.data.high = 0;
-            f.mode = outer_submode;
-
-            for (i = 0;
-                 i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
-                 i += value_bit)
-              f.data.low |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
-            for (; i < elem_bitsize; i += value_bit)
-              f.data.high |= ((unsigned HOST_WIDE_INT)(*vp++ & value_mask)
-                              << (i - HOST_BITS_PER_WIDE_INT));
-
-            elems[elem] = CONST_FIXED_FROM_FIXED_VALUE (f, outer_submode);
-          }
-          break;
-
-        default:
-          gcc_unreachable ();
-        }
-    }
-  if (VECTOR_MODE_P (outermode))
-    return gen_rtx_CONST_VECTOR (outermode, result_v);
-  else
-    return result_s;
+  return native_decode_rtx (outermode, buffer, 0);
 }
 
 /* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE)
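The byte-image round trip that native_encode_rtx and native_decode_rtx implement above can be seen in miniature below. This sketch hard-codes a little-endian layout where GCC instead derives each byte's position from subreg_size_lsb, so it is an illustration of the idea only:

```c
#include <stdint.h>
#include <stdio.h>

/* Encode a 32-bit value as target bytes, then decode it again.  */
static void encode_u32 (uint32_t x, uint8_t bytes[4])
{
  for (int i = 0; i < 4; i++)
    bytes[i] = (uint8_t) (x >> (8 * i));
}

static uint32_t decode_u32 (const uint8_t bytes[4])
{
  uint32_t x = 0;
  for (int i = 3; i >= 0; i--)  /* msb first, shift-and-insert */
    x = (x << 8) | bytes[i];
  return x;
}

int main (void)
{
  uint8_t buf[4];
  encode_u32 (0x12345678, buf);
  printf ("round trip: 0x%08x\n", decode_u32 (buf)); /* 0x12345678 */
  return 0;
}
```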
@@ -6412,6 +6771,9 @@
   if (outermode == innermode && known_eq (byte, 0U))
     return op;
 
+  if (GET_CODE (op) == CONST_VECTOR)
+    byte = simplify_const_vector_byte_offset (op, byte);
+
   if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode)))
     {
       rtx elt;
@@ -6431,30 +6793,21 @@
       || CONST_FIXED_P (op)
       || GET_CODE (op) == CONST_VECTOR)
     {
-      /* simplify_immed_subreg deconstructs OP into bytes and constructs
-         the result from bytes, so it only works if the sizes of the modes
-         and the value of the offset are known at compile time.  Cases that
-         that apply to general modes and offsets should be handled here
-         before calling simplify_immed_subreg.  */
-      fixed_size_mode fs_outermode, fs_innermode;
       unsigned HOST_WIDE_INT cbyte;
-      if (is_a <fixed_size_mode> (outermode, &fs_outermode)
-          && is_a <fixed_size_mode> (innermode, &fs_innermode)
-          && byte.is_constant (&cbyte))
-        return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte,
-                                      0, GET_MODE_SIZE (fs_innermode));
-
-      /* Handle constant-sized outer modes and variable-sized inner modes.  */
-      unsigned HOST_WIDE_INT first_elem;
-      if (GET_CODE (op) == CONST_VECTOR
-          && is_a <fixed_size_mode> (outermode, &fs_outermode)
-          && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode),
-                                  &first_elem))
-        return simplify_immed_subreg (fs_outermode, op, innermode, 0,
-                                      first_elem,
-                                      GET_MODE_SIZE (fs_outermode));
-
-      return NULL_RTX;
+      if (byte.is_constant (&cbyte))
+        {
+          if (GET_CODE (op) == CONST_VECTOR && VECTOR_MODE_P (outermode))
+            {
+              rtx tmp = simplify_const_vector_subreg (outermode, op,
+                                                      innermode, cbyte);
+              if (tmp)
+                return tmp;
+            }
+
+          fixed_size_mode fs_outermode;
+          if (is_a <fixed_size_mode> (outermode, &fs_outermode))
+            return simplify_immed_subreg (fs_outermode, op, innermode, cbyte);
+        }
     }
 
   /* Changing mode twice with SUBREG => just change it once,
@@ -6539,7 +6892,7 @@
 
   /* Propagate original regno.  We don't have any way to specify
      the offset inside original regno, so do so only for lowpart.
-     The information is used only by alias analysis that can not
+     The information is used only by alias analysis that cannot
      grog partial register anyway.  */
 
   if (known_eq (subreg_lowpart_offset (outermode, innermode), byte))
@@ -6653,6 +7006,17 @@
         }
     }
 
+  /* If OP is a vector comparison and the subreg is not changing the
+     number of elements or the size of the elements, change the result
+     of the comparison to the new mode.  */
+  if (COMPARISON_P (op)
+      && VECTOR_MODE_P (outermode)
+      && VECTOR_MODE_P (innermode)
+      && known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode))
+      && known_eq (GET_MODE_UNIT_SIZE (outermode),
+                   GET_MODE_UNIT_SIZE (innermode)))
+    return simplify_gen_relational (GET_CODE (op), outermode, innermode,
+                                    XEXP (op, 0), XEXP (op, 1));
+
   return NULL_RTX;
 }
@@ -6885,6 +7249,18 @@
       && mode_for_vector (inner_mode, 2).exists (&narrower_mode)
       && VECTOR_MODE_P (narrower_mode))
     {
+      /* Test VEC_DUPLICATE of a vector.  */
+      rtx_vector_builder nbuilder (narrower_mode, 2, 1);
+      nbuilder.quick_push (const0_rtx);
+      nbuilder.quick_push (const1_rtx);
+      rtx_vector_builder builder (mode, 2, 1);
+      builder.quick_push (const0_rtx);
+      builder.quick_push (const1_rtx);
+      ASSERT_RTX_EQ (builder.build (),
+                     simplify_unary_operation (VEC_DUPLICATE, mode,
+                                               nbuilder.build (),
+                                               narrower_mode));
+
       /* Test VEC_SELECT of a vector.  */
       rtx vec_par
         = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx));
@@ -6957,6 +7333,58 @@
   ASSERT_RTX_EQ (series_0_m1,
                  simplify_binary_operation (VEC_SERIES, mode, const0_rtx,
                                             constm1_rtx));
+
+  /* Test NEG on constant vector series.  */
+  ASSERT_RTX_EQ (series_0_m1,
+                 simplify_unary_operation (NEG, mode, series_0_1, mode));
+  ASSERT_RTX_EQ (series_0_1,
+                 simplify_unary_operation (NEG, mode, series_0_m1, mode));
+
+  /* Test PLUS and MINUS on constant vector series.  */
+  rtx scalar2 = gen_int_mode (2, inner_mode);
+  rtx scalar3 = gen_int_mode (3, inner_mode);
+  rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx);
+  rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2);
+  rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3);
+  ASSERT_RTX_EQ (series_1_1,
+                 simplify_binary_operation (PLUS, mode, series_0_1,
+                                            CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (series_0_m1,
+                 simplify_binary_operation (PLUS, mode, CONST0_RTX (mode),
+                                            series_0_m1));
+  ASSERT_RTX_EQ (series_1_3,
+                 simplify_binary_operation (PLUS, mode, series_1_1,
+                                            series_0_2));
+  ASSERT_RTX_EQ (series_0_1,
+                 simplify_binary_operation (MINUS, mode, series_1_1,
+                                            CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (series_1_1,
+                 simplify_binary_operation (MINUS, mode, CONST1_RTX (mode),
+                                            series_0_m1));
+  ASSERT_RTX_EQ (series_1_1,
+                 simplify_binary_operation (MINUS, mode, series_1_3,
+                                            series_0_2));
+
+  /* Test MULT between constant vectors.  */
+  rtx vec2 = gen_const_vec_duplicate (mode, scalar2);
+  rtx vec3 = gen_const_vec_duplicate (mode, scalar3);
+  rtx scalar9 = gen_int_mode (9, inner_mode);
+  rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9);
+  ASSERT_RTX_EQ (series_0_2,
+                 simplify_binary_operation (MULT, mode, series_0_1, vec2));
+  ASSERT_RTX_EQ (series_3_9,
+                 simplify_binary_operation (MULT, mode, vec3, series_1_3));
+  if (!GET_MODE_NUNITS (mode).is_constant ())
+    ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1,
+                                             series_0_1));
+
+  /* Test ASHIFT between constant vectors.  */
+  ASSERT_RTX_EQ (series_0_2,
+                 simplify_binary_operation (ASHIFT, mode, series_0_1,
+                                            CONST1_RTX (mode)));
+  if (!GET_MODE_NUNITS (mode).is_constant ())
+    ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode),
+                                             series_0_1));
 }
 
 /* Verify simplify_merge_mask works correctly.  */
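The new VEC_SERIES selftests rely on the elementwise identity (a + i*p) + (b + i*q) == (a + b) + i*(p + q), which is why PLUS and MINUS can work directly on stepped encodings. A standalone check mirroring the series_1_1 + series_0_2 == series_1_3 assertion above (illustrative only):

```c
#include <stdio.h>

/* Adding two series elementwise gives a series whose base and step
   are the sums of the bases and steps.  */
int main (void)
{
  int a = 1, p = 1;     /* series_1_1: 1, 2, 3, ...  */
  int b = 0, q = 2;     /* series_0_2: 0, 2, 4, ...  */
  for (int i = 0; i < 4; i++)
    printf ("%d ", (a + i * p) + (b + i * q)); /* 1 4 7 10 = series_1_3 */
  printf ("\n");
  return 0;
}
```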
@@ -7022,6 +7450,165 @@
                  simplify_rtx (nvm));
 }
 
+/* Test subregs of integer vector constant X, trying elements in
+   the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)),
+   where NELTS is the number of elements in X.  Subregs involving
+   elements [ELT_BIAS, ELT_BIAS + FIRST_VALID) are expected to fail.  */
+
+static void
+test_vector_subregs_modes (rtx x, poly_uint64 elt_bias = 0,
+                           unsigned int first_valid = 0)
+{
+  machine_mode inner_mode = GET_MODE (x);
+  scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+
+  for (unsigned int modei = 0; modei < NUM_MACHINE_MODES; ++modei)
+    {
+      machine_mode outer_mode = (machine_mode) modei;
+      if (!VECTOR_MODE_P (outer_mode))
+        continue;
+
+      unsigned int outer_nunits;
+      if (GET_MODE_INNER (outer_mode) == int_mode
+          && GET_MODE_NUNITS (outer_mode).is_constant (&outer_nunits)
+          && multiple_p (GET_MODE_NUNITS (inner_mode), outer_nunits))
+        {
+          /* Test subregs in which the outer mode is a smaller,
+             constant-sized vector of the same element type.  */
+          unsigned int limit
+            = constant_lower_bound (GET_MODE_NUNITS (inner_mode));
+          for (unsigned int elt = 0; elt < limit; elt += outer_nunits)
+            {
+              rtx expected = NULL_RTX;
+              if (elt >= first_valid)
+                {
+                  rtx_vector_builder builder (outer_mode, outer_nunits, 1);
+                  for (unsigned int i = 0; i < outer_nunits; ++i)
+                    builder.quick_push (CONST_VECTOR_ELT (x, elt + i));
+                  expected = builder.build ();
+                }
+              poly_uint64 byte = (elt_bias + elt) * GET_MODE_SIZE (int_mode);
+              ASSERT_RTX_EQ (expected,
+                             simplify_subreg (outer_mode, x,
+                                              inner_mode, byte));
+            }
+        }
+      else if (known_eq (GET_MODE_SIZE (outer_mode),
+                         GET_MODE_SIZE (inner_mode))
+               && known_eq (elt_bias, 0U)
+               && (GET_MODE_CLASS (outer_mode) != MODE_VECTOR_BOOL
+                   || known_eq (GET_MODE_BITSIZE (outer_mode),
+                                GET_MODE_NUNITS (outer_mode)))
+               && (!FLOAT_MODE_P (outer_mode)
+                   || (FLOAT_MODE_FORMAT (outer_mode)->ieee_bits
+                       == GET_MODE_UNIT_PRECISION (outer_mode)))
+               && (GET_MODE_SIZE (inner_mode).is_constant ()
+                   || !CONST_VECTOR_STEPPED_P (x)))
+        {
+          /* Try converting to OUTER_MODE and back.  */
+          rtx outer_x = simplify_subreg (outer_mode, x, inner_mode, 0);
+          ASSERT_TRUE (outer_x != NULL_RTX);
+          ASSERT_RTX_EQ (x, simplify_subreg (inner_mode, outer_x,
+                                             outer_mode, 0));
+        }
+    }
+
+  if (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
+    {
+      /* Test each byte in the element range.  */
+      unsigned int limit
+        = constant_lower_bound (GET_MODE_SIZE (inner_mode));
+      for (unsigned int i = 0; i < limit; ++i)
+        {
+          unsigned int elt = i / GET_MODE_SIZE (int_mode);
+          rtx expected = NULL_RTX;
+          if (elt >= first_valid)
+            {
+              unsigned int byte_shift = i % GET_MODE_SIZE (int_mode);
+              if (BYTES_BIG_ENDIAN)
+                byte_shift = GET_MODE_SIZE (int_mode) - byte_shift - 1;
+              rtx_mode_t vec_elt (CONST_VECTOR_ELT (x, elt), int_mode);
+              wide_int shifted_elt
+                = wi::lrshift (vec_elt, byte_shift * BITS_PER_UNIT);
+              expected = immed_wide_int_const (shifted_elt, QImode);
+            }
+          poly_uint64 byte = elt_bias * GET_MODE_SIZE (int_mode) + i;
+          ASSERT_RTX_EQ (expected,
+                         simplify_subreg (QImode, x, inner_mode, byte));
+        }
+    }
+}
+
+/* Test constant subregs of integer vector mode INNER_MODE, using 1
+   element per pattern.  */
+
+static void
+test_vector_subregs_repeating (machine_mode inner_mode)
+{
+  poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
+  unsigned int min_nunits = constant_lower_bound (nunits);
+  scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+  unsigned int count = gcd (min_nunits, 8);
+
+  rtx_vector_builder builder (inner_mode, count, 1);
+  for (unsigned int i = 0; i < count; ++i)
+    builder.quick_push (gen_int_mode (8 - i, int_mode));
+  rtx x = builder.build ();
+
+  test_vector_subregs_modes (x);
+  if (!nunits.is_constant ())
+    test_vector_subregs_modes (x, nunits - min_nunits);
+}
+
+/* Test constant subregs of integer vector mode INNER_MODE, using 2
+   elements per pattern.  */
+
+static void
+test_vector_subregs_fore_back (machine_mode inner_mode)
+{
+  poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
+  unsigned int min_nunits = constant_lower_bound (nunits);
+  scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+  unsigned int count = gcd (min_nunits, 4);
+
+  rtx_vector_builder builder (inner_mode, count, 2);
+  for (unsigned int i = 0; i < count; ++i)
+    builder.quick_push (gen_int_mode (i, int_mode));
+  for (unsigned int i = 0; i < count; ++i)
+    builder.quick_push (gen_int_mode (-(int) i, int_mode));
+  rtx x = builder.build ();
+
+  test_vector_subregs_modes (x);
+  if (!nunits.is_constant ())
+    test_vector_subregs_modes (x, nunits - min_nunits, count);
+}
+
+/* Test constant subregs of integer vector mode INNER_MODE, using 3
+   elements per pattern.  */
+
+static void
+test_vector_subregs_stepped (machine_mode inner_mode)
+{
+  /* Build { 0, 1, 2, 3, ... }.  */
+  scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+  rtx_vector_builder builder (inner_mode, 1, 3);
+  for (unsigned int i = 0; i < 3; ++i)
+    builder.quick_push (gen_int_mode (i, int_mode));
+  rtx x = builder.build ();
+
+  test_vector_subregs_modes (x);
+}
+
+/* Test constant subregs of integer vector mode INNER_MODE.  */
+
+static void
+test_vector_subregs (machine_mode inner_mode)
+{
+  test_vector_subregs_repeating (inner_mode);
+  test_vector_subregs_fore_back (inner_mode);
+  test_vector_subregs_stepped (inner_mode);
+}
+
 /* Verify some simplifications involving vectors.  */
 
 static void
@@ -7036,7 +7623,10 @@
       test_vector_ops_duplicate (mode, scalar_reg);
       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && maybe_gt (GET_MODE_NUNITS (mode), 2))
-       test_vector_ops_series (mode, scalar_reg);
+       {
+         test_vector_ops_series (mode, scalar_reg);
+         test_vector_subregs (mode);
+       }
       test_vec_merge (mode);
     }
 }
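The stepped selftest above builds { 0, 1, 2, 3, ... } from just three encoded elements per pattern. A standalone sketch of how such an encoding extrapolates (illustrative only, not GCC code):

```c
#include <stdio.h>

/* A stepped pattern stores a0, a1, a2 and extrapolates element
   i > 2 as a2 + (i - 2) * (a2 - a1).  */
static int
stepped_elt (const int enc[3], unsigned i)
{
  if (i < 3)
    return enc[i];
  return enc[2] + (int) (i - 2) * (enc[2] - enc[1]);
}

int main (void)
{
  int enc[3] = { 0, 1, 2 };     /* encodes { 0, 1, 2, 3, ... } */
  for (unsigned i = 0; i < 8; i++)
    printf ("%d ", stepped_elt (enc, i));
  printf ("\n");
  return 0;
}
```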