CbC_gcc: diff gcc/gimple-ssa-store-merging.c @ 132:d34655255c78
update gcc-8.2
author    mir3636
date      Thu, 25 Oct 2018 10:21:07 +0900
parents   84e7813d76e9
children  1830386684a0
--- a/gcc/gimple-ssa-store-merging.c Thu Oct 25 08:08:40 2018 +0900 +++ b/gcc/gimple-ssa-store-merging.c Thu Oct 25 10:21:07 2018 +0900 @@ -1,5 +1,5 @@ -/* GIMPLE store merging pass. - Copyright (C) 2016-2017 Free Software Foundation, Inc. +/* GIMPLE store merging and byte swapping passes. + Copyright (C) 2009-2018 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -18,8 +18,10 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* The purpose of this pass is to combine multiple memory stores of - constant values to consecutive memory locations into fewer wider stores. +/* The purpose of the store merging pass is to combine multiple memory stores + of constant values, values loaded from memory, bitwise operations on those, + or bit-field values, to consecutive locations, into fewer wider stores. + For example, if we have a sequence peforming four byte stores to consecutive memory locations: [p ] := imm1; @@ -27,23 +29,57 @@ [p + 2B] := imm3; [p + 3B] := imm4; we can transform this into a single 4-byte store if the target supports it: - [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness. + [p] := imm1:imm2:imm3:imm4 concatenated according to endianness. + + Or: + [p ] := [q ]; + [p + 1B] := [q + 1B]; + [p + 2B] := [q + 2B]; + [p + 3B] := [q + 3B]; + if there is no overlap can be transformed into a single 4-byte + load followed by single 4-byte store. + + Or: + [p ] := [q ] ^ imm1; + [p + 1B] := [q + 1B] ^ imm2; + [p + 2B] := [q + 2B] ^ imm3; + [p + 3B] := [q + 3B] ^ imm4; + if there is no overlap can be transformed into a single 4-byte + load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store. + + Or: + [p:1 ] := imm; + [p:31] := val & 0x7FFFFFFF; + we can transform this into a single 4-byte store if the target supports it: + [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness. The algorithm is applied to each basic block in three phases: - 1) Scan through the basic block recording constant assignments to - destinations that can be expressed as a store to memory of a certain size - at a certain bit offset. Record store chains to different bases in a - hash_map (m_stores) and make sure to terminate such chains when appropriate - (for example when when the stored values get used subsequently). + 1) Scan through the basic block and record assignments to destinations + that can be expressed as a store to memory of a certain size at a certain + bit offset from base expressions we can handle. For bit-fields we also + record the surrounding bit region, i.e. bits that could be stored in + a read-modify-write operation when storing the bit-field. Record store + chains to different bases in a hash_map (m_stores) and make sure to + terminate such chains when appropriate (for example when when the stored + values get used subsequently). These stores can be a result of structure element initializers, array stores etc. A store_immediate_info object is recorded for every such store. Record as many such assignments to a single base as possible until a statement that interferes with the store sequence is encountered. - - 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of + Each store has up to 2 operands, which can be a either constant, a memory + load or an SSA name, from which the value to be stored can be computed. + At most one of the operands can be a constant. The operands are recorded + in store_operand_info struct. 
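To make the transformations described above concrete, here is a minimal C sketch (hypothetical type and function names, not part of this patch) of the constant-store and bit-field cases; assuming -O2 and a target without slow unaligned accesses, each function can be emitted as a single 32-bit store:

  /* Four adjacent constant byte stores.  */
  struct S { unsigned char a, b, c, d; };

  void
  init (struct S *p)
  {
    p->a = 1;
    p->b = 2;
    p->c = 3;
    p->d = 4;   /* one store of 0x04030201 on little-endian,
                   0x01020304 on big-endian */
  }

  /* Two adjacent bit-field stores covering one 32-bit bit region.  */
  struct B { unsigned int flag : 1; unsigned int val : 31; };

  void
  set_fields (struct B *p, unsigned int v)
  {
    p->flag = 1;
    p->val = v & 0x7FFFFFFF;
  }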
+ + 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of store_immediate_info objects) and coalesce contiguous stores into - merged_store_group objects. + merged_store_group objects. For bit-field stores, we don't need to + require the stores to be contiguous, just their surrounding bit regions + have to be contiguous. If the expression being stored is different + between adjacent stores, such as one store storing a constant and + following storing a value loaded from memory, or if the loaded memory + objects are not adjacent, a new merged_store_group is created as well. For example, given the stores: [p ] := 0; @@ -62,8 +98,8 @@ multiple stores per store group to handle contiguous stores that are not of a size that is a power of 2. For example it can try to emit a 40-bit store as a 32-bit store followed by an 8-bit store. - We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or - TARGET_SLOW_UNALIGNED_ACCESS rules. + We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT + or TARGET_SLOW_UNALIGNED_ACCESS settings. Note on endianness and example: Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores: @@ -120,20 +156,1191 @@ #include "tree-hash-traits.h" #include "gimple-iterator.h" #include "gimplify.h" +#include "gimple-fold.h" #include "stor-layout.h" #include "timevar.h" #include "tree-cfg.h" #include "tree-eh.h" #include "target.h" #include "gimplify-me.h" +#include "rtl.h" +#include "expr.h" /* For get_bit_range. */ +#include "optabs-tree.h" #include "selftest.h" /* The maximum size (in bits) of the stores this pass should generate. */ #define MAX_STORE_BITSIZE (BITS_PER_WORD) #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT) +/* Limit to bound the number of aliasing checks for loads with the same + vuse as the corresponding store. */ +#define MAX_STORE_ALIAS_CHECKS 64 + namespace { +struct bswap_stat +{ + /* Number of hand-written 16-bit nop / bswaps found. */ + int found_16bit; + + /* Number of hand-written 32-bit nop / bswaps found. */ + int found_32bit; + + /* Number of hand-written 64-bit nop / bswaps found. */ + int found_64bit; +} nop_stats, bswap_stats; + +/* A symbolic number structure is used to detect byte permutation and selection + patterns of a source. To achieve that, its field N contains an artificial + number consisting of BITS_PER_MARKER sized markers tracking where does each + byte come from in the source: + + 0 - target byte has the value 0 + FF - target byte has an unknown value (eg. due to sign extension) + 1..size - marker value is the byte index in the source (0 for lsb). + + To detect permutations on memory sources (arrays and structures), a symbolic + number is also associated: + - a base address BASE_ADDR and an OFFSET giving the address of the source; + - a range which gives the difference between the highest and lowest accessed + memory location to make such a symbolic number; + - the address SRC of the source element of lowest address as a convenience + to easily get BASE_ADDR + offset + lowest bytepos; + - number of expressions N_OPS bitwise ored together to represent + approximate cost of the computation. + + Note 1: the range is different from size as size reflects the size of the + type of the current expression. For instance, for an array char a[], + (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while + (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this + time a range of 1. 
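As a standalone sketch of the marker encoding (ordinary C, not GCC code; the variable names are illustrative): for a 4-byte source the identity pattern is 0x04030201, one marker byte per source byte, and whole-byte shifts or rotates of the source permute the markers in exactly the same way:

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    /* Identity markers for a 4-byte source: the lsb of the result
       comes from source byte 1, the msb from source byte 4.  */
    uint64_t n = 0x04030201;
    /* Model a rotate-right of the source by 8 bits by rotating the
       marker word by the same amount within 32 bits.  */
    uint64_t rot = ((n >> 8) | (n << 24)) & 0xffffffff;
    printf ("%08llx\n", (unsigned long long) rot);  /* prints 01040302 */
    return 0;
  }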
+ + Note 2: for non-memory sources, range holds the same value as size. + + Note 3: SRC points to the SSA_NAME in case of non-memory source. */ + +struct symbolic_number { + uint64_t n; + tree type; + tree base_addr; + tree offset; + poly_int64_pod bytepos; + tree src; + tree alias_set; + tree vuse; + unsigned HOST_WIDE_INT range; + int n_ops; +}; + +#define BITS_PER_MARKER 8 +#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1) +#define MARKER_BYTE_UNKNOWN MARKER_MASK +#define HEAD_MARKER(n, size) \ + ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER))) + +/* The number which the find_bswap_or_nop_1 result should match in + order to have a nop. The number is masked according to the size of + the symbolic number before using it. */ +#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \ + (uint64_t)0x08070605 << 32 | 0x04030201) + +/* The number which the find_bswap_or_nop_1 result should match in + order to have a byte swap. The number is masked according to the + size of the symbolic number before using it. */ +#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \ + (uint64_t)0x01020304 << 32 | 0x05060708) + +/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic + number N. Return false if the requested operation is not permitted + on a symbolic number. */ + +inline bool +do_shift_rotate (enum tree_code code, + struct symbolic_number *n, + int count) +{ + int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + unsigned head_marker; + + if (count % BITS_PER_UNIT != 0) + return false; + count = (count / BITS_PER_UNIT) * BITS_PER_MARKER; + + /* Zero out the extra bits of N in order to avoid them being shifted + into the significant bits. */ + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + + switch (code) + { + case LSHIFT_EXPR: + n->n <<= count; + break; + case RSHIFT_EXPR: + head_marker = HEAD_MARKER (n->n, size); + n->n >>= count; + /* Arithmetic shift of signed type: result is dependent on the value. */ + if (!TYPE_UNSIGNED (n->type) && head_marker) + for (i = 0; i < count / BITS_PER_MARKER; i++) + n->n |= (uint64_t) MARKER_BYTE_UNKNOWN + << ((size - 1 - i) * BITS_PER_MARKER); + break; + case LROTATE_EXPR: + n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count)); + break; + case RROTATE_EXPR: + n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count)); + break; + default: + return false; + } + /* Zero unused bits for size. */ + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + return true; +} + +/* Perform sanity checking for the symbolic number N and the gimple + statement STMT. */ + +inline bool +verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt) +{ + tree lhs_type; + + lhs_type = gimple_expr_type (stmt); + + if (TREE_CODE (lhs_type) != INTEGER_TYPE) + return false; + + if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) + return false; + + return true; +} + +/* Initialize the symbolic number N for the bswap pass from the base element + SRC manipulated by the bitwise OR expression. */ + +bool +init_symbolic_number (struct symbolic_number *n, tree src) +{ + int size; + + if (! INTEGRAL_TYPE_P (TREE_TYPE (src))) + return false; + + n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; + n->src = src; + + /* Set up the symbolic number N by setting each byte to a value between 1 and + the byte size of rhs1. The highest order byte is set to n->size and the + lowest order byte to 1. 
*/ + n->type = TREE_TYPE (src); + size = TYPE_PRECISION (n->type); + if (size % BITS_PER_UNIT != 0) + return false; + size /= BITS_PER_UNIT; + if (size > 64 / BITS_PER_MARKER) + return false; + n->range = size; + n->n = CMPNOP; + n->n_ops = 1; + + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + + return true; +} + +/* Check if STMT might be a byte swap or a nop from a memory source and returns + the answer. If so, REF is that memory source and the base of the memory area + accessed and the offset of the access from that base are recorded in N. */ + +bool +find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n) +{ + /* Leaf node is an array or component ref. Memorize its base and + offset from base to compare to other such leaf node. */ + poly_int64 bitsize, bitpos, bytepos; + machine_mode mode; + int unsignedp, reversep, volatilep; + tree offset, base_addr; + + /* Not prepared to handle PDP endian. */ + if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) + return false; + + if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt)) + return false; + + base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode, + &unsignedp, &reversep, &volatilep); + + if (TREE_CODE (base_addr) == TARGET_MEM_REF) + /* Do not rewrite TARGET_MEM_REF. */ + return false; + else if (TREE_CODE (base_addr) == MEM_REF) + { + poly_offset_int bit_offset = 0; + tree off = TREE_OPERAND (base_addr, 1); + + if (!integer_zerop (off)) + { + poly_offset_int boff = mem_ref_offset (base_addr); + boff <<= LOG2_BITS_PER_UNIT; + bit_offset += boff; + } + + base_addr = TREE_OPERAND (base_addr, 0); + + /* Avoid returning a negative bitpos as this may wreak havoc later. */ + if (maybe_lt (bit_offset, 0)) + { + tree byte_offset = wide_int_to_tree + (sizetype, bits_to_bytes_round_down (bit_offset)); + bit_offset = num_trailing_bits (bit_offset); + if (offset) + offset = size_binop (PLUS_EXPR, offset, byte_offset); + else + offset = byte_offset; + } + + bitpos += bit_offset.force_shwi (); + } + else + base_addr = build_fold_addr_expr (base_addr); + + if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) + return false; + if (!multiple_p (bitsize, BITS_PER_UNIT)) + return false; + if (reversep) + return false; + + if (!init_symbolic_number (n, ref)) + return false; + n->base_addr = base_addr; + n->offset = offset; + n->bytepos = bytepos; + n->alias_set = reference_alias_ptr_type (ref); + n->vuse = gimple_vuse (stmt); + return true; +} + +/* Compute the symbolic number N representing the result of a bitwise OR on 2 + symbolic number N1 and N2 whose source statements are respectively + SOURCE_STMT1 and SOURCE_STMT2. */ + +gimple * +perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1, + gimple *source_stmt2, struct symbolic_number *n2, + struct symbolic_number *n) +{ + int i, size; + uint64_t mask; + gimple *source_stmt; + struct symbolic_number *n_start; + + tree rhs1 = gimple_assign_rhs1 (source_stmt1); + if (TREE_CODE (rhs1) == BIT_FIELD_REF + && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) + rhs1 = TREE_OPERAND (rhs1, 0); + tree rhs2 = gimple_assign_rhs1 (source_stmt2); + if (TREE_CODE (rhs2) == BIT_FIELD_REF + && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME) + rhs2 = TREE_OPERAND (rhs2, 0); + + /* Sources are different, cancel bswap if they are not memory location with + the same base (array, structure, ...). 
*/ + if (rhs1 != rhs2) + { + uint64_t inc; + HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end; + struct symbolic_number *toinc_n_ptr, *n_end; + basic_block bb1, bb2; + + if (!n1->base_addr || !n2->base_addr + || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) + return NULL; + + if (!n1->offset != !n2->offset + || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0))) + return NULL; + + start1 = 0; + if (!(n2->bytepos - n1->bytepos).is_constant (&start2)) + return NULL; + + if (start1 < start2) + { + n_start = n1; + start_sub = start2 - start1; + } + else + { + n_start = n2; + start_sub = start1 - start2; + } + + bb1 = gimple_bb (source_stmt1); + bb2 = gimple_bb (source_stmt2); + if (dominated_by_p (CDI_DOMINATORS, bb1, bb2)) + source_stmt = source_stmt1; + else + source_stmt = source_stmt2; + + /* Find the highest address at which a load is performed and + compute related info. */ + end1 = start1 + (n1->range - 1); + end2 = start2 + (n2->range - 1); + if (end1 < end2) + { + end = end2; + end_sub = end2 - end1; + } + else + { + end = end1; + end_sub = end1 - end2; + } + n_end = (end2 > end1) ? n2 : n1; + + /* Find symbolic number whose lsb is the most significant. */ + if (BYTES_BIG_ENDIAN) + toinc_n_ptr = (n_end == n1) ? n2 : n1; + else + toinc_n_ptr = (n_start == n1) ? n2 : n1; + + n->range = end - MIN (start1, start2) + 1; + + /* Check that the range of memory covered can be represented by + a symbolic number. */ + if (n->range > 64 / BITS_PER_MARKER) + return NULL; + + /* Reinterpret byte marks in symbolic number holding the value of + bigger weight according to target endianness. */ + inc = BYTES_BIG_ENDIAN ? end_sub : start_sub; + size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; + for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) + { + unsigned marker + = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; + if (marker && marker != MARKER_BYTE_UNKNOWN) + toinc_n_ptr->n += inc; + } + } + else + { + n->range = n1->range; + n_start = n1; + source_stmt = source_stmt1; + } + + if (!n1->alias_set + || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set)) + n->alias_set = n1->alias_set; + else + n->alias_set = ptr_type_node; + n->vuse = n_start->vuse; + n->base_addr = n_start->base_addr; + n->offset = n_start->offset; + n->src = n_start->src; + n->bytepos = n_start->bytepos; + n->type = n_start->type; + size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + + for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER) + { + uint64_t masked1, masked2; + + masked1 = n1->n & mask; + masked2 = n2->n & mask; + if (masked1 && masked2 && masked1 != masked2) + return NULL; + } + n->n = n1->n | n2->n; + n->n_ops = n1->n_ops + n2->n_ops; + + return source_stmt; +} + +/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform + the operation given by the rhs of STMT on the result. If the operation + could successfully be executed the function returns a gimple stmt whose + rhs's first tree is the expression of the source operand and NULL + otherwise. */ + +gimple * +find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) +{ + enum tree_code code; + tree rhs1, rhs2 = NULL; + gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1; + enum gimple_rhs_class rhs_class; + + if (!limit || !is_gimple_assign (stmt)) + return NULL; + + rhs1 = gimple_assign_rhs1 (stmt); + + if (find_bswap_or_nop_load (stmt, rhs1, n)) + return stmt; + + /* Handle BIT_FIELD_REF. 
*/ + if (TREE_CODE (rhs1) == BIT_FIELD_REF + && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) + { + unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1)); + unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2)); + if (bitpos % BITS_PER_UNIT == 0 + && bitsize % BITS_PER_UNIT == 0 + && init_symbolic_number (n, TREE_OPERAND (rhs1, 0))) + { + /* Handle big-endian bit numbering in BIT_FIELD_REF. */ + if (BYTES_BIG_ENDIAN) + bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize; + + /* Shift. */ + if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos)) + return NULL; + + /* Mask. */ + uint64_t mask = 0; + uint64_t tmp = (1 << BITS_PER_UNIT) - 1; + for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; + i++, tmp <<= BITS_PER_UNIT) + mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); + n->n &= mask; + + /* Convert. */ + n->type = TREE_TYPE (rhs1); + if (!n->base_addr) + n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + + return verify_symbolic_number_p (n, stmt) ? stmt : NULL; + } + + return NULL; + } + + if (TREE_CODE (rhs1) != SSA_NAME) + return NULL; + + code = gimple_assign_rhs_code (stmt); + rhs_class = gimple_assign_rhs_class (stmt); + rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); + + if (rhs_class == GIMPLE_BINARY_RHS) + rhs2 = gimple_assign_rhs2 (stmt); + + /* Handle unary rhs and binary rhs with integer constants as second + operand. */ + + if (rhs_class == GIMPLE_UNARY_RHS + || (rhs_class == GIMPLE_BINARY_RHS + && TREE_CODE (rhs2) == INTEGER_CST)) + { + if (code != BIT_AND_EXPR + && code != LSHIFT_EXPR + && code != RSHIFT_EXPR + && code != LROTATE_EXPR + && code != RROTATE_EXPR + && !CONVERT_EXPR_CODE_P (code)) + return NULL; + + source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1); + + /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and + we have to initialize the symbolic number. */ + if (!source_stmt1) + { + if (gimple_assign_load_p (stmt) + || !init_symbolic_number (n, rhs1)) + return NULL; + source_stmt1 = stmt; + } + + switch (code) + { + case BIT_AND_EXPR: + { + int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + uint64_t val = int_cst_value (rhs2), mask = 0; + uint64_t tmp = (1 << BITS_PER_UNIT) - 1; + + /* Only constants masking full bytes are allowed. */ + for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT) + if ((val & tmp) != 0 && (val & tmp) != tmp) + return NULL; + else if (val & tmp) + mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); + + n->n &= mask; + } + break; + case LSHIFT_EXPR: + case RSHIFT_EXPR: + case LROTATE_EXPR: + case RROTATE_EXPR: + if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2))) + return NULL; + break; + CASE_CONVERT: + { + int i, type_size, old_type_size; + tree type; + + type = gimple_expr_type (stmt); + type_size = TYPE_PRECISION (type); + if (type_size % BITS_PER_UNIT != 0) + return NULL; + type_size /= BITS_PER_UNIT; + if (type_size > 64 / BITS_PER_MARKER) + return NULL; + + /* Sign extension: result is dependent on the value. */ + old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size + && HEAD_MARKER (n->n, old_type_size)) + for (i = 0; i < type_size - old_type_size; i++) + n->n |= (uint64_t) MARKER_BYTE_UNKNOWN + << ((type_size - 1 - i) * BITS_PER_MARKER); + + if (type_size < 64 / BITS_PER_MARKER) + { + /* If STMT casts to a smaller type mask out the bits not + belonging to the target type. 
*/ + n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1; + } + n->type = type; + if (!n->base_addr) + n->range = type_size; + } + break; + default: + return NULL; + }; + return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL; + } + + /* Handle binary rhs. */ + + if (rhs_class == GIMPLE_BINARY_RHS) + { + struct symbolic_number n1, n2; + gimple *source_stmt, *source_stmt2; + + if (code != BIT_IOR_EXPR) + return NULL; + + if (TREE_CODE (rhs2) != SSA_NAME) + return NULL; + + rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); + + switch (code) + { + case BIT_IOR_EXPR: + source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); + + if (!source_stmt1) + return NULL; + + source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1); + + if (!source_stmt2) + return NULL; + + if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) + return NULL; + + if (n1.vuse != n2.vuse) + return NULL; + + source_stmt + = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n); + + if (!source_stmt) + return NULL; + + if (!verify_symbolic_number_p (n, stmt)) + return NULL; + + break; + default: + return NULL; + } + return source_stmt; + } + return NULL; +} + +/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute + *CMPXCHG, *CMPNOP and adjust *N. */ + +void +find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, + uint64_t *cmpnop) +{ + unsigned rsize; + uint64_t tmpn, mask; + + /* The number which the find_bswap_or_nop_1 result should match in order + to have a full byte swap. The number is shifted to the right + according to the size of the symbolic number before using it. */ + *cmpxchg = CMPXCHG; + *cmpnop = CMPNOP; + + /* Find real size of result (highest non-zero byte). */ + if (n->base_addr) + for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++); + else + rsize = n->range; + + /* Zero out the bits corresponding to untouched bytes in original gimple + expression. */ + if (n->range < (int) sizeof (int64_t)) + { + mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; + *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; + *cmpnop &= mask; + } + + /* Zero out the bits corresponding to unused bytes in the result of the + gimple expression. */ + if (rsize < n->range) + { + if (BYTES_BIG_ENDIAN) + { + mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; + *cmpxchg &= mask; + *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER; + } + else + { + mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; + *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER; + *cmpnop &= mask; + } + n->range = rsize; + } + + n->range *= BITS_PER_UNIT; +} + +/* Check if STMT completes a bswap implementation or a read in a given + endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP + accordingly. It also sets N to represent the kind of operations + performed: size of the resulting expression and whether it works on + a memory source, and if so alias-set and vuse. At last, the + function returns a stmt whose rhs's first tree is the source + expression. */ + +gimple * +find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) +{ + /* The last parameter determines the depth search limit. It usually + correlates directly to the number n of bytes to be touched. We + increase that number by log2(n) + 1 here in order to also + cover signed -> unsigned conversions of the src operand as can be seen + in libgcc, and for initial shift/and operation of the src operand. 
*/ + int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); + limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); + gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit); + + if (!ins_stmt) + return NULL; + + uint64_t cmpxchg, cmpnop; + find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop); + + /* A complete byte swap should make the symbolic number to start with + the largest digit in the highest order byte. Unchanged symbolic + number indicates a read with same endianness as target architecture. */ + if (n->n == cmpnop) + *bswap = false; + else if (n->n == cmpxchg) + *bswap = true; + else + return NULL; + + /* Useless bit manipulation performed by code. */ + if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) + return NULL; + + return ins_stmt; +} + +const pass_data pass_data_optimize_bswap = +{ + GIMPLE_PASS, /* type */ + "bswap", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_optimize_bswap : public gimple_opt_pass +{ +public: + pass_optimize_bswap (gcc::context *ctxt) + : gimple_opt_pass (pass_data_optimize_bswap, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8; + } + + virtual unsigned int execute (function *); + +}; // class pass_optimize_bswap + +/* Perform the bswap optimization: replace the expression computed in the rhs + of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent + bswap, load or load + bswap expression. + Which of these alternatives replace the rhs is given by N->base_addr (non + null if a load is needed) and BSWAP. The type, VUSE and set-alias of the + load to perform are also given in N while the builtin bswap invoke is given + in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the + load statements involved to construct the rhs in gsi_stmt (GSI) and + N->range gives the size of the rhs expression for maintaining some + statistics. + + Note that if the replacement involve a load and if gsi_stmt (GSI) is + non-NULL, that stmt is moved just after INS_STMT to do the load with the + same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */ + +tree +bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, + tree bswap_type, tree load_type, struct symbolic_number *n, + bool bswap) +{ + tree src, tmp, tgt = NULL_TREE; + gimple *bswap_stmt; + + gimple *cur_stmt = gsi_stmt (gsi); + src = n->src; + if (cur_stmt) + tgt = gimple_assign_lhs (cur_stmt); + + /* Need to load the value from memory first. */ + if (n->base_addr) + { + gimple_stmt_iterator gsi_ins = gsi; + if (ins_stmt) + gsi_ins = gsi_for_stmt (ins_stmt); + tree addr_expr, addr_tmp, val_expr, val_tmp; + tree load_offset_ptr, aligned_load_type; + gimple *load_stmt; + unsigned align = get_object_alignment (src); + poly_int64 load_offset = 0; + + if (cur_stmt) + { + basic_block ins_bb = gimple_bb (ins_stmt); + basic_block cur_bb = gimple_bb (cur_stmt); + if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb)) + return NULL_TREE; + + /* Move cur_stmt just before one of the load of the original + to ensure it has the same VUSE. See PR61517 for what could + go wrong. 
*/ + if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt)) + reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt)); + gsi_move_before (&gsi, &gsi_ins); + gsi = gsi_for_stmt (cur_stmt); + } + else + gsi = gsi_ins; + + /* Compute address to load from and cast according to the size + of the load. */ + addr_expr = build_fold_addr_expr (src); + if (is_gimple_mem_ref_addr (addr_expr)) + addr_tmp = unshare_expr (addr_expr); + else + { + addr_tmp = unshare_expr (n->base_addr); + if (!is_gimple_mem_ref_addr (addr_tmp)) + addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp, + is_gimple_mem_ref_addr, + NULL_TREE, true, + GSI_SAME_STMT); + load_offset = n->bytepos; + if (n->offset) + { + tree off + = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset), + true, NULL_TREE, true, + GSI_SAME_STMT); + gimple *stmt + = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)), + POINTER_PLUS_EXPR, addr_tmp, off); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + addr_tmp = gimple_assign_lhs (stmt); + } + } + + /* Perform the load. */ + aligned_load_type = load_type; + if (align < TYPE_ALIGN (load_type)) + aligned_load_type = build_aligned_type (load_type, align); + load_offset_ptr = build_int_cst (n->alias_set, load_offset); + val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp, + load_offset_ptr); + + if (!bswap) + { + if (n->range == 16) + nop_stats.found_16bit++; + else if (n->range == 32) + nop_stats.found_32bit++; + else + { + gcc_assert (n->range == 64); + nop_stats.found_64bit++; + } + + /* Convert the result of load if necessary. */ + if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type)) + { + val_tmp = make_temp_ssa_name (aligned_load_type, NULL, + "load_dst"); + load_stmt = gimple_build_assign (val_tmp, val_expr); + gimple_set_vuse (load_stmt, n->vuse); + gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); + gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp); + update_stmt (cur_stmt); + } + else if (cur_stmt) + { + gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr); + gimple_set_vuse (cur_stmt, n->vuse); + update_stmt (cur_stmt); + } + else + { + tgt = make_ssa_name (load_type); + cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr); + gimple_set_vuse (cur_stmt, n->vuse); + gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT); + } + + if (dump_file) + { + fprintf (dump_file, + "%d bit load in target endianness found at: ", + (int) n->range); + print_gimple_stmt (dump_file, cur_stmt, 0); + } + return tgt; + } + else + { + val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst"); + load_stmt = gimple_build_assign (val_tmp, val_expr); + gimple_set_vuse (load_stmt, n->vuse); + gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); + } + src = val_tmp; + } + else if (!bswap) + { + gimple *g = NULL; + if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src))) + { + if (!is_gimple_val (src)) + return NULL_TREE; + g = gimple_build_assign (tgt, NOP_EXPR, src); + } + else if (cur_stmt) + g = gimple_build_assign (tgt, src); + else + tgt = src; + if (n->range == 16) + nop_stats.found_16bit++; + else if (n->range == 32) + nop_stats.found_32bit++; + else + { + gcc_assert (n->range == 64); + nop_stats.found_64bit++; + } + if (dump_file) + { + fprintf (dump_file, + "%d bit reshuffle in target endianness found at: ", + (int) n->range); + if (cur_stmt) + print_gimple_stmt (dump_file, cur_stmt, 0); + else + { + print_generic_expr (dump_file, tgt, TDF_NONE); + fprintf (dump_file, "\n"); + } + } + if (cur_stmt) + gsi_replace (&gsi, g, true); + return tgt; 
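For context, the kind of source code these load / nop / bswap replacements apply to is a hand-written endian read such as the following (hypothetical function, illustrative only); it becomes a plain 32-bit load on big-endian targets (the nop case above) or a load followed by __builtin_bswap32 on little-endian targets:

  #include <stdint.h>

  uint32_t
  read_be32 (const unsigned char *p)
  {
    return ((uint32_t) p[0] << 24)
           | ((uint32_t) p[1] << 16)
           | ((uint32_t) p[2] << 8)
           | (uint32_t) p[3];
  }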
+ } + else if (TREE_CODE (src) == BIT_FIELD_REF) + src = TREE_OPERAND (src, 0); + + if (n->range == 16) + bswap_stats.found_16bit++; + else if (n->range == 32) + bswap_stats.found_32bit++; + else + { + gcc_assert (n->range == 64); + bswap_stats.found_64bit++; + } + + tmp = src; + + /* Convert the src expression if necessary. */ + if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type)) + { + gimple *convert_stmt; + + tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc"); + convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src); + gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT); + } + + /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values + are considered as rotation of 2N bit values by N bits is generally not + equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which + gives 0x03040102 while a bswap for that value is 0x04030201. */ + if (bswap && n->range == 16) + { + tree count = build_int_cst (NULL, BITS_PER_UNIT); + src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count); + bswap_stmt = gimple_build_assign (NULL, src); + } + else + bswap_stmt = gimple_build_call (fndecl, 1, tmp); + + if (tgt == NULL_TREE) + tgt = make_ssa_name (bswap_type); + tmp = tgt; + + /* Convert the result if necessary. */ + if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type)) + { + gimple *convert_stmt; + + tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst"); + convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp); + gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT); + } + + gimple_set_lhs (bswap_stmt, tmp); + + if (dump_file) + { + fprintf (dump_file, "%d bit bswap implementation found at: ", + (int) n->range); + if (cur_stmt) + print_gimple_stmt (dump_file, cur_stmt, 0); + else + { + print_generic_expr (dump_file, tgt, TDF_NONE); + fprintf (dump_file, "\n"); + } + } + + if (cur_stmt) + { + gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + } + else + gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT); + return tgt; +} + +/* Find manual byte swap implementations as well as load in a given + endianness. Byte swaps are turned into a bswap builtin invokation + while endian loads are converted to bswap builtin invokation or + simple load according to the target endianness. */ + +unsigned int +pass_optimize_bswap::execute (function *fun) +{ + basic_block bb; + bool bswap32_p, bswap64_p; + bool changed = false; + tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; + + bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32) + && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); + bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64) + && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing + || (bswap32_p && word_mode == SImode))); + + /* Determine the argument type of the builtins. The code later on + assumes that the return and argument type are the same. */ + if (bswap32_p) + { + tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); + bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + } + + if (bswap64_p) + { + tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); + bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + } + + memset (&nop_stats, 0, sizeof (nop_stats)); + memset (&bswap_stats, 0, sizeof (bswap_stats)); + calculate_dominance_info (CDI_DOMINATORS); + + FOR_EACH_BB_FN (bb, fun) + { + gimple_stmt_iterator gsi; + + /* We do a reverse scan for bswap patterns to make sure we get the + widest match. 
As bswap pattern matching doesn't handle previously + inserted smaller bswap replacements as sub-patterns, the wider + variant wouldn't be detected. */ + for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);) + { + gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi); + tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; + enum tree_code code; + struct symbolic_number n; + bool bswap; + + /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt + might be moved to a different basic block by bswap_replace and gsi + must not points to it if that's the case. Moving the gsi_prev + there make sure that gsi points to the statement previous to + cur_stmt while still making sure that all statements are + considered in this basic block. */ + gsi_prev (&gsi); + + if (!is_gimple_assign (cur_stmt)) + continue; + + code = gimple_assign_rhs_code (cur_stmt); + switch (code) + { + case LROTATE_EXPR: + case RROTATE_EXPR: + if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt)) + || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt)) + % BITS_PER_UNIT) + continue; + /* Fall through. */ + case BIT_IOR_EXPR: + break; + default: + continue; + } + + ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); + + if (!ins_stmt) + continue; + + switch (n.range) + { + case 16: + /* Already in canonical form, nothing to do. */ + if (code == LROTATE_EXPR || code == RROTATE_EXPR) + continue; + load_type = bswap_type = uint16_type_node; + break; + case 32: + load_type = uint32_type_node; + if (bswap32_p) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); + bswap_type = bswap32_type; + } + break; + case 64: + load_type = uint64_type_node; + if (bswap64_p) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); + bswap_type = bswap64_type; + } + break; + default: + continue; + } + + if (bswap && !fndecl && n.range != 16) + continue; + + if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl, + bswap_type, load_type, &n, bswap)) + changed = true; + } + } + + statistics_counter_event (fun, "16-bit nop implementations found", + nop_stats.found_16bit); + statistics_counter_event (fun, "32-bit nop implementations found", + nop_stats.found_32bit); + statistics_counter_event (fun, "64-bit nop implementations found", + nop_stats.found_64bit); + statistics_counter_event (fun, "16-bit bswap implementations found", + bswap_stats.found_16bit); + statistics_counter_event (fun, "32-bit bswap implementations found", + bswap_stats.found_32bit); + statistics_counter_event (fun, "64-bit bswap implementations found", + bswap_stats.found_64bit); + + return (changed ? 
TODO_update_ssa : 0); +} + +} // anon namespace + +gimple_opt_pass * +make_pass_optimize_bswap (gcc::context *ctxt) +{ + return new pass_optimize_bswap (ctxt); +} + +namespace { + +/* Struct recording one operand for the store, which is either a constant, + then VAL represents the constant and all the other fields are zero, or + a memory load, then VAL represents the reference, BASE_ADDR is non-NULL + and the other fields also reflect the memory load, or an SSA name, then + VAL represents the SSA name and all the other fields are zero, */ + +struct store_operand_info +{ + tree val; + tree base_addr; + poly_uint64 bitsize; + poly_uint64 bitpos; + poly_uint64 bitregion_start; + poly_uint64 bitregion_end; + gimple *stmt; + bool bit_not_p; + store_operand_info (); +}; + +store_operand_info::store_operand_info () + : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0), + bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false) +{ +} + /* Struct recording the information about a single store of an immediate to memory. These are created in the first phase and coalesced into merged_store_group objects in the second phase. */ @@ -142,19 +1349,63 @@ { unsigned HOST_WIDE_INT bitsize; unsigned HOST_WIDE_INT bitpos; + unsigned HOST_WIDE_INT bitregion_start; + /* This is one past the last bit of the bit region. */ + unsigned HOST_WIDE_INT bitregion_end; gimple *stmt; unsigned int order; + /* INTEGER_CST for constant stores, MEM_REF for memory copy, + BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR + for bit insertion. + LROTATE_EXPR if it can be only bswap optimized and + ops are not really meaningful. + NOP_EXPR if bswap optimization detected identity, ops + are not meaningful. */ + enum tree_code rhs_code; + /* Two fields for bswap optimization purposes. */ + struct symbolic_number n; + gimple *ins_stmt; + /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */ + bool bit_not_p; + /* True if ops have been swapped and thus ops[1] represents + rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */ + bool ops_swapped_p; + /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise + just the first one. */ + store_operand_info ops[2]; store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, - gimple *, unsigned int); + unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, + gimple *, unsigned int, enum tree_code, + struct symbolic_number &, gimple *, bool, + const store_operand_info &, + const store_operand_info &); }; store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs, unsigned HOST_WIDE_INT bp, + unsigned HOST_WIDE_INT brs, + unsigned HOST_WIDE_INT bre, gimple *st, - unsigned int ord) - : bitsize (bs), bitpos (bp), stmt (st), order (ord) + unsigned int ord, + enum tree_code rhscode, + struct symbolic_number &nr, + gimple *ins_stmtp, + bool bitnotp, + const store_operand_info &op0r, + const store_operand_info &op1r) + : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre), + stmt (st), order (ord), rhs_code (rhscode), n (nr), + ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false) +#if __cplusplus >= 201103L + , ops { op0r, op1r } { } +#else +{ + ops[0] = op0r; + ops[1] = op1r; +} +#endif /* Struct representing a group of stores to contiguous memory locations. These are produced by the second phase (coalescing) and consumed in the @@ -164,26 +1415,36 @@ { unsigned HOST_WIDE_INT start; unsigned HOST_WIDE_INT width; - /* The size of the allocated memory for val. 
*/ + unsigned HOST_WIDE_INT bitregion_start; + unsigned HOST_WIDE_INT bitregion_end; + /* The size of the allocated memory for val and mask. */ unsigned HOST_WIDE_INT buf_size; + unsigned HOST_WIDE_INT align_base; + poly_uint64 load_align_base[2]; unsigned int align; + unsigned int load_align[2]; unsigned int first_order; unsigned int last_order; - - auto_vec<struct store_immediate_info *> stores; + bool bit_insertion; + + auto_vec<store_immediate_info *> stores; /* We record the first and last original statements in the sequence because we'll need their vuse/vdef and replacement position. It's easier to keep track of them separately as 'stores' is reordered by apply_stores. */ gimple *last_stmt; gimple *first_stmt; unsigned char *val; + unsigned char *mask; merged_store_group (store_immediate_info *); ~merged_store_group (); + bool can_be_merged_into (store_immediate_info *); void merge_into (store_immediate_info *); void merge_overlapping (store_immediate_info *); bool apply_stores (); +private: + void do_merge (store_immediate_info *); }; /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */ @@ -195,7 +1456,7 @@ return; for (unsigned int i = 0; i < len; i++) - fprintf (fd, "%x ", ptr[i]); + fprintf (fd, "%02x ", ptr[i]); fprintf (fd, "\n"); } @@ -287,8 +1548,7 @@ && len > BITS_PER_UNIT) { unsigned int nbytes = len / BITS_PER_UNIT; - for (unsigned int i = 0; i < nbytes; i++) - ptr[i] = 0U; + memset (ptr, 0, nbytes); if (len % BITS_PER_UNIT != 0) clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1, len % BITS_PER_UNIT); @@ -401,8 +1661,11 @@ The awkwardness comes from the fact that bitpos is counted from the most significant bit of a byte. */ + /* We must be dealing with fixed-size data at this point, since the + total size is also fixed. */ + fixed_size_mode mode = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr))); /* Allocate an extra byte so that we have space to shift into. */ - unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1; + unsigned int byte_size = GET_MODE_SIZE (mode) + 1; unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size); memset (tmpbuf, '\0', byte_size); /* The store detection code should only have allowed constants that are @@ -549,10 +1812,31 @@ { start = info->bitpos; width = info->bitsize; + bitregion_start = info->bitregion_start; + bitregion_end = info->bitregion_end; /* VAL has memory allocated for it in apply_stores once the group width has been finalized. */ val = NULL; - align = get_object_alignment (gimple_assign_lhs (info->stmt)); + mask = NULL; + bit_insertion = false; + unsigned HOST_WIDE_INT align_bitpos = 0; + get_object_alignment_1 (gimple_assign_lhs (info->stmt), + &align, &align_bitpos); + align_base = start - align_bitpos; + for (int i = 0; i < 2; ++i) + { + store_operand_info &op = info->ops[i]; + if (op.base_addr == NULL_TREE) + { + load_align[i] = 0; + load_align_base[i] = 0; + } + else + { + get_object_alignment_1 (op.val, &load_align[i], &align_bitpos); + load_align_base[i] = op.bitpos - align_bitpos; + } + } stores.create (1); stores.safe_push (info); last_stmt = info->stmt; @@ -568,18 +1852,76 @@ XDELETEVEC (val); } -/* Merge a store recorded by INFO into this merged store. - The store is not overlapping with the existing recorded - stores. */ +/* Return true if the store described by INFO can be merged into the group. */ + +bool +merged_store_group::can_be_merged_into (store_immediate_info *info) +{ + /* Do not merge bswap patterns. 
*/ + if (info->rhs_code == LROTATE_EXPR) + return false; + + /* The canonical case. */ + if (info->rhs_code == stores[0]->rhs_code) + return true; + + /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */ + if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST) + return true; + + if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST) + return true; + + /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */ + if (info->rhs_code == MEM_REF + && (stores[0]->rhs_code == INTEGER_CST + || stores[0]->rhs_code == BIT_INSERT_EXPR) + && info->bitregion_start == stores[0]->bitregion_start + && info->bitregion_end == stores[0]->bitregion_end) + return true; + + if (stores[0]->rhs_code == MEM_REF + && (info->rhs_code == INTEGER_CST + || info->rhs_code == BIT_INSERT_EXPR) + && info->bitregion_start == stores[0]->bitregion_start + && info->bitregion_end == stores[0]->bitregion_end) + return true; + + return false; +} + +/* Helper method for merge_into and merge_overlapping to do + the common part. */ void -merged_store_group::merge_into (store_immediate_info *info) +merged_store_group::do_merge (store_immediate_info *info) { - unsigned HOST_WIDE_INT wid = info->bitsize; - /* Make sure we're inserting in the position we think we're inserting. */ - gcc_assert (info->bitpos == start + width); - - width += wid; + bitregion_start = MIN (bitregion_start, info->bitregion_start); + bitregion_end = MAX (bitregion_end, info->bitregion_end); + + unsigned int this_align; + unsigned HOST_WIDE_INT align_bitpos = 0; + get_object_alignment_1 (gimple_assign_lhs (info->stmt), + &this_align, &align_bitpos); + if (this_align > align) + { + align = this_align; + align_base = info->bitpos - align_bitpos; + } + for (int i = 0; i < 2; ++i) + { + store_operand_info &op = info->ops[i]; + if (!op.base_addr) + continue; + + get_object_alignment_1 (op.val, &this_align, &align_bitpos); + if (this_align > load_align[i]) + { + load_align[i] = this_align; + load_align_base[i] = op.bitpos - align_bitpos; + } + } + gimple *stmt = info->stmt; stores.safe_push (info); if (info->order > last_order) @@ -594,6 +1936,21 @@ } } +/* Merge a store recorded by INFO into this merged store. + The store is not overlapping with the existing recorded + stores. */ + +void +merged_store_group::merge_into (store_immediate_info *info) +{ + /* Make sure we're inserting in the position we think we're inserting. */ + gcc_assert (info->bitpos >= start + width + && info->bitregion_start <= bitregion_end); + + width = info->bitpos + info->bitsize - start; + do_merge (info); +} + /* Merge a store described by INFO into this merged store. INFO overlaps in some way with the current store (i.e. it's not contiguous which is handled by merged_store_group::merge_into). */ @@ -601,23 +1958,11 @@ void merged_store_group::merge_overlapping (store_immediate_info *info) { - gimple *stmt = info->stmt; - stores.safe_push (info); - /* If the store extends the size of the group, extend the width. 
*/ - if ((info->bitpos + info->bitsize) > (start + width)) - width += info->bitpos + info->bitsize - (start + width); - - if (info->order > last_order) - { - last_order = info->order; - last_stmt = stmt; - } - else if (info->order < first_order) - { - first_order = info->order; - first_stmt = stmt; - } + if (info->bitpos + info->bitsize > start + width) + width = info->bitpos + info->bitsize - start; + + do_merge (info); } /* Go through all the recorded stores in this group in program order and @@ -627,48 +1972,71 @@ bool merged_store_group::apply_stores () { - /* The total width of the stores must add up to a whole number of bytes - and start at a byte boundary. We don't support emitting bitfield - references for now. Also, make sure we have more than one store - in the group, otherwise we cannot merge anything. */ - if (width % BITS_PER_UNIT != 0 - || start % BITS_PER_UNIT != 0 + /* Make sure we have more than one store in the group, otherwise we cannot + merge anything. */ + if (bitregion_start % BITS_PER_UNIT != 0 + || bitregion_end % BITS_PER_UNIT != 0 || stores.length () == 1) return false; stores.qsort (sort_by_order); - struct store_immediate_info *info; + store_immediate_info *info; unsigned int i; - /* Create a buffer of a size that is 2 times the number of bytes we're - storing. That way native_encode_expr can write power-of-2-sized - chunks without overrunning. */ - buf_size = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT); - val = XCNEWVEC (unsigned char, buf_size); + /* Create a power-of-2-sized buffer for native_encode_expr. */ + buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT); + val = XNEWVEC (unsigned char, 2 * buf_size); + mask = val + buf_size; + memset (val, 0, buf_size); + memset (mask, ~0U, buf_size); FOR_EACH_VEC_ELT (stores, i, info) { - unsigned int pos_in_buffer = info->bitpos - start; - bool ret = encode_tree_to_bitpos (gimple_assign_rhs1 (info->stmt), - val, info->bitsize, - pos_in_buffer, buf_size); - if (dump_file && (dump_flags & TDF_DETAILS)) + unsigned int pos_in_buffer = info->bitpos - bitregion_start; + tree cst; + if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE) + cst = info->ops[0].val; + else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE) + cst = info->ops[1].val; + else + cst = NULL_TREE; + bool ret = true; + if (cst) + { + if (info->rhs_code == BIT_INSERT_EXPR) + bit_insertion = true; + else + ret = encode_tree_to_bitpos (cst, val, info->bitsize, + pos_in_buffer, buf_size); + } + unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT); + if (BYTES_BIG_ENDIAN) + clear_bit_region_be (m, (BITS_PER_UNIT - 1 + - (pos_in_buffer % BITS_PER_UNIT)), + info->bitsize); + else + clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize); + if (cst && dump_file && (dump_flags & TDF_DETAILS)) { if (ret) { - fprintf (dump_file, "After writing "); - print_generic_expr (dump_file, - gimple_assign_rhs1 (info->stmt), 0); + fputs ("After writing ", dump_file); + print_generic_expr (dump_file, cst, TDF_NONE); fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC - " at position %d the merged region contains:\n", - info->bitsize, pos_in_buffer); + " at position %d\n", info->bitsize, pos_in_buffer); + fputs (" the merged value contains ", dump_file); dump_char_array (dump_file, val, buf_size); + fputs (" the merged mask contains ", dump_file); + dump_char_array (dump_file, mask, buf_size); + if (bit_insertion) + fputs (" bit insertion is required\n", dump_file); } else fprintf (dump_file, "Failed to 
merge stores\n"); - } + } if (!ret) return false; } + stores.qsort (sort_by_bitpos); return true; } @@ -682,7 +2050,7 @@ See pass_store_merging::m_stores_head for more rationale. */ imm_store_chain_info *next, **pnxp; tree base_addr; - auto_vec<struct store_immediate_info *> m_store_info; + auto_vec<store_immediate_info *> m_store_info; auto_vec<merged_store_group *> m_merged_store_groups; imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a) @@ -705,6 +2073,7 @@ } } bool terminate_and_process_chain (); + bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int); bool coalesce_immediate_stores (); bool output_merged_store (merged_store_group *); bool output_merged_stores (); @@ -730,11 +2099,16 @@ { } - /* Pass not supported for PDP-endianness. */ + /* Pass not supported for PDP-endian, nor for insane hosts or + target character sizes where native_{encode,interpret}_expr + doesn't work properly. */ virtual bool gate (function *) { - return flag_store_merging && (WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN); + return flag_store_merging + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8; } virtual unsigned int execute (function *); @@ -753,9 +2127,9 @@ decisions when going out of SSA). */ imm_store_chain_info *m_stores_head; + void process_store (gimple *); bool terminate_and_process_all_chains (); - bool terminate_all_aliasing_chains (imm_store_chain_info **, - bool, gimple *); + bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *); bool terminate_and_release_chain (imm_store_chain_info *); }; // class pass_store_merging @@ -774,18 +2148,13 @@ return ret; } -/* Terminate all chains that are affected by the assignment to DEST, appearing - in statement STMT and ultimately points to the object BASE. Return true if - at least one aliasing chain was terminated. BASE and DEST are allowed to - be NULL_TREE. In that case the aliasing checks are performed on the whole - statement rather than a particular operand in it. VAR_OFFSET_P signifies - whether STMT represents a store to BASE offset by a variable amount. - If that is the case we have to terminate any chain anchored at BASE. */ +/* Terminate all chains that are affected by the statement STMT. + CHAIN_INFO is the chain we should ignore from the checks if + non-NULL. */ bool pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info **chain_info, - bool var_offset_p, gimple *stmt) { bool ret = false; @@ -794,67 +2163,34 @@ if (!gimple_vuse (stmt)) return false; - /* Check if the assignment destination (BASE) is part of a store chain. - This is to catch non-constant stores to destinations that may be part - of a chain. */ - if (chain_info) - { - /* We have a chain at BASE and we're writing to [BASE + <variable>]. - This can interfere with any of the stores so terminate - the chain. */ - if (var_offset_p) - { - terminate_and_release_chain (*chain_info); - ret = true; - } - /* Otherwise go through every store in the chain to see if it - aliases with any of them. 
*/ - else - { - struct store_immediate_info *info; - unsigned int i; - FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info) - { - if (ref_maybe_used_by_stmt_p (stmt, - gimple_assign_lhs (info->stmt)) - || stmt_may_clobber_ref_p (stmt, - gimple_assign_lhs (info->stmt))) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, - "stmt causes chain termination:\n"); - print_gimple_stmt (dump_file, stmt, 0); - } - terminate_and_release_chain (*chain_info); - ret = true; - break; - } - } - } - } - - /* Check for aliasing with all other store chains. */ + tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE; for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next) { next = cur->next; /* We already checked all the stores in chain_info and terminated the chain if necessary. Skip it here. */ - if (chain_info && (*chain_info) == cur) + if (chain_info && *chain_info == cur) continue; - /* We can't use the base object here as that does not reliably exist. - Build a ao_ref from the base object address (if we know the - minimum and maximum offset and the maximum size we could improve - things here). */ - ao_ref chain_ref; - ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE); - if (ref_maybe_used_by_stmt_p (stmt, &chain_ref) - || stmt_may_clobber_ref_p_1 (stmt, &chain_ref)) + store_immediate_info *info; + unsigned int i; + FOR_EACH_VEC_ELT (cur->m_store_info, i, info) { - terminate_and_release_chain (cur); - ret = true; + tree lhs = gimple_assign_lhs (info->stmt); + if (ref_maybe_used_by_stmt_p (stmt, lhs) + || stmt_may_clobber_ref_p (stmt, lhs) + || (store_lhs && refs_output_dependent_p (store_lhs, lhs))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "stmt causes chain termination:\n"); + print_gimple_stmt (dump_file, stmt, 0); + } + terminate_and_release_chain (cur); + ret = true; + break; + } } } @@ -874,6 +2210,429 @@ return ret; } +/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive) + may clobber REF. FIRST and LAST must be in the same basic block and + have non-NULL vdef. We want to be able to sink load of REF across + stores between FIRST and LAST, up to right before LAST. */ + +bool +stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref) +{ + ao_ref r; + ao_ref_init (&r, ref); + unsigned int count = 0; + tree vop = gimple_vdef (last); + gimple *stmt; + + gcc_checking_assert (gimple_bb (first) == gimple_bb (last)); + do + { + stmt = SSA_NAME_DEF_STMT (vop); + if (stmt_may_clobber_ref_p_1 (stmt, &r)) + return true; + if (gimple_store_p (stmt) + && refs_anti_dependent_p (ref, gimple_get_lhs (stmt))) + return true; + /* Avoid quadratic compile time by bounding the number of checks + we perform. */ + if (++count > MAX_STORE_ALIAS_CHECKS) + return true; + vop = gimple_vuse (stmt); + } + while (stmt != first); + return false; +} + +/* Return true if INFO->ops[IDX] is mergeable with the + corresponding loads already in MERGED_STORE group. + BASE_ADDR is the base address of the whole store group. 
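As an illustration of what counts as mergeable loads (hypothetical C, not part of the patch), a field-by-field copy such as

  struct P { unsigned char a, b, c, d; };

  void
  copy4 (struct P *t, const struct P *s)
  {
    t->a = s->a;
    t->b = s->b;
    t->c = s->c;
    t->d = s->d;
  }

can be coalesced into one 32-bit load from *s feeding one 32-bit store to *t, provided the loads share a vuse and no intervening store clobbers them.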
*/ + +bool +compatible_load_p (merged_store_group *merged_store, + store_immediate_info *info, + tree base_addr, int idx) +{ + store_immediate_info *infof = merged_store->stores[0]; + if (!info->ops[idx].base_addr + || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos, + info->bitpos - infof->bitpos) + || !operand_equal_p (info->ops[idx].base_addr, + infof->ops[idx].base_addr, 0)) + return false; + + store_immediate_info *infol = merged_store->stores.last (); + tree load_vuse = gimple_vuse (info->ops[idx].stmt); + /* In this case all vuses should be the same, e.g. + _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4; + or + _1 = s.a; _2 = s.b; t.a = _1; t.b = _2; + and we can emit the coalesced load next to any of those loads. */ + if (gimple_vuse (infof->ops[idx].stmt) == load_vuse + && gimple_vuse (infol->ops[idx].stmt) == load_vuse) + return true; + + /* Otherwise, at least for now require that the load has the same + vuse as the store. See following examples. */ + if (gimple_vuse (info->stmt) != load_vuse) + return false; + + if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt) + || (infof != infol + && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt))) + return false; + + /* If the load is from the same location as the store, already + the construction of the immediate chain info guarantees no intervening + stores, so no further checks are needed. Example: + _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */ + if (known_eq (info->ops[idx].bitpos, info->bitpos) + && operand_equal_p (info->ops[idx].base_addr, base_addr, 0)) + return true; + + /* Otherwise, we need to punt if any of the loads can be clobbered by any + of the stores in the group, or any other stores in between those. + Previous calls to compatible_load_p ensured that for all the + merged_store->stores IDX loads, no stmts starting with + merged_store->first_stmt and ending right before merged_store->last_stmt + clobbers those loads. */ + gimple *first = merged_store->first_stmt; + gimple *last = merged_store->last_stmt; + unsigned int i; + store_immediate_info *infoc; + /* The stores are sorted by increasing store bitpos, so if info->stmt store + comes before the so far first load, we'll be changing + merged_store->first_stmt. In that case we need to give up if + any of the earlier processed loads clobber with the stmts in the new + range. */ + if (info->order < merged_store->first_order) + { + FOR_EACH_VEC_ELT (merged_store->stores, i, infoc) + if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val)) + return false; + first = info->stmt; + } + /* Similarly, we could change merged_store->last_stmt, so ensure + in that case no stmts in the new range clobber any of the earlier + processed loads. */ + else if (info->order > merged_store->last_order) + { + FOR_EACH_VEC_ELT (merged_store->stores, i, infoc) + if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val)) + return false; + last = info->stmt; + } + /* And finally, we'd be adding a new load to the set, ensure it isn't + clobbered in the new range. */ + if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val)) + return false; + + /* Otherwise, we are looking for: + _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4; + or + _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */ + return true; +} + +/* Add all refs loaded to compute VAL to REFS vector. 
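+   For example (illustrative only, not part of this change): if VAL is
+   _3 defined by
+     _1 = MEM[(char *)q_10];
+     _2 = MEM[(char *)q_10 + 1B];
+     _3 = _1 | _2;
+   both MEM references feeding _3 are pushed onto REFS, so the caller
+   can verify none of them is clobbered between the first and the last
+   store of the group.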
*/ + +void +gather_bswap_load_refs (vec<tree> *refs, tree val) +{ + if (TREE_CODE (val) != SSA_NAME) + return; + + gimple *stmt = SSA_NAME_DEF_STMT (val); + if (!is_gimple_assign (stmt)) + return; + + if (gimple_assign_load_p (stmt)) + { + refs->safe_push (gimple_assign_rhs1 (stmt)); + return; + } + + switch (gimple_assign_rhs_class (stmt)) + { + case GIMPLE_BINARY_RHS: + gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt)); + /* FALLTHRU */ + case GIMPLE_UNARY_RHS: + gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt)); + break; + default: + gcc_unreachable (); + } +} + +/* Check if there are any stores in M_STORE_INFO after index I + (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap + a potential group ending with END that have their order + smaller than LAST_ORDER. RHS_CODE is the kind of store in the + group. Return true if there are no such stores. + Consider: + MEM[(long long int *)p_28] = 0; + MEM[(long long int *)p_28 + 8B] = 0; + MEM[(long long int *)p_28 + 16B] = 0; + MEM[(long long int *)p_28 + 24B] = 0; + _129 = (int) _130; + MEM[(int *)p_28 + 8B] = _129; + MEM[(int *)p_28].a = -1; + We already have + MEM[(long long int *)p_28] = 0; + MEM[(int *)p_28].a = -1; + stmts in the current group and need to consider if it is safe to + add MEM[(long long int *)p_28 + 8B] = 0; store into the same group. + There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129; + store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0; + into the group and merging of those 3 stores is successful, merged + stmts will be emitted at the latest store from that group, i.e. + LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store. + The MEM[(int *)p_28 + 8B] = _129; store that originally follows + the MEM[(long long int *)p_28 + 8B] = 0; would now be before it, + so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0; + into the group. That way it will be its own store group and will + not be touched. If RHS_CODE is INTEGER_CST and there are overlapping + INTEGER_CST stores, those are mergeable using merge_overlapping, + so don't return false for those. */ + +static bool +check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i, + enum tree_code rhs_code, unsigned int last_order, + unsigned HOST_WIDE_INT end) +{ + unsigned int len = m_store_info.length (); + for (++i; i < len; ++i) + { + store_immediate_info *info = m_store_info[i]; + if (info->bitpos >= end) + break; + if (info->order < last_order + && (rhs_code != INTEGER_CST || info->rhs_code != INTEGER_CST)) + return false; + } + return true; +} + +/* Return true if m_store_info[first] and at least one following store + form a group which store try_size bitsize value which is byte swapped + from a memory load or some value, or identity from some value. + This uses the bswap pass APIs. 
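+   For example (an illustrative sketch): the byte copies
+     p[0] = q[3]; p[1] = q[2]; p[2] = q[1]; p[3] = q[0];
+   form a 32-bit group whose value is the byte swap of a 32-bit load
+   from q, so, when the target provides __builtin_bswap32, they can be
+   replaced by one load, one byte swap and one 32-bit store.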
*/ + +bool +imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store, + unsigned int first, + unsigned int try_size) +{ + unsigned int len = m_store_info.length (), last = first; + unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize; + if (width >= try_size) + return false; + for (unsigned int i = first + 1; i < len; ++i) + { + if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width + || m_store_info[i]->ins_stmt == NULL) + return false; + width += m_store_info[i]->bitsize; + if (width >= try_size) + { + last = i; + break; + } + } + if (width != try_size) + return false; + + bool allow_unaligned + = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED); + /* Punt if the combined store would not be aligned and we need alignment. */ + if (!allow_unaligned) + { + unsigned int align = merged_store->align; + unsigned HOST_WIDE_INT align_base = merged_store->align_base; + for (unsigned int i = first + 1; i <= last; ++i) + { + unsigned int this_align; + unsigned HOST_WIDE_INT align_bitpos = 0; + get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt), + &this_align, &align_bitpos); + if (this_align > align) + { + align = this_align; + align_base = m_store_info[i]->bitpos - align_bitpos; + } + } + unsigned HOST_WIDE_INT align_bitpos + = (m_store_info[first]->bitpos - align_base) & (align - 1); + if (align_bitpos) + align = least_bit_hwi (align_bitpos); + if (align < try_size) + return false; + } + + tree type; + switch (try_size) + { + case 16: type = uint16_type_node; break; + case 32: type = uint32_type_node; break; + case 64: type = uint64_type_node; break; + default: gcc_unreachable (); + } + struct symbolic_number n; + gimple *ins_stmt = NULL; + int vuse_store = -1; + unsigned int first_order = merged_store->first_order; + unsigned int last_order = merged_store->last_order; + gimple *first_stmt = merged_store->first_stmt; + gimple *last_stmt = merged_store->last_stmt; + unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width; + store_immediate_info *infof = m_store_info[first]; + + for (unsigned int i = first; i <= last; ++i) + { + store_immediate_info *info = m_store_info[i]; + struct symbolic_number this_n = info->n; + this_n.type = type; + if (!this_n.base_addr) + this_n.range = try_size / BITS_PER_UNIT; + else + /* Update vuse in case it has changed by output_merged_stores. */ + this_n.vuse = gimple_vuse (info->ins_stmt); + unsigned int bitpos = info->bitpos - infof->bitpos; + if (!do_shift_rotate (LSHIFT_EXPR, &this_n, + BYTES_BIG_ENDIAN + ? 
try_size - info->bitsize - bitpos + : bitpos)) + return false; + if (this_n.base_addr && vuse_store) + { + unsigned int j; + for (j = first; j <= last; ++j) + if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt)) + break; + if (j > last) + { + if (vuse_store == 1) + return false; + vuse_store = 0; + } + } + if (i == first) + { + n = this_n; + ins_stmt = info->ins_stmt; + } + else + { + if (n.base_addr && n.vuse != this_n.vuse) + { + if (vuse_store == 0) + return false; + vuse_store = 1; + } + if (info->order > last_order) + { + last_order = info->order; + last_stmt = info->stmt; + } + else if (info->order < first_order) + { + first_order = info->order; + first_stmt = info->stmt; + } + end = MAX (end, info->bitpos + info->bitsize); + + ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt, + &this_n, &n); + if (ins_stmt == NULL) + return false; + } + } + + uint64_t cmpxchg, cmpnop; + find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop); + + /* A complete byte swap should make the symbolic number to start with + the largest digit in the highest order byte. Unchanged symbolic + number indicates a read with same endianness as target architecture. */ + if (n.n != cmpnop && n.n != cmpxchg) + return false; + + if (n.base_addr == NULL_TREE && !is_gimple_val (n.src)) + return false; + + if (!check_no_overlap (m_store_info, last, LROTATE_EXPR, last_order, end)) + return false; + + /* Don't handle memory copy this way if normal non-bswap processing + would handle it too. */ + if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1) + { + unsigned int i; + for (i = first; i <= last; ++i) + if (m_store_info[i]->rhs_code != MEM_REF) + break; + if (i == last + 1) + return false; + } + + if (n.n == cmpxchg) + switch (try_size) + { + case 16: + /* Will emit LROTATE_EXPR. */ + break; + case 32: + if (builtin_decl_explicit_p (BUILT_IN_BSWAP32) + && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing) + break; + return false; + case 64: + if (builtin_decl_explicit_p (BUILT_IN_BSWAP64) + && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing) + break; + return false; + default: + gcc_unreachable (); + } + + if (!allow_unaligned && n.base_addr) + { + unsigned int align = get_object_alignment (n.src); + if (align < try_size) + return false; + } + + /* If each load has vuse of the corresponding store, need to verify + the loads can be sunk right before the last store. */ + if (vuse_store == 1) + { + auto_vec<tree, 64> refs; + for (unsigned int i = first; i <= last; ++i) + gather_bswap_load_refs (&refs, + gimple_assign_rhs1 (m_store_info[i]->stmt)); + + unsigned int i; + tree ref; + FOR_EACH_VEC_ELT (refs, i, ref) + if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref)) + return false; + n.vuse = NULL_TREE; + } + + infof->n = n; + infof->ins_stmt = ins_stmt; + for (unsigned int i = first; i <= last; ++i) + { + m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR; + m_store_info[i]->ops[0].base_addr = NULL_TREE; + m_store_info[i]->ops[1].base_addr = NULL_TREE; + if (i != first) + merged_store->merge_into (m_store_info[i]); + } + + return true; +} + /* Go through the candidate stores recorded in m_store_info and merge them into merged_store_group objects recorded into m_merged_store_groups representing the widened stores. 
Return true if coalescing was successful @@ -888,114 +2647,304 @@ return false; if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n", + fprintf (dump_file, "Attempting to coalesce %u stores in chain\n", m_store_info.length ()); store_immediate_info *info; - unsigned int i; + unsigned int i, ignore = 0; /* Order the stores by the bitposition they write to. */ m_store_info.qsort (sort_by_bitpos); info = m_store_info[0]; merged_store_group *merged_store = new merged_store_group (info); + if (dump_file && (dump_flags & TDF_DETAILS)) + fputs ("New store group\n", dump_file); FOR_EACH_VEC_ELT (m_store_info, i, info) { - if (dump_file && (dump_flags & TDF_DETAILS)) + if (i <= ignore) + goto done; + + /* First try to handle group of stores like: + p[0] = data >> 24; + p[1] = data >> 16; + p[2] = data >> 8; + p[3] = data; + using the bswap framework. */ + if (info->bitpos == merged_store->start + merged_store->width + && merged_store->stores.length () == 1 + && merged_store->stores[0]->ins_stmt != NULL + && info->ins_stmt != NULL) { - fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC - " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n", - i, info->bitsize, info->bitpos); - print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt)); - fprintf (dump_file, "\n------------\n"); + unsigned int try_size; + for (try_size = 64; try_size >= 16; try_size >>= 1) + if (try_coalesce_bswap (merged_store, i - 1, try_size)) + break; + + if (try_size >= 16) + { + ignore = i + merged_store->stores.length () - 1; + m_merged_store_groups.safe_push (merged_store); + if (ignore < m_store_info.length ()) + merged_store = new merged_store_group (m_store_info[ignore]); + else + merged_store = NULL; + goto done; + } } - if (i == 0) - continue; - /* |---store 1---| |---store 2---| - Overlapping stores. */ - unsigned HOST_WIDE_INT start = info->bitpos; - if (IN_RANGE (start, merged_store->start, + Overlapping stores. */ + if (IN_RANGE (info->bitpos, merged_store->start, merged_store->start + merged_store->width - 1)) { - merged_store->merge_overlapping (info); - continue; + /* Only allow overlapping stores of constants. */ + if (info->rhs_code == INTEGER_CST) + { + bool only_constants = true; + store_immediate_info *infoj; + unsigned int j; + FOR_EACH_VEC_ELT (merged_store->stores, j, infoj) + if (infoj->rhs_code != INTEGER_CST) + { + only_constants = false; + break; + } + unsigned int last_order + = MAX (merged_store->last_order, info->order); + unsigned HOST_WIDE_INT end + = MAX (merged_store->start + merged_store->width, + info->bitpos + info->bitsize); + if (only_constants + && check_no_overlap (m_store_info, i, INTEGER_CST, + last_order, end)) + { + /* check_no_overlap call above made sure there are no + overlapping stores with non-INTEGER_CST rhs_code + in between the first and last of the stores we've + just merged. If there are any INTEGER_CST rhs_code + stores in between, we need to merge_overlapping them + even if in the sort_by_bitpos order there are other + overlapping stores in between. Keep those stores as is. + Example: + MEM[(int *)p_28] = 0; + MEM[(char *)p_28 + 3B] = 1; + MEM[(char *)p_28 + 1B] = 2; + MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B]; + We can't merge the zero store with the store of two and + not merge anything else, because the store of one is + in the original order in between those two, but in + store_by_bitpos order it comes after the last store that + we can't merge with them. 
We can merge the first 3 stores + and keep the last store as is though. */ + unsigned int len = m_store_info.length (), k = i; + for (unsigned int j = i + 1; j < len; ++j) + { + store_immediate_info *info2 = m_store_info[j]; + if (info2->bitpos >= end) + break; + if (info2->order < last_order) + { + if (info2->rhs_code != INTEGER_CST) + { + /* Normally check_no_overlap makes sure this + doesn't happen, but if end grows below, then + we need to process more stores than + check_no_overlap verified. Example: + MEM[(int *)p_5] = 0; + MEM[(short *)p_5 + 3B] = 1; + MEM[(char *)p_5 + 4B] = _9; + MEM[(char *)p_5 + 2B] = 2; */ + k = 0; + break; + } + k = j; + end = MAX (end, info2->bitpos + info2->bitsize); + } + } + + if (k != 0) + { + merged_store->merge_overlapping (info); + + for (unsigned int j = i + 1; j <= k; j++) + { + store_immediate_info *info2 = m_store_info[j]; + gcc_assert (info2->bitpos < end); + if (info2->order < last_order) + { + gcc_assert (info2->rhs_code == INTEGER_CST); + merged_store->merge_overlapping (info2); + } + /* Other stores are kept and not merged in any + way. */ + } + ignore = k; + goto done; + } + } + } + } + /* |---store 1---||---store 2---| + This store is consecutive to the previous one. + Merge it into the current store group. There can be gaps in between + the stores, but there can't be gaps in between bitregions. */ + else if (info->bitregion_start <= merged_store->bitregion_end + && merged_store->can_be_merged_into (info)) + { + store_immediate_info *infof = merged_store->stores[0]; + + /* All the rhs_code ops that take 2 operands are commutative, + swap the operands if it could make the operands compatible. */ + if (infof->ops[0].base_addr + && infof->ops[1].base_addr + && info->ops[0].base_addr + && info->ops[1].base_addr + && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos, + info->bitpos - infof->bitpos) + && operand_equal_p (info->ops[1].base_addr, + infof->ops[0].base_addr, 0)) + { + std::swap (info->ops[0], info->ops[1]); + info->ops_swapped_p = true; + } + if (check_no_overlap (m_store_info, i, info->rhs_code, + MAX (merged_store->last_order, info->order), + MAX (merged_store->start + merged_store->width, + info->bitpos + info->bitsize))) + { + /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */ + if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF) + { + info->rhs_code = BIT_INSERT_EXPR; + info->ops[0].val = gimple_assign_rhs1 (info->stmt); + info->ops[0].base_addr = NULL_TREE; + } + else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF) + { + store_immediate_info *infoj; + unsigned int j; + FOR_EACH_VEC_ELT (merged_store->stores, j, infoj) + { + infoj->rhs_code = BIT_INSERT_EXPR; + infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt); + infoj->ops[0].base_addr = NULL_TREE; + } + } + if ((infof->ops[0].base_addr + ? compatible_load_p (merged_store, info, base_addr, 0) + : !info->ops[0].base_addr) + && (infof->ops[1].base_addr + ? compatible_load_p (merged_store, info, base_addr, 1) + : !info->ops[1].base_addr)) + { + merged_store->merge_into (info); + goto done; + } + } } /* |---store 1---| <gap> |---store 2---|. - Gap between stores. Start a new group. */ - if (start != merged_store->start + merged_store->width) + Gap between stores or the rhs not compatible. Start a new group. */ + + /* Try to apply all the stores recorded for the group to determine + the bitpattern they write and discard it if that fails. + This will also reject single-store groups. 
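+   For instance (illustrative only), a group holding the two adjacent
+   byte stores
+     MEM[(char *)p_1] = 1; MEM[(char *)p_1 + 1B] = 2;
+   is encoded into a single two-byte pattern, whereas a group that
+   ends up with a single store is discarded because rewriting it could
+   not save any statements.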
*/ + if (merged_store->apply_stores ()) + m_merged_store_groups.safe_push (merged_store); + else + delete merged_store; + + merged_store = new merged_store_group (info); + if (dump_file && (dump_flags & TDF_DETAILS)) + fputs ("New store group\n", dump_file); + + done: + if (dump_file && (dump_flags & TDF_DETAILS)) { - /* Try to apply all the stores recorded for the group to determine - the bitpattern they write and discard it if that fails. - This will also reject single-store groups. */ - if (!merged_store->apply_stores ()) - delete merged_store; - else - m_merged_store_groups.safe_push (merged_store); - - merged_store = new merged_store_group (info); - - continue; + fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC + " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:", + i, info->bitsize, info->bitpos); + print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt)); + fputc ('\n', dump_file); } - - /* |---store 1---||---store 2---| - This store is consecutive to the previous one. - Merge it into the current store group. */ - merged_store->merge_into (info); } - /* Record or discard the last store group. */ - if (!merged_store->apply_stores ()) - delete merged_store; - else - m_merged_store_groups.safe_push (merged_store); + /* Record or discard the last store group. */ + if (merged_store) + { + if (merged_store->apply_stores ()) + m_merged_store_groups.safe_push (merged_store); + else + delete merged_store; + } gcc_assert (m_merged_store_groups.length () <= m_store_info.length ()); + bool success = !m_merged_store_groups.is_empty () && m_merged_store_groups.length () < m_store_info.length (); if (success && dump_file) - fprintf (dump_file, "Coalescing successful!\n" - "Merged into %u stores\n", - m_merged_store_groups.length ()); + fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n", + m_merged_store_groups.length ()); return success; } -/* Return the type to use for the merged stores described by STMTS. - This is needed to get the alias sets right. */ +/* Return the type to use for the merged stores or loads described by STMTS. + This is needed to get the alias sets right. If IS_LOAD, look for rhs, + otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_* + of the MEM_REFs if any. */ static tree -get_alias_type_for_stmts (auto_vec<gimple *> &stmts) +get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load, + unsigned short *cliquep, unsigned short *basep) { gimple *stmt; unsigned int i; - tree lhs = gimple_assign_lhs (stmts[0]); - tree type = reference_alias_ptr_type (lhs); + tree type = NULL_TREE; + tree ret = NULL_TREE; + *cliquep = 0; + *basep = 0; FOR_EACH_VEC_ELT (stmts, i, stmt) { + tree ref = is_load ? gimple_assign_rhs1 (stmt) + : gimple_assign_lhs (stmt); + tree type1 = reference_alias_ptr_type (ref); + tree base = get_base_address (ref); + if (i == 0) - continue; - - lhs = gimple_assign_lhs (stmt); - tree type1 = reference_alias_ptr_type (lhs); + { + if (TREE_CODE (base) == MEM_REF) + { + *cliquep = MR_DEPENDENCE_CLIQUE (base); + *basep = MR_DEPENDENCE_BASE (base); + } + ret = type = type1; + continue; + } if (!alias_ptr_types_compatible_p (type, type1)) - return ptr_type_node; + ret = ptr_type_node; + if (TREE_CODE (base) != MEM_REF + || *cliquep != MR_DEPENDENCE_CLIQUE (base) + || *basep != MR_DEPENDENCE_BASE (base)) + { + *cliquep = 0; + *basep = 0; + } } - return type; + return ret; } /* Return the location_t information we can find among the statements in STMTS. 
*/ static location_t -get_location_for_stmts (auto_vec<gimple *> &stmts) +get_location_for_stmts (vec<gimple *> &stmts) { gimple *stmt; unsigned int i; @@ -1015,7 +2964,9 @@ unsigned HOST_WIDE_INT bytepos; unsigned HOST_WIDE_INT size; unsigned HOST_WIDE_INT align; - auto_vec<gimple *> orig_stmts; + auto_vec<store_immediate_info *> orig_stores; + /* True if there is a single orig stmt covering the whole split store. */ + bool orig; split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT); }; @@ -1025,100 +2976,524 @@ split_store::split_store (unsigned HOST_WIDE_INT bp, unsigned HOST_WIDE_INT sz, unsigned HOST_WIDE_INT al) - : bytepos (bp), size (sz), align (al) + : bytepos (bp), size (sz), align (al), orig (false) { - orig_stmts.create (0); + orig_stores.create (0); } -/* Record all statements corresponding to stores in GROUP that write to - the region starting at BITPOS and is of size BITSIZE. Record such - statements in STMTS. The stores in GROUP must be sorted by - bitposition. */ - -static void -find_constituent_stmts (struct merged_store_group *group, - auto_vec<gimple *> &stmts, +/* Record all stores in GROUP that write to the region starting at BITPOS and + is of size BITSIZE. Record infos for such statements in STORES if + non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO + if there is exactly one original store in the range. */ + +static store_immediate_info * +find_constituent_stores (struct merged_store_group *group, + vec<store_immediate_info *> *stores, + unsigned int *first, unsigned HOST_WIDE_INT bitpos, unsigned HOST_WIDE_INT bitsize) { - struct store_immediate_info *info; + store_immediate_info *info, *ret = NULL; unsigned int i; + bool second = false; + bool update_first = true; unsigned HOST_WIDE_INT end = bitpos + bitsize; - FOR_EACH_VEC_ELT (group->stores, i, info) + for (i = *first; group->stores.iterate (i, &info); ++i) { unsigned HOST_WIDE_INT stmt_start = info->bitpos; unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize; - if (stmt_end < bitpos) - continue; + if (stmt_end <= bitpos) + { + /* BITPOS passed to this function never decreases from within the + same split_group call, so optimize and don't scan info records + which are known to end before or at BITPOS next time. + Only do it if all stores before this one also pass this. */ + if (update_first) + *first = i + 1; + continue; + } + else + update_first = false; + /* The stores in GROUP are ordered by bitposition so if we're past - the region for this group return early. */ - if (stmt_start > end) - return; - - if (IN_RANGE (stmt_start, bitpos, bitpos + bitsize) - || IN_RANGE (stmt_end, bitpos, end) - /* The statement writes a region that completely encloses the region - that this group writes. Unlikely to occur but let's - handle it. */ - || IN_RANGE (bitpos, stmt_start, stmt_end)) - stmts.safe_push (info->stmt); + the region for this group return early. */ + if (stmt_start >= end) + return ret; + + if (stores) + { + stores->safe_push (info); + if (ret) + { + ret = NULL; + second = true; + } + } + else if (ret) + return NULL; + if (!second) + ret = info; + } + return ret; +} + +/* Return how many SSA_NAMEs used to compute value to store in the INFO + store have multiple uses. If any SSA_NAME has multiple uses, also + count statements needed to compute it. 
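+   For example (an illustrative case): for
+     _1 = s.a;
+     t.a = _1;
+   where _1 has further uses elsewhere, the load computing _1 has to
+   stay even after the stores are merged, so it is counted against the
+   merged sequence when estimating whether merging is profitable.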
*/ + +static unsigned +count_multiple_uses (store_immediate_info *info) +{ + gimple *stmt = info->stmt; + unsigned ret = 0; + switch (info->rhs_code) + { + case INTEGER_CST: + return 0; + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + if (info->bit_not_p) + { + if (!has_single_use (gimple_assign_rhs1 (stmt))) + ret = 1; /* Fall through below to return + the BIT_NOT_EXPR stmt and then + BIT_{AND,IOR,XOR}_EXPR and anything it + uses. */ + else + /* stmt is after this the BIT_NOT_EXPR. */ + stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + } + if (!has_single_use (gimple_assign_rhs1 (stmt))) + { + ret += 1 + info->ops[0].bit_not_p; + if (info->ops[1].base_addr) + ret += 1 + info->ops[1].bit_not_p; + return ret + 1; + } + stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + /* stmt is now the BIT_*_EXPR. */ + if (!has_single_use (gimple_assign_rhs1 (stmt))) + ret += 1 + info->ops[info->ops_swapped_p].bit_not_p; + else if (info->ops[info->ops_swapped_p].bit_not_p) + { + gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + if (!has_single_use (gimple_assign_rhs1 (stmt2))) + ++ret; + } + if (info->ops[1].base_addr == NULL_TREE) + { + gcc_checking_assert (!info->ops_swapped_p); + return ret; + } + if (!has_single_use (gimple_assign_rhs2 (stmt))) + ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p; + else if (info->ops[1 - info->ops_swapped_p].bit_not_p) + { + gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); + if (!has_single_use (gimple_assign_rhs1 (stmt2))) + ++ret; + } + return ret; + case MEM_REF: + if (!has_single_use (gimple_assign_rhs1 (stmt))) + return 1 + info->ops[0].bit_not_p; + else if (info->ops[0].bit_not_p) + { + stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + if (!has_single_use (gimple_assign_rhs1 (stmt))) + return 1; + } + return 0; + case BIT_INSERT_EXPR: + return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1; + default: + gcc_unreachable (); } } /* Split a merged store described by GROUP by populating the SPLIT_STORES - vector with split_store structs describing the byte offset (from the base), - the bit size and alignment of each store as well as the original statements - involved in each such split group. + vector (if non-NULL) with split_store structs describing the byte offset + (from the base), the bit size and alignment of each store as well as the + original statements involved in each such split group. This is to separate the splitting strategy from the statement building/emission/linking done in output_merged_store. - At the moment just start with the widest possible size and keep emitting - the widest we can until we have emitted all the bytes, halving the size - when appropriate. */ - -static bool -split_group (merged_store_group *group, - auto_vec<struct split_store *> &split_stores) + Return number of new stores. + If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned. + If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned. + If SPLIT_STORES is NULL, it is just a dry run to count number of + new stores. 
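+   For example (a hypothetical target with 4-byte word stores and a
+   4-byte aligned destination): a 6-byte group of constant stores
+   would be split into one 4-byte store followed by one 2-byte store,
+   so the function would return 2.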
*/ + +static unsigned int +split_group (merged_store_group *group, bool allow_unaligned_store, + bool allow_unaligned_load, + vec<struct split_store *> *split_stores, + unsigned *total_orig, + unsigned *total_new) { - unsigned HOST_WIDE_INT pos = group->start; - unsigned HOST_WIDE_INT size = group->width; + unsigned HOST_WIDE_INT pos = group->bitregion_start; + unsigned HOST_WIDE_INT size = group->bitregion_end - pos; unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT; - unsigned HOST_WIDE_INT align = group->align; - - /* We don't handle partial bitfields for now. We shouldn't have - reached this far. */ + unsigned HOST_WIDE_INT group_align = group->align; + unsigned HOST_WIDE_INT align_base = group->align_base; + unsigned HOST_WIDE_INT group_load_align = group_align; + bool any_orig = false; + gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0)); - bool allow_unaligned - = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED); - - unsigned int try_size = MAX_STORE_BITSIZE; - while (try_size > size - || (!allow_unaligned - && try_size > align)) + if (group->stores[0]->rhs_code == LROTATE_EXPR + || group->stores[0]->rhs_code == NOP_EXPR) { - try_size /= 2; - if (try_size < BITS_PER_UNIT) - return false; + /* For bswap framework using sets of stores, all the checking + has been done earlier in try_coalesce_bswap and needs to be + emitted as a single store. */ + if (total_orig) + { + /* Avoid the old/new stmt count heuristics. It should be + always beneficial. */ + total_new[0] = 1; + total_orig[0] = 2; + } + + if (split_stores) + { + unsigned HOST_WIDE_INT align_bitpos + = (group->start - align_base) & (group_align - 1); + unsigned HOST_WIDE_INT align = group_align; + if (align_bitpos) + align = least_bit_hwi (align_bitpos); + bytepos = group->start / BITS_PER_UNIT; + struct split_store *store + = new split_store (bytepos, group->width, align); + unsigned int first = 0; + find_constituent_stores (group, &store->orig_stores, + &first, group->start, group->width); + split_stores->safe_push (store); + } + + return 1; } + unsigned int ret = 0, first = 0; unsigned HOST_WIDE_INT try_pos = bytepos; - group->stores.qsort (sort_by_bitpos); + + if (total_orig) + { + unsigned int i; + store_immediate_info *info = group->stores[0]; + + total_new[0] = 0; + total_orig[0] = 1; /* The orig store. */ + info = group->stores[0]; + if (info->ops[0].base_addr) + total_orig[0]++; + if (info->ops[1].base_addr) + total_orig[0]++; + switch (info->rhs_code) + { + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + total_orig[0]++; /* The orig BIT_*_EXPR stmt. */ + break; + default: + break; + } + total_orig[0] *= group->stores.length (); + + FOR_EACH_VEC_ELT (group->stores, i, info) + { + total_new[0] += count_multiple_uses (info); + total_orig[0] += (info->bit_not_p + + info->ops[0].bit_not_p + + info->ops[1].bit_not_p); + } + } + + if (!allow_unaligned_load) + for (int i = 0; i < 2; ++i) + if (group->load_align[i]) + group_load_align = MIN (group_load_align, group->load_align[i]); while (size > 0) { - struct split_store *store = new split_store (try_pos, try_size, align); + if ((allow_unaligned_store || group_align <= BITS_PER_UNIT) + && group->mask[try_pos - bytepos] == (unsigned char) ~0U) + { + /* Skip padding bytes. 
*/ + ++try_pos; + size -= BITS_PER_UNIT; + continue; + } + unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT; - find_constituent_stmts (group, store->orig_stmts, try_bitpos, try_size); - split_stores.safe_push (store); + unsigned int try_size = MAX_STORE_BITSIZE, nonmasked; + unsigned HOST_WIDE_INT align_bitpos + = (try_bitpos - align_base) & (group_align - 1); + unsigned HOST_WIDE_INT align = group_align; + if (align_bitpos) + align = least_bit_hwi (align_bitpos); + if (!allow_unaligned_store) + try_size = MIN (try_size, align); + if (!allow_unaligned_load) + { + /* If we can't do or don't want to do unaligned stores + as well as loads, we need to take the loads into account + as well. */ + unsigned HOST_WIDE_INT load_align = group_load_align; + align_bitpos = (try_bitpos - align_base) & (load_align - 1); + if (align_bitpos) + load_align = least_bit_hwi (align_bitpos); + for (int i = 0; i < 2; ++i) + if (group->load_align[i]) + { + align_bitpos + = known_alignment (try_bitpos + - group->stores[0]->bitpos + + group->stores[0]->ops[i].bitpos + - group->load_align_base[i]); + if (align_bitpos & (group_load_align - 1)) + { + unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos); + load_align = MIN (load_align, a); + } + } + try_size = MIN (try_size, load_align); + } + store_immediate_info *info + = find_constituent_stores (group, NULL, &first, try_bitpos, try_size); + if (info) + { + /* If there is just one original statement for the range, see if + we can just reuse the original store which could be even larger + than try_size. */ + unsigned HOST_WIDE_INT stmt_end + = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT); + info = find_constituent_stores (group, NULL, &first, try_bitpos, + stmt_end - try_bitpos); + if (info && info->bitpos >= try_bitpos) + { + try_size = stmt_end - try_bitpos; + goto found; + } + } + + /* Approximate store bitsize for the case when there are no padding + bits. */ + while (try_size > size) + try_size /= 2; + /* Now look for whole padding bytes at the end of that bitsize. */ + for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked) + if (group->mask[try_pos - bytepos + nonmasked - 1] + != (unsigned char) ~0U) + break; + if (nonmasked == 0) + { + /* If entire try_size range is padding, skip it. */ + try_pos += try_size / BITS_PER_UNIT; + size -= try_size; + continue; + } + /* Otherwise try to decrease try_size if second half, last 3 quarters + etc. are padding. */ + nonmasked *= BITS_PER_UNIT; + while (nonmasked <= try_size / 2) + try_size /= 2; + if (!allow_unaligned_store && group_align > BITS_PER_UNIT) + { + /* Now look for whole padding bytes at the start of that bitsize. */ + unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked; + for (masked = 0; masked < try_bytesize; ++masked) + if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U) + break; + masked *= BITS_PER_UNIT; + gcc_assert (masked < try_size); + if (masked >= try_size / 2) + { + while (masked >= try_size / 2) + { + try_size /= 2; + try_pos += try_size / BITS_PER_UNIT; + size -= try_size; + masked -= try_size; + } + /* Need to recompute the alignment, so just retry at the new + position. 
*/ + continue; + } + } + + found: + ++ret; + + if (split_stores) + { + struct split_store *store + = new split_store (try_pos, try_size, align); + info = find_constituent_stores (group, &store->orig_stores, + &first, try_bitpos, try_size); + if (info + && info->bitpos >= try_bitpos + && info->bitpos + info->bitsize <= try_bitpos + try_size) + { + store->orig = true; + any_orig = true; + } + split_stores->safe_push (store); + } try_pos += try_size / BITS_PER_UNIT; - size -= try_size; - align = try_size; - while (size < try_size) - try_size /= 2; + } + + if (total_orig) + { + unsigned int i; + struct split_store *store; + /* If we are reusing some original stores and any of the + original SSA_NAMEs had multiple uses, we need to subtract + those now before we add the new ones. */ + if (total_new[0] && any_orig) + { + FOR_EACH_VEC_ELT (*split_stores, i, store) + if (store->orig) + total_new[0] -= count_multiple_uses (store->orig_stores[0]); + } + total_new[0] += ret; /* The new store. */ + store_immediate_info *info = group->stores[0]; + if (info->ops[0].base_addr) + total_new[0] += ret; + if (info->ops[1].base_addr) + total_new[0] += ret; + switch (info->rhs_code) + { + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + total_new[0] += ret; /* The new BIT_*_EXPR stmt. */ + break; + default: + break; + } + FOR_EACH_VEC_ELT (*split_stores, i, store) + { + unsigned int j; + bool bit_not_p[3] = { false, false, false }; + /* If all orig_stores have certain bit_not_p set, then + we'd use a BIT_NOT_EXPR stmt and need to account for it. + If some orig_stores have certain bit_not_p set, then + we'd use a BIT_XOR_EXPR with a mask and need to account for + it. */ + FOR_EACH_VEC_ELT (store->orig_stores, j, info) + { + if (info->ops[0].bit_not_p) + bit_not_p[0] = true; + if (info->ops[1].bit_not_p) + bit_not_p[1] = true; + if (info->bit_not_p) + bit_not_p[2] = true; + } + total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2]; + } + } - return true; + + return ret; +} + +/* Return the operation through which the operand IDX (if < 2) or + result (IDX == 2) should be inverted. If NOP_EXPR, no inversion + is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR, + the bits should be xored with mask. */ + +static enum tree_code +invert_op (split_store *split_store, int idx, tree int_type, tree &mask) +{ + unsigned int i; + store_immediate_info *info; + unsigned int cnt = 0; + bool any_paddings = false; + FOR_EACH_VEC_ELT (split_store->orig_stores, i, info) + { + bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p; + if (bit_not_p) + { + ++cnt; + tree lhs = gimple_assign_lhs (info->stmt); + if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize) + any_paddings = true; + } + } + mask = NULL_TREE; + if (cnt == 0) + return NOP_EXPR; + if (cnt == split_store->orig_stores.length () && !any_paddings) + return BIT_NOT_EXPR; + + unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT; + unsigned buf_size = split_store->size / BITS_PER_UNIT; + unsigned char *buf + = XALLOCAVEC (unsigned char, buf_size); + memset (buf, ~0U, buf_size); + FOR_EACH_VEC_ELT (split_store->orig_stores, i, info) + { + bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p; + if (!bit_not_p) + continue; + /* Clear regions with bit_not_p and invert afterwards, rather than + clear regions with !bit_not_p, so that gaps in between stores aren't + set in the mask. 
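+   For example (illustrative only): if a 2-byte split store covers two
+   original byte stores and only the first one has bit_not_p set, the
+   buffer starts as all ones, the first byte is cleared and the final
+   inversion leaves only that byte's bits set in the mask, so the
+   emitted BIT_XOR_EXPR flips exactly the bits that were stored
+   negated.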
*/ + unsigned HOST_WIDE_INT bitsize = info->bitsize; + unsigned HOST_WIDE_INT prec = bitsize; + unsigned int pos_in_buffer = 0; + if (any_paddings) + { + tree lhs = gimple_assign_lhs (info->stmt); + if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize) + prec = TYPE_PRECISION (TREE_TYPE (lhs)); + } + if (info->bitpos < try_bitpos) + { + gcc_assert (info->bitpos + bitsize > try_bitpos); + if (!BYTES_BIG_ENDIAN) + { + if (prec <= try_bitpos - info->bitpos) + continue; + prec -= try_bitpos - info->bitpos; + } + bitsize -= try_bitpos - info->bitpos; + if (BYTES_BIG_ENDIAN && prec > bitsize) + prec = bitsize; + } + else + pos_in_buffer = info->bitpos - try_bitpos; + if (prec < bitsize) + { + /* If this is a bool inversion, invert just the least significant + prec bits rather than all bits of it. */ + if (BYTES_BIG_ENDIAN) + { + pos_in_buffer += bitsize - prec; + if (pos_in_buffer >= split_store->size) + continue; + } + bitsize = prec; + } + if (pos_in_buffer + bitsize > split_store->size) + bitsize = split_store->size - pos_in_buffer; + unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT); + if (BYTES_BIG_ENDIAN) + clear_bit_region_be (p, (BITS_PER_UNIT - 1 + - (pos_in_buffer % BITS_PER_UNIT)), bitsize); + else + clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize); + } + for (unsigned int i = 0; i < buf_size; ++i) + buf[i] = ~buf[i]; + mask = native_interpret_expr (int_type, buf, buf_size); + return BIT_XOR_EXPR; } /* Given a merged store group GROUP output the widened version of it. @@ -1132,81 +3507,495 @@ bool imm_store_chain_info::output_merged_store (merged_store_group *group) { - unsigned HOST_WIDE_INT start_byte_pos = group->start / BITS_PER_UNIT; + split_store *split_store; + unsigned int i; + unsigned HOST_WIDE_INT start_byte_pos + = group->bitregion_start / BITS_PER_UNIT; unsigned int orig_num_stmts = group->stores.length (); if (orig_num_stmts < 2) return false; - auto_vec<struct split_store *> split_stores; - split_stores.create (0); - if (!split_group (group, split_stores)) - return false; + auto_vec<struct split_store *, 32> split_stores; + bool allow_unaligned_store + = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED); + bool allow_unaligned_load = allow_unaligned_store; + if (allow_unaligned_store) + { + /* If unaligned stores are allowed, see how many stores we'd emit + for unaligned and how many stores we'd emit for aligned stores. + Only use unaligned stores if it allows fewer stores than aligned. */ + unsigned aligned_cnt + = split_group (group, false, allow_unaligned_load, NULL, NULL, NULL); + unsigned unaligned_cnt + = split_group (group, true, allow_unaligned_load, NULL, NULL, NULL); + if (aligned_cnt <= unaligned_cnt) + allow_unaligned_store = false; + } + unsigned total_orig, total_new; + split_group (group, allow_unaligned_store, allow_unaligned_load, + &split_stores, &total_orig, &total_new); + + if (split_stores.length () >= orig_num_stmts) + { + /* We didn't manage to reduce the number of statements. Bail out. */ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Exceeded original number of stmts (%u)." + " Not profitable to emit new sequence.\n", + orig_num_stmts); + FOR_EACH_VEC_ELT (split_stores, i, split_store) + delete split_store; + return false; + } + if (total_orig <= total_new) + { + /* If number of estimated new statements is above estimated original + statements, bail out too. 
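+   For instance (an illustrative estimate): four original one-byte
+   constant stores count as four statements against a single wider
+   store, so merging proceeds; if loads, bitwise ops or multiply-used
+   SSA names push the new estimate up to or past the original one, the
+   group is left untouched.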
*/ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Estimated number of original stmts (%u)" + " not larger than estimated number of new" + " stmts (%u).\n", + total_orig, total_new); + FOR_EACH_VEC_ELT (split_stores, i, split_store) + delete split_store; + return false; + } gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt); gimple_seq seq = NULL; - unsigned int num_stmts = 0; tree last_vdef, new_vuse; last_vdef = gimple_vdef (group->last_stmt); new_vuse = gimple_vuse (group->last_stmt); + tree bswap_res = NULL_TREE; + + if (group->stores[0]->rhs_code == LROTATE_EXPR + || group->stores[0]->rhs_code == NOP_EXPR) + { + tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; + gimple *ins_stmt = group->stores[0]->ins_stmt; + struct symbolic_number *n = &group->stores[0]->n; + bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR; + + switch (n->range) + { + case 16: + load_type = bswap_type = uint16_type_node; + break; + case 32: + load_type = uint32_type_node; + if (bswap) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); + bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + } + break; + case 64: + load_type = uint64_type_node; + if (bswap) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); + bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + } + break; + default: + gcc_unreachable (); + } + + /* If the loads have each vuse of the corresponding store, + we've checked the aliasing already in try_coalesce_bswap and + we want to sink the need load into seq. So need to use new_vuse + on the load. */ + if (n->base_addr) + { + if (n->vuse == NULL) + { + n->vuse = new_vuse; + ins_stmt = NULL; + } + else + /* Update vuse in case it has changed by output_merged_stores. */ + n->vuse = gimple_vuse (ins_stmt); + } + bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl, + bswap_type, load_type, n, bswap); + gcc_assert (bswap_res); + } gimple *stmt = NULL; - /* The new SSA names created. Keep track of them so that we can free them - if we decide to not use the new sequence. */ - auto_vec<tree> new_ssa_names; - split_store *split_store; - unsigned int i; - bool fail = false; - - tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq, + auto_vec<gimple *, 32> orig_stmts; + gimple_seq this_seq; + tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq, is_gimple_mem_ref_addr, NULL_TREE); + gimple_seq_add_seq_without_update (&seq, this_seq); + + tree load_addr[2] = { NULL_TREE, NULL_TREE }; + gimple_seq load_seq[2] = { NULL, NULL }; + gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () }; + for (int j = 0; j < 2; ++j) + { + store_operand_info &op = group->stores[0]->ops[j]; + if (op.base_addr == NULL_TREE) + continue; + + store_immediate_info *infol = group->stores.last (); + if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt)) + { + /* We can't pick the location randomly; while we've verified + all the loads have the same vuse, they can be still in different + basic blocks and we need to pick the one from the last bb: + int x = q[0]; + if (x == N) return; + int y = q[1]; + p[0] = x; + p[1] = y; + otherwise if we put the wider load at the q[0] load, we might + segfault if q[1] is not mapped. 
*/ + basic_block bb = gimple_bb (op.stmt); + gimple *ostmt = op.stmt; + store_immediate_info *info; + FOR_EACH_VEC_ELT (group->stores, i, info) + { + gimple *tstmt = info->ops[j].stmt; + basic_block tbb = gimple_bb (tstmt); + if (dominated_by_p (CDI_DOMINATORS, tbb, bb)) + { + ostmt = tstmt; + bb = tbb; + } + } + load_gsi[j] = gsi_for_stmt (ostmt); + load_addr[j] + = force_gimple_operand_1 (unshare_expr (op.base_addr), + &load_seq[j], is_gimple_mem_ref_addr, + NULL_TREE); + } + else if (operand_equal_p (base_addr, op.base_addr, 0)) + load_addr[j] = addr; + else + { + load_addr[j] + = force_gimple_operand_1 (unshare_expr (op.base_addr), + &this_seq, is_gimple_mem_ref_addr, + NULL_TREE); + gimple_seq_add_seq_without_update (&seq, this_seq); + } + } + FOR_EACH_VEC_ELT (split_stores, i, split_store) { unsigned HOST_WIDE_INT try_size = split_store->size; unsigned HOST_WIDE_INT try_pos = split_store->bytepos; + unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT; unsigned HOST_WIDE_INT align = split_store->align; - tree offset_type = get_alias_type_for_stmts (split_store->orig_stmts); - location_t loc = get_location_for_stmts (split_store->orig_stmts); - - tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED); - int_type = build_aligned_type (int_type, align); - tree dest = fold_build2 (MEM_REF, int_type, addr, - build_int_cst (offset_type, try_pos)); - - tree src = native_interpret_expr (int_type, - group->val + try_pos - start_byte_pos, - group->buf_size); + tree dest, src; + location_t loc; + if (split_store->orig) + { + /* If there is just a single constituent store which covers + the whole area, just reuse the lhs and rhs. */ + gimple *orig_stmt = split_store->orig_stores[0]->stmt; + dest = gimple_assign_lhs (orig_stmt); + src = gimple_assign_rhs1 (orig_stmt); + loc = gimple_location (orig_stmt); + } + else + { + store_immediate_info *info; + unsigned short clique, base; + unsigned int k; + FOR_EACH_VEC_ELT (split_store->orig_stores, k, info) + orig_stmts.safe_push (info->stmt); + tree offset_type + = get_alias_type_for_stmts (orig_stmts, false, &clique, &base); + loc = get_location_for_stmts (orig_stmts); + orig_stmts.truncate (0); + + tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED); + int_type = build_aligned_type (int_type, align); + dest = fold_build2 (MEM_REF, int_type, addr, + build_int_cst (offset_type, try_pos)); + if (TREE_CODE (dest) == MEM_REF) + { + MR_DEPENDENCE_CLIQUE (dest) = clique; + MR_DEPENDENCE_BASE (dest) = base; + } + + tree mask; + if (bswap_res) + mask = integer_zero_node; + else + mask = native_interpret_expr (int_type, + group->mask + try_pos + - start_byte_pos, + group->buf_size); + + tree ops[2]; + for (int j = 0; + j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE); + ++j) + { + store_operand_info &op = split_store->orig_stores[0]->ops[j]; + if (bswap_res) + ops[j] = bswap_res; + else if (op.base_addr) + { + FOR_EACH_VEC_ELT (split_store->orig_stores, k, info) + orig_stmts.safe_push (info->ops[j].stmt); + + offset_type = get_alias_type_for_stmts (orig_stmts, true, + &clique, &base); + location_t load_loc = get_location_for_stmts (orig_stmts); + orig_stmts.truncate (0); + + unsigned HOST_WIDE_INT load_align = group->load_align[j]; + unsigned HOST_WIDE_INT align_bitpos + = known_alignment (try_bitpos + - split_store->orig_stores[0]->bitpos + + op.bitpos); + if (align_bitpos & (load_align - 1)) + load_align = least_bit_hwi (align_bitpos); + + tree load_int_type + = build_nonstandard_integer_type (try_size, 
UNSIGNED); + load_int_type + = build_aligned_type (load_int_type, load_align); + + poly_uint64 load_pos + = exact_div (try_bitpos + - split_store->orig_stores[0]->bitpos + + op.bitpos, + BITS_PER_UNIT); + ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j], + build_int_cst (offset_type, load_pos)); + if (TREE_CODE (ops[j]) == MEM_REF) + { + MR_DEPENDENCE_CLIQUE (ops[j]) = clique; + MR_DEPENDENCE_BASE (ops[j]) = base; + } + if (!integer_zerop (mask)) + /* The load might load some bits (that will be masked off + later on) uninitialized, avoid -W*uninitialized + warnings in that case. */ + TREE_NO_WARNING (ops[j]) = 1; + + stmt = gimple_build_assign (make_ssa_name (int_type), + ops[j]); + gimple_set_location (stmt, load_loc); + if (gsi_bb (load_gsi[j])) + { + gimple_set_vuse (stmt, gimple_vuse (op.stmt)); + gimple_seq_add_stmt_without_update (&load_seq[j], stmt); + } + else + { + gimple_set_vuse (stmt, new_vuse); + gimple_seq_add_stmt_without_update (&seq, stmt); + } + ops[j] = gimple_assign_lhs (stmt); + tree xor_mask; + enum tree_code inv_op + = invert_op (split_store, j, int_type, xor_mask); + if (inv_op != NOP_EXPR) + { + stmt = gimple_build_assign (make_ssa_name (int_type), + inv_op, ops[j], xor_mask); + gimple_set_location (stmt, load_loc); + ops[j] = gimple_assign_lhs (stmt); + + if (gsi_bb (load_gsi[j])) + gimple_seq_add_stmt_without_update (&load_seq[j], + stmt); + else + gimple_seq_add_stmt_without_update (&seq, stmt); + } + } + else + ops[j] = native_interpret_expr (int_type, + group->val + try_pos + - start_byte_pos, + group->buf_size); + } + + switch (split_store->orig_stores[0]->rhs_code) + { + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + FOR_EACH_VEC_ELT (split_store->orig_stores, k, info) + { + tree rhs1 = gimple_assign_rhs1 (info->stmt); + orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1)); + } + location_t bit_loc; + bit_loc = get_location_for_stmts (orig_stmts); + orig_stmts.truncate (0); + + stmt + = gimple_build_assign (make_ssa_name (int_type), + split_store->orig_stores[0]->rhs_code, + ops[0], ops[1]); + gimple_set_location (stmt, bit_loc); + /* If there is just one load and there is a separate + load_seq[0], emit the bitwise op right after it. */ + if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0])) + gimple_seq_add_stmt_without_update (&load_seq[0], stmt); + /* Otherwise, if at least one load is in seq, we need to + emit the bitwise op right before the store. If there + are two loads and are emitted somewhere else, it would + be better to emit the bitwise op as early as possible; + we don't track where that would be possible right now + though. 
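+   For example (an illustrative sketch): for
+     t.a = s.a & u.a; t.b = s.b & u.b;
+   the widened loads from s and u are emitted first and a single
+   BIT_AND_EXPR combining them is placed right before the widened
+   store to t.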
*/ + else + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + tree xor_mask; + enum tree_code inv_op; + inv_op = invert_op (split_store, 2, int_type, xor_mask); + if (inv_op != NOP_EXPR) + { + stmt = gimple_build_assign (make_ssa_name (int_type), + inv_op, src, xor_mask); + gimple_set_location (stmt, bit_loc); + if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0])) + gimple_seq_add_stmt_without_update (&load_seq[0], stmt); + else + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + } + break; + case LROTATE_EXPR: + case NOP_EXPR: + src = ops[0]; + if (!is_gimple_val (src)) + { + stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)), + src); + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + } + if (!useless_type_conversion_p (int_type, TREE_TYPE (src))) + { + stmt = gimple_build_assign (make_ssa_name (int_type), + NOP_EXPR, src); + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + } + inv_op = invert_op (split_store, 2, int_type, xor_mask); + if (inv_op != NOP_EXPR) + { + stmt = gimple_build_assign (make_ssa_name (int_type), + inv_op, src, xor_mask); + gimple_set_location (stmt, loc); + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + } + break; + default: + src = ops[0]; + break; + } + + /* If bit insertion is required, we use the source as an accumulator + into which the successive bit-field values are manually inserted. + FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */ + if (group->bit_insertion) + FOR_EACH_VEC_ELT (split_store->orig_stores, k, info) + if (info->rhs_code == BIT_INSERT_EXPR + && info->bitpos < try_bitpos + try_size + && info->bitpos + info->bitsize > try_bitpos) + { + /* Mask, truncate, convert to final type, shift and ior into + the accumulator. Note that every step can be a no-op. */ + const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos; + const HOST_WIDE_INT end_gap + = (try_bitpos + try_size) - (info->bitpos + info->bitsize); + tree tem = info->ops[0].val; + if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize) + { + tree bitfield_type + = build_nonstandard_integer_type (info->bitsize, + UNSIGNED); + tem = gimple_convert (&seq, loc, bitfield_type, tem); + } + else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0) + { + const unsigned HOST_WIDE_INT imask + = (HOST_WIDE_INT_1U << info->bitsize) - 1; + tem = gimple_build (&seq, loc, + BIT_AND_EXPR, TREE_TYPE (tem), tem, + build_int_cst (TREE_TYPE (tem), + imask)); + } + const HOST_WIDE_INT shift + = (BYTES_BIG_ENDIAN ? end_gap : start_gap); + if (shift < 0) + tem = gimple_build (&seq, loc, + RSHIFT_EXPR, TREE_TYPE (tem), tem, + build_int_cst (NULL_TREE, -shift)); + tem = gimple_convert (&seq, loc, int_type, tem); + if (shift > 0) + tem = gimple_build (&seq, loc, + LSHIFT_EXPR, int_type, tem, + build_int_cst (NULL_TREE, shift)); + src = gimple_build (&seq, loc, + BIT_IOR_EXPR, int_type, tem, src); + } + + if (!integer_zerop (mask)) + { + tree tem = make_ssa_name (int_type); + tree load_src = unshare_expr (dest); + /* The load might load some or all bits uninitialized, + avoid -W*uninitialized warnings in that case. + As optimization, it would be nice if all the bits are + provably uninitialized (no stores at all yet or previous + store a CLOBBER) we'd optimize away the load and replace + it e.g. with 0. 
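+   The emitted sequence is roughly (illustrative notation):
+     _1 = MEM[p];          load the current contents
+     _2 = _1 & mask;       keep the bits the group does not write
+     _3 = src & ~mask;     bits written by the group
+     MEM[p] = _2 | _3;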
*/ + TREE_NO_WARNING (load_src) = 1; + stmt = gimple_build_assign (tem, load_src); + gimple_set_location (stmt, loc); + gimple_set_vuse (stmt, new_vuse); + gimple_seq_add_stmt_without_update (&seq, stmt); + + /* FIXME: If there is a single chunk of zero bits in mask, + perhaps use BIT_INSERT_EXPR instead? */ + stmt = gimple_build_assign (make_ssa_name (int_type), + BIT_AND_EXPR, tem, mask); + gimple_set_location (stmt, loc); + gimple_seq_add_stmt_without_update (&seq, stmt); + tem = gimple_assign_lhs (stmt); + + if (TREE_CODE (src) == INTEGER_CST) + src = wide_int_to_tree (int_type, + wi::bit_and_not (wi::to_wide (src), + wi::to_wide (mask))); + else + { + tree nmask + = wide_int_to_tree (int_type, + wi::bit_not (wi::to_wide (mask))); + stmt = gimple_build_assign (make_ssa_name (int_type), + BIT_AND_EXPR, src, nmask); + gimple_set_location (stmt, loc); + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + } + stmt = gimple_build_assign (make_ssa_name (int_type), + BIT_IOR_EXPR, tem, src); + gimple_set_location (stmt, loc); + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + } + } stmt = gimple_build_assign (dest, src); gimple_set_location (stmt, loc); gimple_set_vuse (stmt, new_vuse); gimple_seq_add_stmt_without_update (&seq, stmt); - /* We didn't manage to reduce the number of statements. Bail out. */ - if (++num_stmts == orig_num_stmts) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Exceeded original number of stmts (%u)." - " Not profitable to emit new sequence.\n", - orig_num_stmts); - } - unsigned int ssa_count; - tree ssa_name; - /* Don't forget to cleanup the temporary SSA names. */ - FOR_EACH_VEC_ELT (new_ssa_names, ssa_count, ssa_name) - release_ssa_name (ssa_name); - - fail = true; - break; - } - tree new_vdef; if (i < split_stores.length () - 1) - { - new_vdef = make_ssa_name (gimple_vop (cfun), stmt); - new_ssa_names.safe_push (new_vdef); - } + new_vdef = make_ssa_name (gimple_vop (cfun), stmt); else new_vdef = last_vdef; @@ -1218,19 +4007,19 @@ FOR_EACH_VEC_ELT (split_stores, i, split_store) delete split_store; - if (fail) - return false; - gcc_assert (seq); if (dump_file) { fprintf (dump_file, - "New sequence of %u stmts to replace old one of %u stmts\n", - num_stmts, orig_num_stmts); + "New sequence of %u stores to replace old one of %u stores\n", + split_stores.length (), orig_num_stmts); if (dump_flags & TDF_DETAILS) print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS); } gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT); + for (int j = 0; j < 2; ++j) + if (load_seq[j]) + gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT); return true; } @@ -1325,14 +4114,394 @@ static bool rhs_valid_for_store_merging_p (tree rhs) { - return native_encode_expr (rhs, NULL, - GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs)))) != 0; + unsigned HOST_WIDE_INT size; + return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size) + && native_encode_expr (rhs, NULL, size) != 0); +} + +/* If MEM is a memory reference usable for store merging (either as + store destination or for loads), return the non-NULL base_addr + and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END. + Otherwise return NULL, *PBITPOS should be still valid even for that + case. 
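+   For example (an illustrative case): for a COMPONENT_REF like s.f,
+   where f is a 3-bit bit-field starting at bit 5 of s, this returns
+   the address of s, sets *PBITSIZE to 3 and *PBITPOS to 5, and sets
+   *PBITREGION_START and *PBITREGION_END to the surrounding region a
+   read-modify-write of the bit-field may touch.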
*/ + +static tree +mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize, + poly_uint64 *pbitpos, + poly_uint64 *pbitregion_start, + poly_uint64 *pbitregion_end) +{ + poly_int64 bitsize, bitpos; + poly_uint64 bitregion_start = 0, bitregion_end = 0; + machine_mode mode; + int unsignedp = 0, reversep = 0, volatilep = 0; + tree offset; + tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode, + &unsignedp, &reversep, &volatilep); + *pbitsize = bitsize; + if (known_eq (bitsize, 0)) + return NULL_TREE; + + if (TREE_CODE (mem) == COMPONENT_REF + && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1))) + { + get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset); + if (maybe_ne (bitregion_end, 0U)) + bitregion_end += 1; + } + + if (reversep) + return NULL_TREE; + + /* We do not want to rewrite TARGET_MEM_REFs. */ + if (TREE_CODE (base_addr) == TARGET_MEM_REF) + return NULL_TREE; + /* In some cases get_inner_reference may return a + MEM_REF [ptr + byteoffset]. For the purposes of this pass + canonicalize the base_addr to MEM_REF [ptr] and take + byteoffset into account in the bitpos. This occurs in + PR 23684 and this way we can catch more chains. */ + else if (TREE_CODE (base_addr) == MEM_REF) + { + poly_offset_int byte_off = mem_ref_offset (base_addr); + poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT; + bit_off += bitpos; + if (known_ge (bit_off, 0) && bit_off.to_shwi (&bitpos)) + { + if (maybe_ne (bitregion_end, 0U)) + { + bit_off = byte_off << LOG2_BITS_PER_UNIT; + bit_off += bitregion_start; + if (bit_off.to_uhwi (&bitregion_start)) + { + bit_off = byte_off << LOG2_BITS_PER_UNIT; + bit_off += bitregion_end; + if (!bit_off.to_uhwi (&bitregion_end)) + bitregion_end = 0; + } + else + bitregion_end = 0; + } + } + else + return NULL_TREE; + base_addr = TREE_OPERAND (base_addr, 0); + } + /* get_inner_reference returns the base object, get at its + address now. */ + else + { + if (maybe_lt (bitpos, 0)) + return NULL_TREE; + base_addr = build_fold_addr_expr (base_addr); + } + + if (known_eq (bitregion_end, 0U)) + { + bitregion_start = round_down_to_byte_boundary (bitpos); + bitregion_end = bitpos; + bitregion_end = round_up_to_byte_boundary (bitregion_end + bitsize); + } + + if (offset != NULL_TREE) + { + /* If the access is variable offset then a base decl has to be + address-taken to be able to emit pointer-based stores to it. + ??? We might be able to get away with re-using the original + base up to the first variable part and then wrapping that inside + a BIT_FIELD_REF. */ + tree base = get_base_address (base_addr); + if (! base + || (DECL_P (base) && ! TREE_ADDRESSABLE (base))) + return NULL_TREE; + + base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr), + base_addr, offset); + } + + *pbitsize = bitsize; + *pbitpos = bitpos; + *pbitregion_start = bitregion_start; + *pbitregion_end = bitregion_end; + return base_addr; +} + +/* Return true if STMT is a load that can be used for store merging. + In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and + BITREGION_END are properties of the corresponding store. 
*/ + +static bool +handled_load (gimple *stmt, store_operand_info *op, + poly_uint64 bitsize, poly_uint64 bitpos, + poly_uint64 bitregion_start, poly_uint64 bitregion_end) +{ + if (!is_gimple_assign (stmt)) + return false; + if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR) + { + tree rhs1 = gimple_assign_rhs1 (stmt); + if (TREE_CODE (rhs1) == SSA_NAME + && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos, + bitregion_start, bitregion_end)) + { + /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have + been optimized earlier, but if allowed here, would confuse the + multiple uses counting. */ + if (op->bit_not_p) + return false; + op->bit_not_p = !op->bit_not_p; + return true; + } + return false; + } + if (gimple_vuse (stmt) + && gimple_assign_load_p (stmt) + && !stmt_can_throw_internal (cfun, stmt) + && !gimple_has_volatile_ops (stmt)) + { + tree mem = gimple_assign_rhs1 (stmt); + op->base_addr + = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos, + &op->bitregion_start, + &op->bitregion_end); + if (op->base_addr != NULL_TREE + && known_eq (op->bitsize, bitsize) + && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT) + && known_ge (op->bitpos - op->bitregion_start, + bitpos - bitregion_start) + && known_ge (op->bitregion_end - op->bitpos, + bitregion_end - bitpos)) + { + op->stmt = stmt; + op->val = mem; + op->bit_not_p = false; + return true; + } + } + return false; +} + +/* Record the store STMT for store merging optimization if it can be + optimized. */ + +void +pass_store_merging::process_store (gimple *stmt) +{ + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + poly_uint64 bitsize, bitpos; + poly_uint64 bitregion_start, bitregion_end; + tree base_addr + = mem_valid_for_store_merging (lhs, &bitsize, &bitpos, + &bitregion_start, &bitregion_end); + if (known_eq (bitsize, 0U)) + return; + + bool invalid = (base_addr == NULL_TREE + || (maybe_gt (bitsize, + (unsigned int) MAX_BITSIZE_MODE_ANY_INT) + && (TREE_CODE (rhs) != INTEGER_CST))); + enum tree_code rhs_code = ERROR_MARK; + bool bit_not_p = false; + struct symbolic_number n; + gimple *ins_stmt = NULL; + store_operand_info ops[2]; + if (invalid) + ; + else if (rhs_valid_for_store_merging_p (rhs)) + { + rhs_code = INTEGER_CST; + ops[0].val = rhs; + } + else if (TREE_CODE (rhs) != SSA_NAME) + invalid = true; + else + { + gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2; + if (!is_gimple_assign (def_stmt)) + invalid = true; + else if (handled_load (def_stmt, &ops[0], bitsize, bitpos, + bitregion_start, bitregion_end)) + rhs_code = MEM_REF; + else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR) + { + tree rhs1 = gimple_assign_rhs1 (def_stmt); + if (TREE_CODE (rhs1) == SSA_NAME + && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1))) + { + bit_not_p = true; + def_stmt = SSA_NAME_DEF_STMT (rhs1); + } + } + + if (rhs_code == ERROR_MARK && !invalid) + switch ((rhs_code = gimple_assign_rhs_code (def_stmt))) + { + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + tree rhs1, rhs2; + rhs1 = gimple_assign_rhs1 (def_stmt); + rhs2 = gimple_assign_rhs2 (def_stmt); + invalid = true; + if (TREE_CODE (rhs1) != SSA_NAME) + break; + def_stmt1 = SSA_NAME_DEF_STMT (rhs1); + if (!is_gimple_assign (def_stmt1) + || !handled_load (def_stmt1, &ops[0], bitsize, bitpos, + bitregion_start, bitregion_end)) + break; + if (rhs_valid_for_store_merging_p (rhs2)) + ops[1].val = rhs2; + else if (TREE_CODE (rhs2) != SSA_NAME) + break; + else + { + def_stmt2 = SSA_NAME_DEF_STMT (rhs2); + 
if (!is_gimple_assign (def_stmt2)) + break; + else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos, + bitregion_start, bitregion_end)) + break; + } + invalid = false; + break; + default: + invalid = true; + break; + } + + unsigned HOST_WIDE_INT const_bitsize; + if (bitsize.is_constant (&const_bitsize) + && (const_bitsize % BITS_PER_UNIT) == 0 + && const_bitsize <= 64 + && multiple_p (bitpos, BITS_PER_UNIT)) + { + ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12); + if (ins_stmt) + { + uint64_t nn = n.n; + for (unsigned HOST_WIDE_INT i = 0; + i < const_bitsize; + i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER) + if ((nn & MARKER_MASK) == 0 + || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN) + { + ins_stmt = NULL; + break; + } + if (ins_stmt) + { + if (invalid) + { + rhs_code = LROTATE_EXPR; + ops[0].base_addr = NULL_TREE; + ops[1].base_addr = NULL_TREE; + } + invalid = false; + } + } + } + + if (invalid + && bitsize.is_constant (&const_bitsize) + && ((const_bitsize % BITS_PER_UNIT) != 0 + || !multiple_p (bitpos, BITS_PER_UNIT)) + && const_bitsize <= 64) + { + /* Bypass a conversion to the bit-field type. */ + if (!bit_not_p + && is_gimple_assign (def_stmt) + && CONVERT_EXPR_CODE_P (rhs_code)) + { + tree rhs1 = gimple_assign_rhs1 (def_stmt); + if (TREE_CODE (rhs1) == SSA_NAME + && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))) + rhs = rhs1; + } + rhs_code = BIT_INSERT_EXPR; + bit_not_p = false; + ops[0].val = rhs; + ops[0].base_addr = NULL_TREE; + ops[1].base_addr = NULL_TREE; + invalid = false; + } + } + + unsigned HOST_WIDE_INT const_bitsize, const_bitpos; + unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end; + if (invalid + || !bitsize.is_constant (&const_bitsize) + || !bitpos.is_constant (&const_bitpos) + || !bitregion_start.is_constant (&const_bitregion_start) + || !bitregion_end.is_constant (&const_bitregion_end)) + { + terminate_all_aliasing_chains (NULL, stmt); + return; + } + + if (!ins_stmt) + memset (&n, 0, sizeof (n)); + + struct imm_store_chain_info **chain_info = NULL; + if (base_addr) + chain_info = m_stores.get (base_addr); + + store_immediate_info *info; + if (chain_info) + { + unsigned int ord = (*chain_info)->m_store_info.length (); + info = new store_immediate_info (const_bitsize, const_bitpos, + const_bitregion_start, + const_bitregion_end, + stmt, ord, rhs_code, n, ins_stmt, + bit_not_p, ops[0], ops[1]); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Recording immediate store from stmt:\n"); + print_gimple_stmt (dump_file, stmt, 0); + } + (*chain_info)->m_store_info.safe_push (info); + terminate_all_aliasing_chains (chain_info, stmt); + /* If we reach the limit of stores to merge in a chain terminate and + process the chain now. */ + if ((*chain_info)->m_store_info.length () + == (unsigned int) PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Reached maximum number of statements to merge:\n"); + terminate_and_release_chain (*chain_info); + } + return; + } + + /* Store aliases any existing chain? */ + terminate_all_aliasing_chains (NULL, stmt); + /* Start a new chain. 
*/ + struct imm_store_chain_info *new_chain + = new imm_store_chain_info (m_stores_head, base_addr); + info = new store_immediate_info (const_bitsize, const_bitpos, + const_bitregion_start, + const_bitregion_end, + stmt, 0, rhs_code, n, ins_stmt, + bit_not_p, ops[0], ops[1]); + new_chain->m_store_info.safe_push (info); + m_stores.put (base_addr, new_chain); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Starting new chain with statement:\n"); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "The base object is:\n"); + print_generic_expr (dump_file, base_addr); + fprintf (dump_file, "\n"); + } } /* Entry point for the pass. Go over each basic block recording chains of - immediate stores. Upon encountering a terminating statement (as defined - by stmt_terminates_chain_p) process the recorded stores and emit the widened - variants. */ + immediate stores. Upon encountering a terminating statement (as defined + by stmt_terminates_chain_p) process the recorded stores and emit the widened + variants. */ unsigned int pass_store_merging::execute (function *fun) @@ -1340,6 +4509,8 @@ basic_block bb; hash_set<gimple *> orig_stmts; + calculate_dominance_info (CDI_DOMINATORS); + FOR_EACH_BB_FN (bb, fun) { gimple_stmt_iterator gsi; @@ -1380,135 +4551,11 @@ } if (gimple_assign_single_p (stmt) && gimple_vdef (stmt) - && !stmt_can_throw_internal (stmt) + && !stmt_can_throw_internal (cfun, stmt) && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))) - { - tree lhs = gimple_assign_lhs (stmt); - tree rhs = gimple_assign_rhs1 (stmt); - - HOST_WIDE_INT bitsize, bitpos; - machine_mode mode; - int unsignedp = 0, reversep = 0, volatilep = 0; - tree offset, base_addr; - base_addr - = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode, - &unsignedp, &reversep, &volatilep); - /* As a future enhancement we could handle stores with the same - base and offset. */ - bool invalid = reversep - || ((bitsize > MAX_BITSIZE_MODE_ANY_INT) - && (TREE_CODE (rhs) != INTEGER_CST)) - || !rhs_valid_for_store_merging_p (rhs); - - /* We do not want to rewrite TARGET_MEM_REFs. */ - if (TREE_CODE (base_addr) == TARGET_MEM_REF) - invalid = true; - /* In some cases get_inner_reference may return a - MEM_REF [ptr + byteoffset]. For the purposes of this pass - canonicalize the base_addr to MEM_REF [ptr] and take - byteoffset into account in the bitpos. This occurs in - PR 23684 and this way we can catch more chains. */ - else if (TREE_CODE (base_addr) == MEM_REF) - { - offset_int bit_off, byte_off = mem_ref_offset (base_addr); - bit_off = byte_off << LOG2_BITS_PER_UNIT; - bit_off += bitpos; - if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off)) - bitpos = bit_off.to_shwi (); - else - invalid = true; - base_addr = TREE_OPERAND (base_addr, 0); - } - /* get_inner_reference returns the base object, get at its - address now. */ - else - { - if (bitpos < 0) - invalid = true; - base_addr = build_fold_addr_expr (base_addr); - } - - if (! invalid - && offset != NULL_TREE) - { - /* If the access is variable offset then a base - decl has to be address-taken to be able to - emit pointer-based stores to it. - ??? We might be able to get away with - re-using the original base up to the first - variable part and then wrapping that inside - a BIT_FIELD_REF. */ - tree base = get_base_address (base_addr); - if (! base - || (DECL_P (base) - && ! 
TREE_ADDRESSABLE (base))) - invalid = true; - else - base_addr = build2 (POINTER_PLUS_EXPR, - TREE_TYPE (base_addr), - base_addr, offset); - } - - struct imm_store_chain_info **chain_info - = m_stores.get (base_addr); - - if (!invalid) - { - store_immediate_info *info; - if (chain_info) - { - info = new store_immediate_info ( - bitsize, bitpos, stmt, - (*chain_info)->m_store_info.length ()); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, - "Recording immediate store from stmt:\n"); - print_gimple_stmt (dump_file, stmt, 0); - } - (*chain_info)->m_store_info.safe_push (info); - /* If we reach the limit of stores to merge in a chain - terminate and process the chain now. */ - if ((*chain_info)->m_store_info.length () - == (unsigned int) - PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, - "Reached maximum number of statements" - " to merge:\n"); - terminate_and_release_chain (*chain_info); - } - continue; - } - - /* Store aliases any existing chain? */ - terminate_all_aliasing_chains (chain_info, false, stmt); - /* Start a new chain. */ - struct imm_store_chain_info *new_chain - = new imm_store_chain_info (m_stores_head, base_addr); - info = new store_immediate_info (bitsize, bitpos, - stmt, 0); - new_chain->m_store_info.safe_push (info); - m_stores.put (base_addr, new_chain); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, - "Starting new chain with statement:\n"); - print_gimple_stmt (dump_file, stmt, 0); - fprintf (dump_file, "The base object is:\n"); - print_generic_expr (dump_file, base_addr); - fprintf (dump_file, "\n"); - } - } - else - terminate_all_aliasing_chains (chain_info, - offset != NULL_TREE, stmt); - - continue; - } - - terminate_all_aliasing_chains (NULL, false, stmt); + process_store (stmt); + else + terminate_all_aliasing_chains (NULL, stmt); } terminate_and_process_all_chains (); }
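
The BIT_INSERT_EXPR handling in the merged-value construction above (the FOR_EACH_VEC_ELT loop that masks, truncates, shifts and iors each bit-field value into the accumulator) is the usual mask/shift/or insertion. A minimal standalone C sketch of that technique follows; the field widths, positions and function name are illustrative assumptions, not the pass's internal representation, and little-endian bit numbering within the word is assumed.

/* Sketch of the accumulator technique used for BIT_INSERT_EXPR groups:
   each bit-field value is masked to its width, shifted to its position
   and or-ed into the value being built up.  Field layout is made up.  */
#include <stdint.h>
#include <stdio.h>

static uint32_t
insert_field (uint32_t acc, uint32_t val, unsigned pos, unsigned width)
{
  uint32_t mask = (width >= 32) ? 0xffffffffu : ((1u << width) - 1);
  val &= mask;                 /* truncate the value to the field width */
  return acc | (val << pos);   /* shift into position and accumulate */
}

int
main (void)
{
  uint32_t acc = 0;
  acc = insert_field (acc, 0x5, 0, 3);    /* 3-bit field at bit 0 */
  acc = insert_field (acc, 0x1f, 3, 5);   /* 5-bit field at bit 3 */
  acc = insert_field (acc, 0xab, 8, 8);   /* 8-bit field at bit 8 */
  printf ("0x%x\n", (unsigned) acc);      /* prints 0xabfd */
  return 0;
}
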
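When the split store does not cover every bit of the chosen integer type (the !integer_zerop (mask) branch above), the emitted sequence is a read-modify-write: load the destination, keep the preserved bits with BIT_AND_EXPR against the mask, clear those same bits from the new value, and BIT_IOR_EXPR the two halves. A plain-C sketch of that combine step, with an illustrative function name and driver values:

/* Sketch of the masked read-modify-write combine: KEEP_MASK has a bit
   set for every position the merged store must preserve from memory.  */
#include <stdint.h>
#include <stdio.h>

static uint32_t
rmw_combine (uint32_t old_word, uint32_t new_bits, uint32_t keep_mask)
{
  return (old_word & keep_mask)      /* bits the group must preserve */
         | (new_bits & ~keep_mask);  /* bits produced by the merged stores */
}

int
main (void)
{
  /* Keep the most significant byte, overwrite the low three bytes.  */
  uint32_t merged = rmw_combine (0xaabbccddu, 0x00112233u, 0xff000000u);
  printf ("0x%x\n", (unsigned) merged);   /* prints 0xaa112233 */
  return 0;
}
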
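mem_valid_for_store_merging above canonicalizes a MEM_REF [ptr + byteoffset] base to MEM_REF [ptr] by folding the constant byte offset into the bit position, and when no bit region is recorded it defaults the region to the byte-aligned span enclosing the access. Both are simple bit arithmetic; a sketch over plain integers, assuming BITS_PER_UNIT is 8 and omitting the overflow checks the pass performs with poly_offset_int:

#include <stdio.h>

#define LOG2_BITS_PER_UNIT 3   /* assuming 8-bit units */

/* Fold a constant byte offset on the base pointer into the bit position.  */
static long long
fold_byte_offset (long long byte_off, long long bitpos)
{
  return (byte_off << LOG2_BITS_PER_UNIT) + bitpos;
}

/* Default bit region: the byte-aligned span enclosing the access.  */
static void
default_bit_region (long long bitpos, long long bitsize,
                    long long *start, long long *end)
{
  *start = bitpos & ~7LL;                  /* round down to a byte boundary */
  *end = (bitpos + bitsize + 7LL) & ~7LL;  /* round up to a byte boundary */
}

int
main (void)
{
  long long start, end;
  /* MEM_REF [p + 4] at bit 16 becomes MEM_REF [p] at bit 48.  */
  long long bitpos = fold_byte_offset (4, 16);
  default_bit_region (bitpos, 8, &start, &end);
  printf ("bitpos %lld, region [%lld, %lld)\n", bitpos, start, end);
  return 0;
}
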
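handled_load above accepts a feeding load for a store only when the load has the same bit size, sits a whole number of bytes away from the store, and its bit region leaves at least as much slack on each side of the access as the store's region does. The same checks over plain integers (SSA and poly_uint64 plumbing dropped; struct and function names are illustrative):

/* Sketch of the geometric checks handled_load applies to a candidate
   load, with positions and sizes in bits and BITS_PER_UNIT == 8.  */
#include <stdbool.h>
#include <stdio.h>

struct region { long long bitsize, bitpos, region_start, region_end; };

static bool
load_compatible_with_store (const struct region *load,
                            const struct region *store)
{
  return load->bitsize == store->bitsize
         /* Load and store must be a whole number of bytes apart.  */
         && (load->bitpos - store->bitpos) % 8 == 0
         /* The load's bit region must leave at least as much room
            before and after the access as the store's region does.  */
         && (load->bitpos - load->region_start
             >= store->bitpos - store->region_start)
         && (load->region_end - load->bitpos
             >= store->region_end - store->bitpos);
}

int
main (void)
{
  struct region store = { 8, 32, 32, 40 };
  struct region load  = { 8, 64, 64, 72 };
  printf ("%d\n", load_compatible_with_store (&load, &store));  /* prints 1 */
  return 0;
}

As a usage note, the chain length cap checked in process_store via PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE) should correspond to the --param max-stores-to-merge=<n> knob, so the point at which chains are cut off and processed can be tuned from the command line.
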
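The find_bswap_or_nop_1 call in process_store reuses the bswap machinery's symbolic numbers: each byte of the stored value carries a marker naming the source byte it was taken from, and only if every marker is known can the group later be emitted with rhs_code LROTATE_EXPR as an identity or byte-swapped wide store. The classic kind of byte permutation that marker analysis resolves, shown as a standalone example on a 32-bit value:

/* Open-coded 32-bit byte swap; the symbolic-number analysis can prove
   that each destination byte comes from a single known source byte, so
   the computation can be recognized as a byte swap (or as a no-op).  */
#include <stdint.h>
#include <stdio.h>

static uint32_t
manual_bswap32 (uint32_t x)
{
  return (x >> 24)
         | ((x >> 8) & 0x0000ff00u)
         | ((x << 8) & 0x00ff0000u)
         | (x << 24);
}

int
main (void)
{
  printf ("0x%x\n", (unsigned) manual_bswap32 (0x11223344u));  /* 0x44332211 */
  return 0;
}
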