diff gcc/tree-sra.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp>
date | Fri, 17 Jul 2009 14:47:48 +0900
parents |
children | 77e2b8dfacca
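The header comment of tree-sra.c in the diff below describes Scalar Replacement of Aggregates (SRA) in four phases. As a reader aid, here is a minimal, hedged sketch of the source-level effect the pass aims for; it is not part of this changeset, and the struct, function, and SR_* variable names are hypothetical examples.

/* Illustrative only: what scalar replacement of a small,
   non-addressable aggregate amounts to.  */
struct point { int x; int y; };

/* Before SRA: the aggregate 'p' is accessed member by member.  */
int before_sra (int a, int b)
{
  struct point p;
  p.x = a;
  p.y = b;
  return p.x + p.y;
}

/* After SRA: each accessed member gets an independent scalar
   replacement variable (the "SR" temporaries created by
   instantiate_element in the code below), and 'p' itself goes away.  */
int after_sra (int a, int b)
{
  int SR_p_x = a;
  int SR_p_y = b;
  return SR_p_x + SR_p_y;
}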
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gcc/tree-sra.c Fri Jul 17 14:47:48 2009 +0900 @@ -0,0 +1,3728 @@ +/* Scalar Replacement of Aggregates (SRA) converts some structure + references into scalar references, exposing them to the scalar + optimizers. + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 + Free Software Foundation, Inc. + Contributed by Diego Novillo <dnovillo@redhat.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "ggc.h" +#include "tree.h" + +/* These RTL headers are needed for basic-block.h. */ +#include "rtl.h" +#include "tm_p.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "langhooks.h" +#include "tree-inline.h" +#include "tree-flow.h" +#include "gimple.h" +#include "tree-dump.h" +#include "tree-pass.h" +#include "timevar.h" +#include "flags.h" +#include "bitmap.h" +#include "obstack.h" +#include "target.h" +/* expr.h is needed for MOVE_RATIO. */ +#include "expr.h" +#include "params.h" + + +/* This object of this pass is to replace a non-addressable aggregate with a + set of independent variables. Most of the time, all of these variables + will be scalars. But a secondary objective is to break up larger + aggregates into smaller aggregates. In the process we may find that some + bits of the larger aggregate can be deleted as unreferenced. + + This substitution is done globally. More localized substitutions would + be the purvey of a load-store motion pass. + + The optimization proceeds in phases: + + (1) Identify variables that have types that are candidates for + decomposition. + + (2) Scan the function looking for the ways these variables are used. + In particular we're interested in the number of times a variable + (or member) is needed as a complete unit, and the number of times + a variable (or member) is copied. + + (3) Based on the usage profile, instantiate substitution variables. + + (4) Scan the function making replacements. +*/ + + +/* True if this is the "early" pass, before inlining. */ +static bool early_sra; + +/* The set of todo flags to return from tree_sra. */ +static unsigned int todoflags; + +/* The set of aggregate variables that are candidates for scalarization. */ +static bitmap sra_candidates; + +/* Set of scalarizable PARM_DECLs that need copy-in operations at the + beginning of the function. */ +static bitmap needs_copy_in; + +/* Sets of bit pairs that cache type decomposition and instantiation. */ +static bitmap sra_type_decomp_cache; +static bitmap sra_type_inst_cache; + +/* One of these structures is created for each candidate aggregate and + each (accessed) member or group of members of such an aggregate. */ +struct sra_elt +{ + /* A tree of the elements. Used when we want to traverse everything. 
*/ + struct sra_elt *parent; + struct sra_elt *groups; + struct sra_elt *children; + struct sra_elt *sibling; + + /* If this element is a root, then this is the VAR_DECL. If this is + a sub-element, this is some token used to identify the reference. + In the case of COMPONENT_REF, this is the FIELD_DECL. In the case + of an ARRAY_REF, this is the (constant) index. In the case of an + ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR. In the case + of a complex number, this is a zero or one. */ + tree element; + + /* The type of the element. */ + tree type; + + /* A VAR_DECL, for any sub-element we've decided to replace. */ + tree replacement; + + /* The number of times the element is referenced as a whole. I.e. + given "a.b.c", this would be incremented for C, but not for A or B. */ + unsigned int n_uses; + + /* The number of times the element is copied to or from another + scalarizable element. */ + unsigned int n_copies; + + /* True if TYPE is scalar. */ + bool is_scalar; + + /* True if this element is a group of members of its parent. */ + bool is_group; + + /* True if we saw something about this element that prevents scalarization, + such as non-constant indexing. */ + bool cannot_scalarize; + + /* True if we've decided that structure-to-structure assignment + should happen via memcpy and not per-element. */ + bool use_block_copy; + + /* True if everything under this element has been marked TREE_NO_WARNING. */ + bool all_no_warning; + + /* A flag for use with/after random access traversals. */ + bool visited; + + /* True if there is BIT_FIELD_REF on the lhs with a vector. */ + bool is_vector_lhs; + + /* 1 if the element is a field that is part of a block, 2 if the field + is the block itself, 0 if it's neither. */ + char in_bitfld_block; +}; + +#define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR) + +#define FOR_EACH_ACTUAL_CHILD(CHILD, ELT) \ + for ((CHILD) = (ELT)->is_group \ + ? next_child_for_group (NULL, (ELT)) \ + : (ELT)->children; \ + (CHILD); \ + (CHILD) = (ELT)->is_group \ + ? next_child_for_group ((CHILD), (ELT)) \ + : (CHILD)->sibling) + +/* Helper function for above macro. Return next child in group. */ +static struct sra_elt * +next_child_for_group (struct sra_elt *child, struct sra_elt *group) +{ + gcc_assert (group->is_group); + + /* Find the next child in the parent. */ + if (child) + child = child->sibling; + else + child = group->parent->children; + + /* Skip siblings that do not belong to the group. */ + while (child) + { + tree g_elt = group->element; + if (TREE_CODE (g_elt) == RANGE_EXPR) + { + if (!tree_int_cst_lt (child->element, TREE_OPERAND (g_elt, 0)) + && !tree_int_cst_lt (TREE_OPERAND (g_elt, 1), child->element)) + break; + } + else + gcc_unreachable (); + + child = child->sibling; + } + + return child; +} + +/* Random access to the child of a parent is performed by hashing. + This prevents quadratic behavior, and allows SRA to function + reasonably on larger records. */ +static htab_t sra_map; + +/* All structures are allocated out of the following obstack. */ +static struct obstack sra_obstack; + +/* Debugging functions. */ +static void dump_sra_elt_name (FILE *, struct sra_elt *); +extern void debug_sra_elt_name (struct sra_elt *); + +/* Forward declarations. */ +static tree generate_element_ref (struct sra_elt *); +static gimple_seq sra_build_assignment (tree dst, tree src); +static void mark_all_v_defs_seq (gimple_seq); +static void mark_all_v_defs_stmt (gimple); + + +/* Return true if DECL is an SRA candidate. 
*/ + +static bool +is_sra_candidate_decl (tree decl) +{ + return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl)); +} + +/* Return true if TYPE is a scalar type. */ + +static bool +is_sra_scalar_type (tree type) +{ + enum tree_code code = TREE_CODE (type); + return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE + || code == FIXED_POINT_TYPE + || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE + || code == POINTER_TYPE || code == OFFSET_TYPE + || code == REFERENCE_TYPE); +} + +/* Return true if TYPE can be decomposed into a set of independent variables. + + Note that this doesn't imply that all elements of TYPE can be + instantiated, just that if we decide to break up the type into + separate pieces that it can be done. */ + +bool +sra_type_can_be_decomposed_p (tree type) +{ + unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; + tree t; + + /* Avoid searching the same type twice. */ + if (bitmap_bit_p (sra_type_decomp_cache, cache+0)) + return true; + if (bitmap_bit_p (sra_type_decomp_cache, cache+1)) + return false; + + /* The type must have a definite nonzero size. */ + if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST + || integer_zerop (TYPE_SIZE (type))) + goto fail; + + /* The type must be a non-union aggregate. */ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + { + bool saw_one_field = false; + + for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t)) + if (TREE_CODE (t) == FIELD_DECL) + { + /* Reject incorrectly represented bit fields. */ + if (DECL_BIT_FIELD (t) + && INTEGRAL_TYPE_P (TREE_TYPE (t)) + && (tree_low_cst (DECL_SIZE (t), 1) + != TYPE_PRECISION (TREE_TYPE (t)))) + goto fail; + + saw_one_field = true; + } + + /* Record types must have at least one field. */ + if (!saw_one_field) + goto fail; + } + break; + + case ARRAY_TYPE: + /* Array types must have a fixed lower and upper bound. */ + t = TYPE_DOMAIN (type); + if (t == NULL) + goto fail; + if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t))) + goto fail; + if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t))) + goto fail; + break; + + case COMPLEX_TYPE: + break; + + default: + goto fail; + } + + bitmap_set_bit (sra_type_decomp_cache, cache+0); + return true; + + fail: + bitmap_set_bit (sra_type_decomp_cache, cache+1); + return false; +} + +/* Returns true if the TYPE is one of the available va_list types. + Otherwise it returns false. + Note, that for multiple calling conventions there can be more + than just one va_list type present. */ + +static bool +is_va_list_type (tree type) +{ + tree h; + + if (type == NULL_TREE) + return false; + h = targetm.canonical_va_list_type (type); + if (h == NULL_TREE) + return false; + if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (h)) + return true; + return false; +} + +/* Return true if DECL can be decomposed into a set of independent + (though not necessarily scalar) variables. */ + +static bool +decl_can_be_decomposed_p (tree var) +{ + /* Early out for scalars. */ + if (is_sra_scalar_type (TREE_TYPE (var))) + return false; + + /* The variable must not be aliased. */ + if (!is_gimple_non_addressable (var)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Cannot scalarize variable "); + print_generic_expr (dump_file, var, dump_flags); + fprintf (dump_file, " because it must live in memory\n"); + } + return false; + } + + /* The variable must not be volatile. 
*/ + if (TREE_THIS_VOLATILE (var)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Cannot scalarize variable "); + print_generic_expr (dump_file, var, dump_flags); + fprintf (dump_file, " because it is declared volatile\n"); + } + return false; + } + + /* We must be able to decompose the variable's type. */ + if (!sra_type_can_be_decomposed_p (TREE_TYPE (var))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Cannot scalarize variable "); + print_generic_expr (dump_file, var, dump_flags); + fprintf (dump_file, " because its type cannot be decomposed\n"); + } + return false; + } + + /* HACK: if we decompose a va_list_type_node before inlining, then we'll + confuse tree-stdarg.c, and we won't be able to figure out which and + how many arguments are accessed. This really should be improved in + tree-stdarg.c, as the decomposition is truly a win. This could also + be fixed if the stdarg pass ran early, but this can't be done until + we've aliasing information early too. See PR 30791. */ + if (early_sra && is_va_list_type (TREE_TYPE (var))) + return false; + + return true; +} + +/* Return true if TYPE can be *completely* decomposed into scalars. */ + +static bool +type_can_instantiate_all_elements (tree type) +{ + if (is_sra_scalar_type (type)) + return true; + if (!sra_type_can_be_decomposed_p (type)) + return false; + + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + { + unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; + tree f; + + if (bitmap_bit_p (sra_type_inst_cache, cache+0)) + return true; + if (bitmap_bit_p (sra_type_inst_cache, cache+1)) + return false; + + for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + if (!type_can_instantiate_all_elements (TREE_TYPE (f))) + { + bitmap_set_bit (sra_type_inst_cache, cache+1); + return false; + } + } + + bitmap_set_bit (sra_type_inst_cache, cache+0); + return true; + } + + case ARRAY_TYPE: + return type_can_instantiate_all_elements (TREE_TYPE (type)); + + case COMPLEX_TYPE: + return true; + + default: + gcc_unreachable (); + } +} + +/* Test whether ELT or some sub-element cannot be scalarized. */ + +static bool +can_completely_scalarize_p (struct sra_elt *elt) +{ + struct sra_elt *c; + + if (elt->cannot_scalarize) + return false; + + for (c = elt->children; c; c = c->sibling) + if (!can_completely_scalarize_p (c)) + return false; + + for (c = elt->groups; c; c = c->sibling) + if (!can_completely_scalarize_p (c)) + return false; + + return true; +} + + +/* A simplified tree hashing algorithm that only handles the types of + trees we expect to find in sra_elt->element. */ + +static hashval_t +sra_hash_tree (tree t) +{ + hashval_t h; + + switch (TREE_CODE (t)) + { + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + h = DECL_UID (t); + break; + + case INTEGER_CST: + h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t); + break; + + case RANGE_EXPR: + h = iterative_hash_expr (TREE_OPERAND (t, 0), 0); + h = iterative_hash_expr (TREE_OPERAND (t, 1), h); + break; + + case FIELD_DECL: + /* We can have types that are compatible, but have different member + lists, so we can't hash fields by ID. Use offsets instead. */ + h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0); + h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h); + break; + + case BIT_FIELD_REF: + /* Don't take operand 0 into account, that's our parent. 
*/ + h = iterative_hash_expr (TREE_OPERAND (t, 1), 0); + h = iterative_hash_expr (TREE_OPERAND (t, 2), h); + break; + + default: + gcc_unreachable (); + } + + return h; +} + +/* Hash function for type SRA_PAIR. */ + +static hashval_t +sra_elt_hash (const void *x) +{ + const struct sra_elt *const e = (const struct sra_elt *) x; + const struct sra_elt *p; + hashval_t h; + + h = sra_hash_tree (e->element); + + /* Take into account everything except bitfield blocks back up the + chain. Given that chain lengths are rarely very long, this + should be acceptable. If we truly identify this as a performance + problem, it should work to hash the pointer value + "e->parent". */ + for (p = e->parent; p ; p = p->parent) + if (!p->in_bitfld_block) + h = (h * 65521) ^ sra_hash_tree (p->element); + + return h; +} + +/* Equality function for type SRA_PAIR. */ + +static int +sra_elt_eq (const void *x, const void *y) +{ + const struct sra_elt *const a = (const struct sra_elt *) x; + const struct sra_elt *const b = (const struct sra_elt *) y; + tree ae, be; + const struct sra_elt *ap = a->parent; + const struct sra_elt *bp = b->parent; + + if (ap) + while (ap->in_bitfld_block) + ap = ap->parent; + if (bp) + while (bp->in_bitfld_block) + bp = bp->parent; + + if (ap != bp) + return false; + + ae = a->element; + be = b->element; + + if (ae == be) + return true; + if (TREE_CODE (ae) != TREE_CODE (be)) + return false; + + switch (TREE_CODE (ae)) + { + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + /* These are all pointer unique. */ + return false; + + case INTEGER_CST: + /* Integers are not pointer unique, so compare their values. */ + return tree_int_cst_equal (ae, be); + + case RANGE_EXPR: + return + tree_int_cst_equal (TREE_OPERAND (ae, 0), TREE_OPERAND (be, 0)) + && tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1)); + + case FIELD_DECL: + /* Fields are unique within a record, but not between + compatible records. */ + if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be)) + return false; + return fields_compatible_p (ae, be); + + case BIT_FIELD_REF: + return + tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1)) + && tree_int_cst_equal (TREE_OPERAND (ae, 2), TREE_OPERAND (be, 2)); + + default: + gcc_unreachable (); + } +} + +/* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT + may be null, in which case CHILD must be a DECL. */ + +static struct sra_elt * +lookup_element (struct sra_elt *parent, tree child, tree type, + enum insert_option insert) +{ + struct sra_elt dummy; + struct sra_elt **slot; + struct sra_elt *elt; + + if (parent) + dummy.parent = parent->is_group ? parent->parent : parent; + else + dummy.parent = NULL; + dummy.element = child; + + slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert); + if (!slot && insert == NO_INSERT) + return NULL; + + elt = *slot; + if (!elt && insert == INSERT) + { + *slot = elt = XOBNEW (&sra_obstack, struct sra_elt); + memset (elt, 0, sizeof (*elt)); + + elt->parent = parent; + elt->element = child; + elt->type = type; + elt->is_scalar = is_sra_scalar_type (type); + + if (parent) + { + if (IS_ELEMENT_FOR_GROUP (elt->element)) + { + elt->is_group = true; + elt->sibling = parent->groups; + parent->groups = elt; + } + else + { + elt->sibling = parent->children; + parent->children = elt; + } + } + + /* If this is a parameter, then if we want to scalarize, we have + one copy from the true function parameter. Count it now. 
*/ + if (TREE_CODE (child) == PARM_DECL) + { + elt->n_copies = 1; + bitmap_set_bit (needs_copy_in, DECL_UID (child)); + } + } + + return elt; +} + +/* Create or return the SRA_ELT structure for EXPR if the expression + refers to a scalarizable variable. */ + +static struct sra_elt * +maybe_lookup_element_for_expr (tree expr) +{ + struct sra_elt *elt; + tree child; + + switch (TREE_CODE (expr)) + { + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + if (is_sra_candidate_decl (expr)) + return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT); + return NULL; + + case ARRAY_REF: + /* We can't scalarize variable array indices. */ + if (in_array_bounds_p (expr)) + child = TREE_OPERAND (expr, 1); + else + return NULL; + break; + + case ARRAY_RANGE_REF: + /* We can't scalarize variable array indices. */ + if (range_in_array_bounds_p (expr)) + { + tree domain = TYPE_DOMAIN (TREE_TYPE (expr)); + child = build2 (RANGE_EXPR, integer_type_node, + TYPE_MIN_VALUE (domain), TYPE_MAX_VALUE (domain)); + } + else + return NULL; + break; + + case COMPONENT_REF: + { + tree type = TREE_TYPE (TREE_OPERAND (expr, 0)); + /* Don't look through unions. */ + if (TREE_CODE (type) != RECORD_TYPE) + return NULL; + /* Neither through variable-sized records. */ + if (TYPE_SIZE (type) == NULL_TREE + || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) + return NULL; + child = TREE_OPERAND (expr, 1); + } + break; + + case REALPART_EXPR: + child = integer_zero_node; + break; + case IMAGPART_EXPR: + child = integer_one_node; + break; + + default: + return NULL; + } + + elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0)); + if (elt) + return lookup_element (elt, child, TREE_TYPE (expr), INSERT); + return NULL; +} + + +/* Functions to walk just enough of the tree to see all scalarizable + references, and categorize them. */ + +/* A set of callbacks for phases 2 and 4. They'll be invoked for the + various kinds of references seen. In all cases, *GSI is an iterator + pointing to the statement being processed. */ +struct sra_walk_fns +{ + /* Invoked when ELT is required as a unit. Note that ELT might refer to + a leaf node, in which case this is a simple scalar reference. *EXPR_P + points to the location of the expression. IS_OUTPUT is true if this + is a left-hand-side reference. USE_ALL is true if we saw something we + couldn't quite identify and had to force the use of the entire object. */ + void (*use) (struct sra_elt *elt, tree *expr_p, + gimple_stmt_iterator *gsi, bool is_output, bool use_all); + + /* Invoked when we have a copy between two scalarizable references. */ + void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + gimple_stmt_iterator *gsi); + + /* Invoked when ELT is initialized from a constant. VALUE may be NULL, + in which case it should be treated as an empty CONSTRUCTOR. */ + void (*init) (struct sra_elt *elt, tree value, gimple_stmt_iterator *gsi); + + /* Invoked when we have a copy between one scalarizable reference ELT + and one non-scalarizable reference OTHER without side-effects. + IS_OUTPUT is true if ELT is on the left-hand side. */ + void (*ldst) (struct sra_elt *elt, tree other, + gimple_stmt_iterator *gsi, bool is_output); + + /* True during phase 2, false during phase 4. */ + /* ??? This is a hack. */ + bool initial_scan; +}; + +#ifdef ENABLE_CHECKING +/* Invoked via walk_tree, if *TP contains a candidate decl, return it. 
*/ + +static tree +sra_find_candidate_decl (tree *tp, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + tree t = *tp; + enum tree_code code = TREE_CODE (t); + + if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL) + { + *walk_subtrees = 0; + if (is_sra_candidate_decl (t)) + return t; + } + else if (TYPE_P (t)) + *walk_subtrees = 0; + + return NULL; +} +#endif + +/* Walk most expressions looking for a scalarizable aggregate. + If we find one, invoke FNS->USE. */ + +static void +sra_walk_expr (tree *expr_p, gimple_stmt_iterator *gsi, bool is_output, + const struct sra_walk_fns *fns) +{ + tree expr = *expr_p; + tree inner = expr; + bool disable_scalarization = false; + bool use_all_p = false; + + /* We're looking to collect a reference expression between EXPR and INNER, + such that INNER is a scalarizable decl and all other nodes through EXPR + are references that we can scalarize. If we come across something that + we can't scalarize, we reset EXPR. This has the effect of making it + appear that we're referring to the larger expression as a whole. */ + + while (1) + switch (TREE_CODE (inner)) + { + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + /* If there is a scalarizable decl at the bottom, then process it. */ + if (is_sra_candidate_decl (inner)) + { + struct sra_elt *elt = maybe_lookup_element_for_expr (expr); + if (disable_scalarization) + elt->cannot_scalarize = true; + else + fns->use (elt, expr_p, gsi, is_output, use_all_p); + } + return; + + case ARRAY_REF: + /* Non-constant index means any member may be accessed. Prevent the + expression from being scalarized. If we were to treat this as a + reference to the whole array, we can wind up with a single dynamic + index reference inside a loop being overridden by several constant + index references during loop setup. It's possible that this could + be avoided by using dynamic usage counts based on BB trip counts + (based on loop analysis or profiling), but that hardly seems worth + the effort. */ + /* ??? Hack. Figure out how to push this into the scan routines + without duplicating too much code. */ + if (!in_array_bounds_p (inner)) + { + disable_scalarization = true; + goto use_all; + } + /* ??? Are we assured that non-constant bounds and stride will have + the same value everywhere? I don't think Fortran will... */ + if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3)) + goto use_all; + inner = TREE_OPERAND (inner, 0); + break; + + case ARRAY_RANGE_REF: + if (!range_in_array_bounds_p (inner)) + { + disable_scalarization = true; + goto use_all; + } + /* ??? See above non-constant bounds and stride . */ + if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3)) + goto use_all; + inner = TREE_OPERAND (inner, 0); + break; + + case COMPONENT_REF: + { + tree type = TREE_TYPE (TREE_OPERAND (inner, 0)); + /* Don't look through unions. */ + if (TREE_CODE (type) != RECORD_TYPE) + goto use_all; + /* Neither through variable-sized records. */ + if (TYPE_SIZE (type) == NULL_TREE + || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) + goto use_all; + inner = TREE_OPERAND (inner, 0); + } + break; + + case REALPART_EXPR: + case IMAGPART_EXPR: + inner = TREE_OPERAND (inner, 0); + break; + + case BIT_FIELD_REF: + /* A bit field reference to a specific vector is scalarized but for + ones for inputs need to be marked as used on the left hand size so + when we scalarize it, we can mark that variable as non renamable. 
*/ + if (is_output + && TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) == VECTOR_TYPE) + { + struct sra_elt *elt + = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0)); + if (elt) + elt->is_vector_lhs = true; + } + + /* A bit field reference (access to *multiple* fields simultaneously) + is not currently scalarized. Consider this an access to the full + outer element, to which walk_tree will bring us next. */ + goto use_all; + + CASE_CONVERT: + /* Similarly, a nop explicitly wants to look at an object in a + type other than the one we've scalarized. */ + goto use_all; + + case VIEW_CONVERT_EXPR: + /* Likewise for a view conversion, but with an additional twist: + it can be on the LHS and, in this case, an access to the full + outer element would mean a killing def. So we need to punt + if we haven't already a full access to the current element, + because we cannot pretend to have a killing def if we only + have a partial access at some level. */ + if (is_output && !use_all_p && inner != expr) + disable_scalarization = true; + goto use_all; + + case WITH_SIZE_EXPR: + /* This is a transparent wrapper. The entire inner expression really + is being used. */ + goto use_all; + + use_all: + expr_p = &TREE_OPERAND (inner, 0); + inner = expr = *expr_p; + use_all_p = true; + break; + + default: +#ifdef ENABLE_CHECKING + /* Validate that we're not missing any references. */ + gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL)); +#endif + return; + } +} + +/* Walk the arguments of a GIMPLE_CALL looking for scalarizable aggregates. + If we find one, invoke FNS->USE. */ + +static void +sra_walk_gimple_call (gimple stmt, gimple_stmt_iterator *gsi, + const struct sra_walk_fns *fns) +{ + int i; + int nargs = gimple_call_num_args (stmt); + + for (i = 0; i < nargs; i++) + sra_walk_expr (gimple_call_arg_ptr (stmt, i), gsi, false, fns); + + if (gimple_call_lhs (stmt)) + sra_walk_expr (gimple_call_lhs_ptr (stmt), gsi, true, fns); +} + +/* Walk the inputs and outputs of a GIMPLE_ASM looking for scalarizable + aggregates. If we find one, invoke FNS->USE. */ + +static void +sra_walk_gimple_asm (gimple stmt, gimple_stmt_iterator *gsi, + const struct sra_walk_fns *fns) +{ + size_t i; + for (i = 0; i < gimple_asm_ninputs (stmt); i++) + sra_walk_expr (&TREE_VALUE (gimple_asm_input_op (stmt, i)), gsi, false, fns); + for (i = 0; i < gimple_asm_noutputs (stmt); i++) + sra_walk_expr (&TREE_VALUE (gimple_asm_output_op (stmt, i)), gsi, true, fns); +} + +/* Walk a GIMPLE_ASSIGN and categorize the assignment appropriately. */ + +static void +sra_walk_gimple_assign (gimple stmt, gimple_stmt_iterator *gsi, + const struct sra_walk_fns *fns) +{ + struct sra_elt *lhs_elt = NULL, *rhs_elt = NULL; + tree lhs, rhs; + + /* If there is more than 1 element on the RHS, only walk the lhs. */ + if (!gimple_assign_single_p (stmt)) + { + sra_walk_expr (gimple_assign_lhs_ptr (stmt), gsi, true, fns); + return; + } + + lhs = gimple_assign_lhs (stmt); + rhs = gimple_assign_rhs1 (stmt); + lhs_elt = maybe_lookup_element_for_expr (lhs); + rhs_elt = maybe_lookup_element_for_expr (rhs); + + /* If both sides are scalarizable, this is a COPY operation. */ + if (lhs_elt && rhs_elt) + { + fns->copy (lhs_elt, rhs_elt, gsi); + return; + } + + /* If the RHS is scalarizable, handle it. There are only two cases. 
*/ + if (rhs_elt) + { + if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs)) + fns->ldst (rhs_elt, lhs, gsi, false); + else + fns->use (rhs_elt, gimple_assign_rhs1_ptr (stmt), gsi, false, false); + } + + /* If it isn't scalarizable, there may be scalarizable variables within, so + check for a call or else walk the RHS to see if we need to do any + copy-in operations. We need to do it before the LHS is scalarized so + that the statements get inserted in the proper place, before any + copy-out operations. */ + else + sra_walk_expr (gimple_assign_rhs1_ptr (stmt), gsi, false, fns); + + /* Likewise, handle the LHS being scalarizable. We have cases similar + to those above, but also want to handle RHS being constant. */ + if (lhs_elt) + { + /* If this is an assignment from a constant, or constructor, then + we have access to all of the elements individually. Invoke INIT. */ + if (TREE_CODE (rhs) == COMPLEX_EXPR + || TREE_CODE (rhs) == COMPLEX_CST + || TREE_CODE (rhs) == CONSTRUCTOR) + fns->init (lhs_elt, rhs, gsi); + + /* If this is an assignment from read-only memory, treat this as if + we'd been passed the constructor directly. Invoke INIT. */ + else if (TREE_CODE (rhs) == VAR_DECL + && TREE_STATIC (rhs) + && !DECL_EXTERNAL (rhs) + && TREE_READONLY (rhs) + && targetm.binds_local_p (rhs)) + fns->init (lhs_elt, DECL_INITIAL (rhs), gsi); + + /* If this is a copy from a non-scalarizable lvalue, invoke LDST. + The lvalue requirement prevents us from trying to directly scalarize + the result of a function call. Which would result in trying to call + the function multiple times, and other evil things. */ + else if (!lhs_elt->is_scalar + && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs)) + fns->ldst (lhs_elt, rhs, gsi, true); + + /* Otherwise we're being used in some context that requires the + aggregate to be seen as a whole. Invoke USE. */ + else + fns->use (lhs_elt, gimple_assign_lhs_ptr (stmt), gsi, true, false); + } + + /* Similarly to above, LHS_ELT being null only means that the LHS as a + whole is not a scalarizable reference. There may be occurrences of + scalarizable variables within, which implies a USE. */ + else + sra_walk_expr (gimple_assign_lhs_ptr (stmt), gsi, true, fns); +} + +/* Entry point to the walk functions. Search the entire function, + invoking the callbacks in FNS on each of the references to + scalarizable variables. */ + +static void +sra_walk_function (const struct sra_walk_fns *fns) +{ + basic_block bb; + gimple_stmt_iterator si, ni; + + /* ??? Phase 4 could derive some benefit to walking the function in + dominator tree order. */ + + FOR_EACH_BB (bb) + for (si = gsi_start_bb (bb); !gsi_end_p (si); si = ni) + { + gimple stmt; + + stmt = gsi_stmt (si); + + ni = si; + gsi_next (&ni); + + /* If the statement has no virtual operands, then it doesn't + make any structure references that we care about. */ + if (gimple_aliases_computed_p (cfun) + && ZERO_SSA_OPERANDS (stmt, (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE))) + continue; + + switch (gimple_code (stmt)) + { + case GIMPLE_RETURN: + /* If we have "return <retval>" then the return value is + already exposed for our pleasure. Walk it as a USE to + force all the components back in place for the return. 
+ */ + if (gimple_return_retval (stmt) == NULL_TREE) + ; + else + sra_walk_expr (gimple_return_retval_ptr (stmt), &si, false, + fns); + break; + + case GIMPLE_ASSIGN: + sra_walk_gimple_assign (stmt, &si, fns); + break; + case GIMPLE_CALL: + sra_walk_gimple_call (stmt, &si, fns); + break; + case GIMPLE_ASM: + sra_walk_gimple_asm (stmt, &si, fns); + break; + + default: + break; + } + } +} + +/* Phase One: Scan all referenced variables in the program looking for + structures that could be decomposed. */ + +static bool +find_candidates_for_sra (void) +{ + bool any_set = false; + tree var; + referenced_var_iterator rvi; + + FOR_EACH_REFERENCED_VAR (var, rvi) + { + if (decl_can_be_decomposed_p (var)) + { + bitmap_set_bit (sra_candidates, DECL_UID (var)); + any_set = true; + } + } + + return any_set; +} + + +/* Phase Two: Scan all references to scalarizable variables. Count the + number of times they are used or copied respectively. */ + +/* Callbacks to fill in SRA_WALK_FNS. Everything but USE is + considered a copy, because we can decompose the reference such that + the sub-elements needn't be contiguous. */ + +static void +scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED, + gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, + bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED) +{ + elt->n_uses += 1; +} + +static void +scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED) +{ + lhs_elt->n_copies += 1; + rhs_elt->n_copies += 1; +} + +static void +scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED, + gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED) +{ + lhs_elt->n_copies += 1; +} + +static void +scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED, + gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, + bool is_output ATTRIBUTE_UNUSED) +{ + elt->n_copies += 1; +} + +/* Dump the values we collected during the scanning phase. */ + +static void +scan_dump (struct sra_elt *elt) +{ + struct sra_elt *c; + + dump_sra_elt_name (dump_file, elt); + fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies); + + for (c = elt->children; c ; c = c->sibling) + scan_dump (c); + + for (c = elt->groups; c ; c = c->sibling) + scan_dump (c); +} + +/* Entry point to phase 2. Scan the entire function, building up + scalarization data structures, recording copies and uses. */ + +static void +scan_function (void) +{ + static const struct sra_walk_fns fns = { + scan_use, scan_copy, scan_init, scan_ldst, true + }; + bitmap_iterator bi; + + sra_walk_function (&fns); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + unsigned i; + + fputs ("\nScan results:\n", dump_file); + EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) + { + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + if (elt) + scan_dump (elt); + } + fputc ('\n', dump_file); + } +} + +/* Phase Three: Make decisions about which variables to scalarize, if any. + All elements to be scalarized have replacement variables made for them. */ + +/* A subroutine of build_element_name. Recursively build the element + name on the obstack. 
*/ + +static void +build_element_name_1 (struct sra_elt *elt) +{ + tree t; + char buffer[32]; + + if (elt->parent) + { + build_element_name_1 (elt->parent); + obstack_1grow (&sra_obstack, '$'); + + if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE) + { + if (elt->element == integer_zero_node) + obstack_grow (&sra_obstack, "real", 4); + else + obstack_grow (&sra_obstack, "imag", 4); + return; + } + } + + t = elt->element; + if (TREE_CODE (t) == INTEGER_CST) + { + /* ??? Eh. Don't bother doing double-wide printing. */ + sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t)); + obstack_grow (&sra_obstack, buffer, strlen (buffer)); + } + else if (TREE_CODE (t) == BIT_FIELD_REF) + { + sprintf (buffer, "B" HOST_WIDE_INT_PRINT_DEC, + tree_low_cst (TREE_OPERAND (t, 2), 1)); + obstack_grow (&sra_obstack, buffer, strlen (buffer)); + sprintf (buffer, "F" HOST_WIDE_INT_PRINT_DEC, + tree_low_cst (TREE_OPERAND (t, 1), 1)); + obstack_grow (&sra_obstack, buffer, strlen (buffer)); + } + else + { + tree name = DECL_NAME (t); + if (name) + obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name), + IDENTIFIER_LENGTH (name)); + else + { + sprintf (buffer, "D%u", DECL_UID (t)); + obstack_grow (&sra_obstack, buffer, strlen (buffer)); + } + } +} + +/* Construct a pretty variable name for an element's replacement variable. + The name is built on the obstack. */ + +static char * +build_element_name (struct sra_elt *elt) +{ + build_element_name_1 (elt); + obstack_1grow (&sra_obstack, '\0'); + return XOBFINISH (&sra_obstack, char *); +} + +/* Instantiate an element as an independent variable. */ + +static void +instantiate_element (struct sra_elt *elt) +{ + struct sra_elt *base_elt; + tree var, base; + bool nowarn = TREE_NO_WARNING (elt->element); + + for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent) + if (!nowarn) + nowarn = TREE_NO_WARNING (base_elt->parent->element); + base = base_elt->element; + + elt->replacement = var = make_rename_temp (elt->type, "SR"); + + if (DECL_P (elt->element) + && !tree_int_cst_equal (DECL_SIZE (var), DECL_SIZE (elt->element))) + { + DECL_SIZE (var) = DECL_SIZE (elt->element); + DECL_SIZE_UNIT (var) = DECL_SIZE_UNIT (elt->element); + + elt->in_bitfld_block = 1; + elt->replacement = fold_build3 (BIT_FIELD_REF, elt->type, var, + DECL_SIZE (var), + BYTES_BIG_ENDIAN + ? size_binop (MINUS_EXPR, + TYPE_SIZE (elt->type), + DECL_SIZE (var)) + : bitsize_int (0)); + } + + /* For vectors, if used on the left hand side with BIT_FIELD_REF, + they are not a gimple register. */ + if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE && elt->is_vector_lhs) + DECL_GIMPLE_REG_P (var) = 0; + + DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base); + DECL_ARTIFICIAL (var) = 1; + + if (TREE_THIS_VOLATILE (elt->type)) + { + TREE_THIS_VOLATILE (var) = 1; + TREE_SIDE_EFFECTS (var) = 1; + } + + if (DECL_NAME (base) && !DECL_IGNORED_P (base)) + { + char *pretty_name = build_element_name (elt); + DECL_NAME (var) = get_identifier (pretty_name); + obstack_free (&sra_obstack, pretty_name); + + SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt)); + DECL_DEBUG_EXPR_IS_FROM (var) = 1; + + DECL_IGNORED_P (var) = 0; + TREE_NO_WARNING (var) = nowarn; + } + else + { + DECL_IGNORED_P (var) = 1; + /* ??? We can't generate any warning that would be meaningful. */ + TREE_NO_WARNING (var) = 1; + } + + /* Zero-initialize bit-field scalarization variables, to avoid + triggering undefined behavior. 
*/ + if (TREE_CODE (elt->element) == BIT_FIELD_REF + || (var != elt->replacement + && TREE_CODE (elt->replacement) == BIT_FIELD_REF)) + { + gimple_seq init = sra_build_assignment (var, + fold_convert (TREE_TYPE (var), + integer_zero_node) + ); + insert_edge_copies_seq (init, ENTRY_BLOCK_PTR); + mark_all_v_defs_seq (init); + } + + if (dump_file) + { + fputs (" ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputs (" -> ", dump_file); + print_generic_expr (dump_file, var, dump_flags); + fputc ('\n', dump_file); + } +} + +/* Make one pass across an element tree deciding whether or not it's + profitable to instantiate individual leaf scalars. + + PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES + fields all the way up the tree. */ + +static void +decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses, + unsigned int parent_copies) +{ + if (dump_file && !elt->parent) + { + fputs ("Initial instantiation for ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); + } + + if (elt->cannot_scalarize) + return; + + if (elt->is_scalar) + { + /* The decision is simple: instantiate if we're used more frequently + than the parent needs to be seen as a complete unit. */ + if (elt->n_uses + elt->n_copies + parent_copies > parent_uses) + instantiate_element (elt); + } + else + { + struct sra_elt *c, *group; + unsigned int this_uses = elt->n_uses + parent_uses; + unsigned int this_copies = elt->n_copies + parent_copies; + + /* Consider groups of sub-elements as weighing in favour of + instantiation whatever their size. */ + for (group = elt->groups; group ; group = group->sibling) + FOR_EACH_ACTUAL_CHILD (c, group) + { + c->n_uses += group->n_uses; + c->n_copies += group->n_copies; + } + + for (c = elt->children; c ; c = c->sibling) + decide_instantiation_1 (c, this_uses, this_copies); + } +} + +/* Compute the size and number of all instantiated elements below ELT. + We will only care about this if the size of the complete structure + fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */ + +static unsigned int +sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep) +{ + if (elt->replacement) + { + *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type)); + return 1; + } + else + { + struct sra_elt *c; + unsigned int count = 0; + + for (c = elt->children; c ; c = c->sibling) + count += sum_instantiated_sizes (c, sizep); + + return count; + } +} + +/* Instantiate fields in ELT->TYPE that are not currently present as + children of ELT. */ + +static void instantiate_missing_elements (struct sra_elt *elt); + +static struct sra_elt * +instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type) +{ + struct sra_elt *sub = lookup_element (elt, child, type, INSERT); + if (sub->is_scalar) + { + if (sub->replacement == NULL) + instantiate_element (sub); + } + else + instantiate_missing_elements (sub); + return sub; +} + +/* Obtain the canonical type for field F of ELEMENT. */ + +static tree +canon_type_for_field (tree f, tree element) +{ + tree field_type = TREE_TYPE (f); + + /* canonicalize_component_ref() unwidens some bit-field types (not + marked as DECL_BIT_FIELD in C++), so we must do the same, lest we + may introduce type mismatches. 
*/ + if (INTEGRAL_TYPE_P (field_type) + && DECL_MODE (f) != TYPE_MODE (field_type)) + field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF, + field_type, + element, + f, NULL_TREE), + NULL_TREE)); + + return field_type; +} + +/* Look for adjacent fields of ELT starting at F that we'd like to + scalarize as a single variable. Return the last field of the + group. */ + +static tree +try_instantiate_multiple_fields (struct sra_elt *elt, tree f) +{ + int count; + unsigned HOST_WIDE_INT align, bit, size, alchk; + enum machine_mode mode; + tree first = f, prev; + tree type, var; + struct sra_elt *block; + + /* Point fields are typically best handled as standalone entities. */ + if (POINTER_TYPE_P (TREE_TYPE (f))) + return f; + + if (!is_sra_scalar_type (TREE_TYPE (f)) + || !host_integerp (DECL_FIELD_OFFSET (f), 1) + || !host_integerp (DECL_FIELD_BIT_OFFSET (f), 1) + || !host_integerp (DECL_SIZE (f), 1) + || lookup_element (elt, f, NULL, NO_INSERT)) + return f; + + block = elt; + + /* For complex and array objects, there are going to be integer + literals as child elements. In this case, we can't just take the + alignment and mode of the decl, so we instead rely on the element + type. + + ??? We could try to infer additional alignment from the full + object declaration and the location of the sub-elements we're + accessing. */ + for (count = 0; !DECL_P (block->element); count++) + block = block->parent; + + align = DECL_ALIGN (block->element); + alchk = GET_MODE_BITSIZE (DECL_MODE (block->element)); + + if (count) + { + type = TREE_TYPE (block->element); + while (count--) + type = TREE_TYPE (type); + + align = TYPE_ALIGN (type); + alchk = GET_MODE_BITSIZE (TYPE_MODE (type)); + } + + if (align < alchk) + align = alchk; + + /* Coalescing wider fields is probably pointless and + inefficient. */ + if (align > BITS_PER_WORD) + align = BITS_PER_WORD; + + bit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT + + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1); + size = tree_low_cst (DECL_SIZE (f), 1); + + alchk = align - 1; + alchk = ~alchk; + + if ((bit & alchk) != ((bit + size - 1) & alchk)) + return f; + + /* Find adjacent fields in the same alignment word. */ + + for (prev = f, f = TREE_CHAIN (f); + f && TREE_CODE (f) == FIELD_DECL + && is_sra_scalar_type (TREE_TYPE (f)) + && host_integerp (DECL_FIELD_OFFSET (f), 1) + && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1) + && host_integerp (DECL_SIZE (f), 1) + && !lookup_element (elt, f, NULL, NO_INSERT); + prev = f, f = TREE_CHAIN (f)) + { + unsigned HOST_WIDE_INT nbit, nsize; + + nbit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT + + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1); + nsize = tree_low_cst (DECL_SIZE (f), 1); + + if (bit + size == nbit) + { + if ((bit & alchk) != ((nbit + nsize - 1) & alchk)) + { + /* If we're at an alignment boundary, don't bother + growing alignment such that we can include this next + field. 
*/ + if ((nbit & alchk) + || GET_MODE_BITSIZE (DECL_MODE (f)) <= align) + break; + + align = GET_MODE_BITSIZE (DECL_MODE (f)); + alchk = align - 1; + alchk = ~alchk; + + if ((bit & alchk) != ((nbit + nsize - 1) & alchk)) + break; + } + size += nsize; + } + else if (nbit + nsize == bit) + { + if ((nbit & alchk) != ((bit + size - 1) & alchk)) + { + if ((bit & alchk) + || GET_MODE_BITSIZE (DECL_MODE (f)) <= align) + break; + + align = GET_MODE_BITSIZE (DECL_MODE (f)); + alchk = align - 1; + alchk = ~alchk; + + if ((nbit & alchk) != ((bit + size - 1) & alchk)) + break; + } + bit = nbit; + size += nsize; + } + else + break; + } + + f = prev; + + if (f == first) + return f; + + gcc_assert ((bit & alchk) == ((bit + size - 1) & alchk)); + + /* Try to widen the bit range so as to cover padding bits as well. */ + + if ((bit & ~alchk) || size != align) + { + unsigned HOST_WIDE_INT mbit = bit & alchk; + unsigned HOST_WIDE_INT msize = align; + + for (f = TYPE_FIELDS (elt->type); + f; f = TREE_CHAIN (f)) + { + unsigned HOST_WIDE_INT fbit, fsize; + + /* Skip the fields from first to prev. */ + if (f == first) + { + f = prev; + continue; + } + + if (!(TREE_CODE (f) == FIELD_DECL + && host_integerp (DECL_FIELD_OFFSET (f), 1) + && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1))) + continue; + + fbit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT + + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1); + + /* If we're past the selected word, we're fine. */ + if ((bit & alchk) < (fbit & alchk)) + continue; + + if (host_integerp (DECL_SIZE (f), 1)) + fsize = tree_low_cst (DECL_SIZE (f), 1); + else + /* Assume a variable-sized field takes up all space till + the end of the word. ??? Endianness issues? */ + fsize = align - (fbit & alchk); + + if ((fbit & alchk) < (bit & alchk)) + { + /* A large field might start at a previous word and + extend into the selected word. Exclude those + bits. ??? Endianness issues? */ + HOST_WIDE_INT diff = fbit + fsize - mbit; + + if (diff <= 0) + continue; + + mbit += diff; + msize -= diff; + } + else + { + /* Non-overlapping, great. */ + if (fbit + fsize <= mbit + || mbit + msize <= fbit) + continue; + + if (fbit <= mbit) + { + unsigned HOST_WIDE_INT diff = fbit + fsize - mbit; + mbit += diff; + msize -= diff; + } + else if (fbit > mbit) + msize -= (mbit + msize - fbit); + else + gcc_unreachable (); + } + } + + bit = mbit; + size = msize; + } + + /* Now we know the bit range we're interested in. Find the smallest + machine mode we can use to access it. */ + + for (mode = smallest_mode_for_size (size, MODE_INT); + ; + mode = GET_MODE_WIDER_MODE (mode)) + { + gcc_assert (mode != VOIDmode); + + alchk = GET_MODE_PRECISION (mode) - 1; + alchk = ~alchk; + + if ((bit & alchk) == ((bit + size - 1) & alchk)) + break; + } + + gcc_assert (~alchk < align); + + /* Create the field group as a single variable. */ + + /* We used to create a type for the mode above, but size turns + to be out not of mode-size. As we need a matching type + to build a BIT_FIELD_REF, use a nonstandard integer type as + fallback. 
*/ + type = lang_hooks.types.type_for_size (size, 1); + if (!type || TYPE_PRECISION (type) != size) + type = build_nonstandard_integer_type (size, 1); + gcc_assert (type); + var = build3 (BIT_FIELD_REF, type, NULL_TREE, + bitsize_int (size), bitsize_int (bit)); + + block = instantiate_missing_elements_1 (elt, var, type); + gcc_assert (block && block->is_scalar); + + var = block->replacement; + block->in_bitfld_block = 2; + + /* Add the member fields to the group, such that they access + portions of the group variable. */ + + for (f = first; f != TREE_CHAIN (prev); f = TREE_CHAIN (f)) + { + tree field_type = canon_type_for_field (f, elt->element); + struct sra_elt *fld = lookup_element (block, f, field_type, INSERT); + + gcc_assert (fld && fld->is_scalar && !fld->replacement); + + fld->replacement = fold_build3 (BIT_FIELD_REF, field_type, var, + bitsize_int (TYPE_PRECISION (field_type)), + bitsize_int + ((TREE_INT_CST_LOW (DECL_FIELD_OFFSET (f)) + * BITS_PER_UNIT + + (TREE_INT_CST_LOW + (DECL_FIELD_BIT_OFFSET (f))) + - (TREE_INT_CST_LOW + (TREE_OPERAND (block->element, 2)))) + & ~alchk)); + fld->in_bitfld_block = 1; + } + + return prev; +} + +static void +instantiate_missing_elements (struct sra_elt *elt) +{ + tree type = elt->type; + + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + { + tree f; + for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + tree last = try_instantiate_multiple_fields (elt, f); + + if (last != f) + { + f = last; + continue; + } + + instantiate_missing_elements_1 (elt, f, + canon_type_for_field + (f, elt->element)); + } + break; + } + + case ARRAY_TYPE: + { + tree i, max, subtype; + + i = TYPE_MIN_VALUE (TYPE_DOMAIN (type)); + max = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); + subtype = TREE_TYPE (type); + + while (1) + { + instantiate_missing_elements_1 (elt, i, subtype); + if (tree_int_cst_equal (i, max)) + break; + i = int_const_binop (PLUS_EXPR, i, integer_one_node, true); + } + + break; + } + + case COMPLEX_TYPE: + type = TREE_TYPE (type); + instantiate_missing_elements_1 (elt, integer_zero_node, type); + instantiate_missing_elements_1 (elt, integer_one_node, type); + break; + + default: + gcc_unreachable (); + } +} + +/* Return true if there is only one non aggregate field in the record, TYPE. + Return false otherwise. */ + +static bool +single_scalar_field_in_record_p (tree type) +{ + int num_fields = 0; + tree field; + if (TREE_CODE (type) != RECORD_TYPE) + return false; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + if (TREE_CODE (field) == FIELD_DECL) + { + num_fields++; + + if (num_fields == 2) + return false; + + if (AGGREGATE_TYPE_P (TREE_TYPE (field))) + return false; + } + + return true; +} + +/* Make one pass across an element tree deciding whether to perform block + or element copies. If we decide on element copies, instantiate all + elements. Return true if there are any instantiated sub-elements. */ + +static bool +decide_block_copy (struct sra_elt *elt) +{ + struct sra_elt *c; + bool any_inst; + + /* We shouldn't be invoked on groups of sub-elements as they must + behave like their parent as far as block copy is concerned. */ + gcc_assert (!elt->is_group); + + /* If scalarization is disabled, respect it. 
*/ + if (elt->cannot_scalarize) + { + elt->use_block_copy = 1; + + if (dump_file) + { + fputs ("Scalarization disabled for ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); + } + + /* Disable scalarization of sub-elements */ + for (c = elt->children; c; c = c->sibling) + { + c->cannot_scalarize = 1; + decide_block_copy (c); + } + + /* Groups behave like their parent. */ + for (c = elt->groups; c; c = c->sibling) + { + c->cannot_scalarize = 1; + c->use_block_copy = 1; + } + + return false; + } + + /* Don't decide if we've no uses and no groups. */ + if (elt->n_uses == 0 && elt->n_copies == 0 && elt->groups == NULL) + ; + + else if (!elt->is_scalar) + { + tree size_tree = TYPE_SIZE_UNIT (elt->type); + bool use_block_copy = true; + + /* Tradeoffs for COMPLEX types pretty much always make it better + to go ahead and split the components. */ + if (TREE_CODE (elt->type) == COMPLEX_TYPE) + use_block_copy = false; + + /* Don't bother trying to figure out the rest if the structure is + so large we can't do easy arithmetic. This also forces block + copies for variable sized structures. */ + else if (host_integerp (size_tree, 1)) + { + unsigned HOST_WIDE_INT full_size, inst_size = 0; + unsigned int max_size, max_count, inst_count, full_count; + + /* If the sra-max-structure-size parameter is 0, then the + user has not overridden the parameter and we can choose a + sensible default. */ + max_size = SRA_MAX_STRUCTURE_SIZE + ? SRA_MAX_STRUCTURE_SIZE + : MOVE_RATIO (optimize_function_for_speed_p (cfun)) * UNITS_PER_WORD; + max_count = SRA_MAX_STRUCTURE_COUNT + ? SRA_MAX_STRUCTURE_COUNT + : MOVE_RATIO (optimize_function_for_speed_p (cfun)); + + full_size = tree_low_cst (size_tree, 1); + full_count = count_type_elements (elt->type, false); + inst_count = sum_instantiated_sizes (elt, &inst_size); + + /* If there is only one scalar field in the record, don't block copy. */ + if (single_scalar_field_in_record_p (elt->type)) + use_block_copy = false; + + /* ??? What to do here. If there are two fields, and we've only + instantiated one, then instantiating the other is clearly a win. + If there are a large number of fields then the size of the copy + is much more of a factor. */ + + /* If the structure is small, and we've made copies, go ahead + and instantiate, hoping that the copies will go away. */ + if (full_size <= max_size + && (full_count - inst_count) <= max_count + && elt->n_copies > elt->n_uses) + use_block_copy = false; + else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO + && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO) + use_block_copy = false; + + /* In order to avoid block copy, we have to be able to instantiate + all elements of the type. See if this is possible. */ + if (!use_block_copy + && (!can_completely_scalarize_p (elt) + || !type_can_instantiate_all_elements (elt->type))) + use_block_copy = true; + } + + elt->use_block_copy = use_block_copy; + + /* Groups behave like their parent. */ + for (c = elt->groups; c; c = c->sibling) + c->use_block_copy = use_block_copy; + + if (dump_file) + { + fprintf (dump_file, "Using %s for ", + use_block_copy ? "block-copy" : "element-copy"); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); + } + + if (!use_block_copy) + { + instantiate_missing_elements (elt); + return true; + } + } + + any_inst = elt->replacement != NULL; + + for (c = elt->children; c ; c = c->sibling) + any_inst |= decide_block_copy (c); + + return any_inst; +} + +/* Entry point to phase 3. 
Instantiate scalar replacement variables. */ + +static void +decide_instantiations (void) +{ + unsigned int i; + bool cleared_any; + bitmap_head done_head; + bitmap_iterator bi; + + /* We cannot clear bits from a bitmap we're iterating over, + so save up all the bits to clear until the end. */ + bitmap_initialize (&done_head, &bitmap_default_obstack); + cleared_any = false; + + EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) + { + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + if (elt) + { + decide_instantiation_1 (elt, 0, 0); + if (!decide_block_copy (elt)) + elt = NULL; + } + if (!elt) + { + bitmap_set_bit (&done_head, i); + cleared_any = true; + } + } + + if (cleared_any) + { + bitmap_and_compl_into (sra_candidates, &done_head); + bitmap_and_compl_into (needs_copy_in, &done_head); + } + bitmap_clear (&done_head); + + mark_set_for_renaming (sra_candidates); + + if (dump_file) + fputc ('\n', dump_file); +} + + +/* Phase Four: Update the function to match the replacements created. */ + +/* Mark all the variables in VDEF/VUSE operators for STMT for + renaming. This becomes necessary when we modify all of a + non-scalar. */ + +static void +mark_all_v_defs_stmt (gimple stmt) +{ + tree sym; + ssa_op_iter iter; + + update_stmt_if_modified (stmt); + + FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS) + { + if (TREE_CODE (sym) == SSA_NAME) + sym = SSA_NAME_VAR (sym); + mark_sym_for_renaming (sym); + } +} + + +/* Mark all the variables in virtual operands in all the statements in + LIST for renaming. */ + +static void +mark_all_v_defs_seq (gimple_seq seq) +{ + gimple_stmt_iterator gsi; + + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + mark_all_v_defs_stmt (gsi_stmt (gsi)); +} + +/* Mark every replacement under ELT with TREE_NO_WARNING. */ + +static void +mark_no_warning (struct sra_elt *elt) +{ + if (!elt->all_no_warning) + { + if (elt->replacement) + TREE_NO_WARNING (elt->replacement) = 1; + else + { + struct sra_elt *c; + FOR_EACH_ACTUAL_CHILD (c, elt) + mark_no_warning (c); + } + elt->all_no_warning = true; + } +} + +/* Build a single level component reference to ELT rooted at BASE. */ + +static tree +generate_one_element_ref (struct sra_elt *elt, tree base) +{ + switch (TREE_CODE (TREE_TYPE (base))) + { + case RECORD_TYPE: + { + tree field = elt->element; + + /* We can't test elt->in_bitfld_block here because, when this is + called from instantiate_element, we haven't set this field + yet. */ + if (TREE_CODE (field) == BIT_FIELD_REF) + { + tree ret = unshare_expr (field); + TREE_OPERAND (ret, 0) = base; + return ret; + } + + /* Watch out for compatible records with differing field lists. */ + if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base))) + field = find_compatible_field (TREE_TYPE (base), field); + + return build3 (COMPONENT_REF, elt->type, base, field, NULL); + } + + case ARRAY_TYPE: + if (TREE_CODE (elt->element) == RANGE_EXPR) + return build4 (ARRAY_RANGE_REF, elt->type, base, + TREE_OPERAND (elt->element, 0), NULL, NULL); + else + return build4 (ARRAY_REF, elt->type, base, elt->element, NULL, NULL); + + case COMPLEX_TYPE: + if (elt->element == integer_zero_node) + return build1 (REALPART_EXPR, elt->type, base); + else + return build1 (IMAGPART_EXPR, elt->type, base); + + default: + gcc_unreachable (); + } +} + +/* Build a full component reference to ELT rooted at its native variable. 
*/ + +static tree +generate_element_ref (struct sra_elt *elt) +{ + if (elt->parent) + return generate_one_element_ref (elt, generate_element_ref (elt->parent)); + else + return elt->element; +} + +/* Return true if BF is a bit-field that we can handle like a scalar. */ + +static bool +scalar_bitfield_p (tree bf) +{ + return (TREE_CODE (bf) == BIT_FIELD_REF + && (is_gimple_reg (TREE_OPERAND (bf, 0)) + || (TYPE_MODE (TREE_TYPE (TREE_OPERAND (bf, 0))) != BLKmode + && (!TREE_SIDE_EFFECTS (TREE_OPERAND (bf, 0)) + || (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE + (TREE_OPERAND (bf, 0)))) + <= BITS_PER_WORD))))); +} + +/* Create an assignment statement from SRC to DST. */ + +static gimple_seq +sra_build_assignment (tree dst, tree src) +{ + gimple stmt; + gimple_seq seq = NULL, seq2 = NULL; + /* Turning BIT_FIELD_REFs into bit operations enables other passes + to do a much better job at optimizing the code. + From dst = BIT_FIELD_REF <var, sz, off> we produce + + SR.1 = (scalar type) var; + SR.2 = SR.1 >> off; + SR.3 = SR.2 & ((1 << sz) - 1); + ... possible sign extension of SR.3 ... + dst = (destination type) SR.3; + */ + if (scalar_bitfield_p (src)) + { + tree var, shift, width; + tree utype, stype; + bool unsignedp = (INTEGRAL_TYPE_P (TREE_TYPE (src)) + ? TYPE_UNSIGNED (TREE_TYPE (src)) : true); + struct gimplify_ctx gctx; + + var = TREE_OPERAND (src, 0); + width = TREE_OPERAND (src, 1); + /* The offset needs to be adjusted to a right shift quantity + depending on the endianness. */ + if (BYTES_BIG_ENDIAN) + { + tree tmp = size_binop (PLUS_EXPR, width, TREE_OPERAND (src, 2)); + shift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), tmp); + } + else + shift = TREE_OPERAND (src, 2); + + /* In weird cases we have non-integral types for the source or + destination object. + ??? For unknown reasons we also want an unsigned scalar type. */ + stype = TREE_TYPE (var); + if (!INTEGRAL_TYPE_P (stype)) + stype = lang_hooks.types.type_for_size (TREE_INT_CST_LOW + (TYPE_SIZE (stype)), 1); + else if (!TYPE_UNSIGNED (stype)) + stype = unsigned_type_for (stype); + + utype = TREE_TYPE (dst); + if (!INTEGRAL_TYPE_P (utype)) + utype = lang_hooks.types.type_for_size (TREE_INT_CST_LOW + (TYPE_SIZE (utype)), 1); + else if (!TYPE_UNSIGNED (utype)) + utype = unsigned_type_for (utype); + + /* Convert the base var of the BIT_FIELD_REF to the scalar type + we use for computation if we cannot use it directly. */ + if (INTEGRAL_TYPE_P (TREE_TYPE (var))) + var = fold_convert (stype, var); + else + var = fold_build1 (VIEW_CONVERT_EXPR, stype, var); + + if (!integer_zerop (shift)) + var = fold_build2 (RSHIFT_EXPR, stype, var, shift); + + /* If we need a masking operation, produce one. */ + if (TREE_INT_CST_LOW (width) == TYPE_PRECISION (stype)) + unsignedp = true; + else + { + tree one = build_int_cst_wide (stype, 1, 0); + tree mask = int_const_binop (LSHIFT_EXPR, one, width, 0); + mask = int_const_binop (MINUS_EXPR, mask, one, 0); + var = fold_build2 (BIT_AND_EXPR, stype, var, mask); + } + + /* After shifting and masking, convert to the target type. */ + var = fold_convert (utype, var); + + /* Perform sign extension, if required. + ??? This should never be necessary. */ + if (!unsignedp) + { + tree signbit = int_const_binop (LSHIFT_EXPR, + build_int_cst_wide (utype, 1, 0), + size_binop (MINUS_EXPR, width, + bitsize_int (1)), 0); + + var = fold_build2 (BIT_XOR_EXPR, utype, var, signbit); + var = fold_build2 (MINUS_EXPR, utype, var, signbit); + } + + /* fold_build3 (BIT_FIELD_REF, ...) sometimes returns a cast. 
*/ + STRIP_NOPS (dst); + + /* Finally, move and convert to the destination. */ + if (INTEGRAL_TYPE_P (TREE_TYPE (dst))) + var = fold_convert (TREE_TYPE (dst), var); + else + var = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (dst), var); + + push_gimplify_context (&gctx); + gctx.into_ssa = true; + gctx.allow_rhs_cond_expr = true; + + gimplify_assign (dst, var, &seq); + + if (gimple_referenced_vars (cfun)) + for (var = gctx.temps; var; var = TREE_CHAIN (var)) + add_referenced_var (var); + pop_gimplify_context (NULL); + + return seq; + } + + /* fold_build3 (BIT_FIELD_REF, ...) sometimes returns a cast. */ + if (CONVERT_EXPR_P (dst)) + { + STRIP_NOPS (dst); + src = fold_convert (TREE_TYPE (dst), src); + } + /* It was hoped that we could perform some type sanity checking + here, but since front-ends can emit accesses of fields in types + different from their nominal types and copy structures containing + them as a whole, we'd have to handle such differences here. + Since such accesses under different types require compatibility + anyway, there's little point in making tests and/or adding + conversions to ensure the types of src and dst are the same. + So we just assume type differences at this point are ok. + The only exception we make here are pointer types, which can be different + in e.g. structurally equal, but non-identical RECORD_TYPEs. */ + else if (POINTER_TYPE_P (TREE_TYPE (dst)) + && !useless_type_conversion_p (TREE_TYPE (dst), TREE_TYPE (src))) + src = fold_convert (TREE_TYPE (dst), src); + + /* ??? Only call the gimplifier if we need to. Otherwise we may + end up substituting with DECL_VALUE_EXPR - see PR37380. */ + if (!handled_component_p (src) + && !SSA_VAR_P (src)) + { + src = force_gimple_operand (src, &seq2, false, NULL_TREE); + gimple_seq_add_seq (&seq, seq2); + } + stmt = gimple_build_assign (dst, src); + gimple_seq_add_stmt (&seq, stmt); + return seq; +} + +/* BIT_FIELD_REFs must not be shared. sra_build_elt_assignment() + takes care of assignments, but we must create copies for uses. */ +#define REPLDUP(t) (TREE_CODE (t) != BIT_FIELD_REF ? (t) : unshare_expr (t)) + +/* Emit an assignment from SRC to DST, but if DST is a scalarizable + BIT_FIELD_REF, turn it into bit operations. 
*/ + +static gimple_seq +sra_build_bf_assignment (tree dst, tree src) +{ + tree var, type, utype, tmp, tmp2, tmp3; + gimple_seq seq; + gimple stmt; + tree cst, cst2, mask; + tree minshift, maxshift; + + if (TREE_CODE (dst) != BIT_FIELD_REF) + return sra_build_assignment (dst, src); + + var = TREE_OPERAND (dst, 0); + + if (!scalar_bitfield_p (dst)) + return sra_build_assignment (REPLDUP (dst), src); + + seq = NULL; + + cst = fold_convert (bitsizetype, TREE_OPERAND (dst, 2)); + cst2 = size_binop (PLUS_EXPR, + fold_convert (bitsizetype, TREE_OPERAND (dst, 1)), + cst); + + if (BYTES_BIG_ENDIAN) + { + maxshift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), cst); + minshift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), cst2); + } + else + { + maxshift = cst2; + minshift = cst; + } + + type = TREE_TYPE (var); + if (!INTEGRAL_TYPE_P (type)) + type = lang_hooks.types.type_for_size + (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (var))), 1); + if (TYPE_UNSIGNED (type)) + utype = type; + else + utype = unsigned_type_for (type); + + mask = build_int_cst_wide (utype, 1, 0); + if (TREE_INT_CST_LOW (maxshift) == TYPE_PRECISION (utype)) + cst = build_int_cst_wide (utype, 0, 0); + else + cst = int_const_binop (LSHIFT_EXPR, mask, maxshift, true); + if (integer_zerop (minshift)) + cst2 = mask; + else + cst2 = int_const_binop (LSHIFT_EXPR, mask, minshift, true); + mask = int_const_binop (MINUS_EXPR, cst, cst2, true); + mask = fold_build1 (BIT_NOT_EXPR, utype, mask); + + if (TYPE_MAIN_VARIANT (utype) != TYPE_MAIN_VARIANT (TREE_TYPE (var)) + && !integer_zerop (mask)) + { + tmp = var; + if (!is_gimple_variable (tmp)) + tmp = unshare_expr (var); + else + TREE_NO_WARNING (var) = true; + + tmp2 = make_rename_temp (utype, "SR"); + + if (INTEGRAL_TYPE_P (TREE_TYPE (var))) + tmp = fold_convert (utype, tmp); + else + tmp = fold_build1 (VIEW_CONVERT_EXPR, utype, tmp); + + stmt = gimple_build_assign (tmp2, tmp); + gimple_seq_add_stmt (&seq, stmt); + } + else + tmp2 = var; + + if (!integer_zerop (mask)) + { + tmp = make_rename_temp (utype, "SR"); + stmt = gimple_build_assign (tmp, fold_build2 (BIT_AND_EXPR, utype, + tmp2, mask)); + gimple_seq_add_stmt (&seq, stmt); + } + else + tmp = mask; + + if (is_gimple_reg (src) && INTEGRAL_TYPE_P (TREE_TYPE (src))) + tmp2 = src; + else if (INTEGRAL_TYPE_P (TREE_TYPE (src))) + { + gimple_seq tmp_seq; + tmp2 = make_rename_temp (TREE_TYPE (src), "SR"); + tmp_seq = sra_build_assignment (tmp2, src); + gimple_seq_add_seq (&seq, tmp_seq); + } + else + { + gimple_seq tmp_seq; + tmp2 = make_rename_temp + (lang_hooks.types.type_for_size + (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (src))), + 1), "SR"); + tmp_seq = sra_build_assignment (tmp2, fold_build1 (VIEW_CONVERT_EXPR, + TREE_TYPE (tmp2), src)); + gimple_seq_add_seq (&seq, tmp_seq); + } + + if (!TYPE_UNSIGNED (TREE_TYPE (tmp2))) + { + gimple_seq tmp_seq; + tree ut = unsigned_type_for (TREE_TYPE (tmp2)); + tmp3 = make_rename_temp (ut, "SR"); + tmp2 = fold_convert (ut, tmp2); + tmp_seq = sra_build_assignment (tmp3, tmp2); + gimple_seq_add_seq (&seq, tmp_seq); + + tmp2 = fold_build1 (BIT_NOT_EXPR, utype, mask); + tmp2 = int_const_binop (RSHIFT_EXPR, tmp2, minshift, true); + tmp2 = fold_convert (ut, tmp2); + tmp2 = fold_build2 (BIT_AND_EXPR, ut, tmp3, tmp2); + + if (tmp3 != tmp2) + { + tmp3 = make_rename_temp (ut, "SR"); + tmp_seq = sra_build_assignment (tmp3, tmp2); + gimple_seq_add_seq (&seq, tmp_seq); + } + + tmp2 = tmp3; + } + + if (TYPE_MAIN_VARIANT (TREE_TYPE (tmp2)) != TYPE_MAIN_VARIANT (utype)) + { + gimple_seq tmp_seq; + tmp3 
= make_rename_temp (utype, "SR"); + tmp2 = fold_convert (utype, tmp2); + tmp_seq = sra_build_assignment (tmp3, tmp2); + gimple_seq_add_seq (&seq, tmp_seq); + tmp2 = tmp3; + } + + if (!integer_zerop (minshift)) + { + tmp3 = make_rename_temp (utype, "SR"); + stmt = gimple_build_assign (tmp3, fold_build2 (LSHIFT_EXPR, utype, + tmp2, minshift)); + gimple_seq_add_stmt (&seq, stmt); + tmp2 = tmp3; + } + + if (utype != TREE_TYPE (var)) + tmp3 = make_rename_temp (utype, "SR"); + else + tmp3 = var; + stmt = gimple_build_assign (tmp3, fold_build2 (BIT_IOR_EXPR, utype, + tmp, tmp2)); + gimple_seq_add_stmt (&seq, stmt); + + if (tmp3 != var) + { + if (TREE_TYPE (var) == type) + stmt = gimple_build_assign (var, fold_convert (type, tmp3)); + else + stmt = gimple_build_assign (var, fold_build1 (VIEW_CONVERT_EXPR, + TREE_TYPE (var), tmp3)); + gimple_seq_add_stmt (&seq, stmt); + } + + return seq; +} + +/* Expand an assignment of SRC to the scalarized representation of + ELT. If it is a field group, try to widen the assignment to cover + the full variable. */ + +static gimple_seq +sra_build_elt_assignment (struct sra_elt *elt, tree src) +{ + tree dst = elt->replacement; + tree var, tmp, cst, cst2; + gimple stmt; + gimple_seq seq; + + if (TREE_CODE (dst) != BIT_FIELD_REF + || !elt->in_bitfld_block) + return sra_build_assignment (REPLDUP (dst), src); + + var = TREE_OPERAND (dst, 0); + + /* Try to widen the assignment to the entire variable. + We need the source to be a BIT_FIELD_REF as well, such that, for + BIT_FIELD_REF<d,sz,dp> = BIT_FIELD_REF<s,sz,sp>, + by design, conditions are met such that we can turn it into + d = BIT_FIELD_REF<s,dw,sp-dp>. */ + if (elt->in_bitfld_block == 2 + && TREE_CODE (src) == BIT_FIELD_REF) + { + tmp = src; + cst = TYPE_SIZE (TREE_TYPE (var)); + cst2 = size_binop (MINUS_EXPR, TREE_OPERAND (src, 2), + TREE_OPERAND (dst, 2)); + + src = TREE_OPERAND (src, 0); + + /* Avoid full-width bit-fields. */ + if (integer_zerop (cst2) + && tree_int_cst_equal (cst, TYPE_SIZE (TREE_TYPE (src)))) + { + if (INTEGRAL_TYPE_P (TREE_TYPE (src)) + && !TYPE_UNSIGNED (TREE_TYPE (src))) + src = fold_convert (unsigned_type_for (TREE_TYPE (src)), src); + + /* If a single conversion won't do, we'll need a statement + list. */ + if (TYPE_MAIN_VARIANT (TREE_TYPE (var)) + != TYPE_MAIN_VARIANT (TREE_TYPE (src))) + { + gimple_seq tmp_seq; + seq = NULL; + + if (!INTEGRAL_TYPE_P (TREE_TYPE (src))) + src = fold_build1 (VIEW_CONVERT_EXPR, + lang_hooks.types.type_for_size + (TREE_INT_CST_LOW + (TYPE_SIZE (TREE_TYPE (src))), + 1), src); + gcc_assert (TYPE_UNSIGNED (TREE_TYPE (src))); + + tmp = make_rename_temp (TREE_TYPE (src), "SR"); + stmt = gimple_build_assign (tmp, src); + gimple_seq_add_stmt (&seq, stmt); + + tmp_seq = sra_build_assignment (var, + fold_convert (TREE_TYPE (var), + tmp)); + gimple_seq_add_seq (&seq, tmp_seq); + + return seq; + } + + src = fold_convert (TREE_TYPE (var), src); + } + else + { + src = fold_convert (TREE_TYPE (var), tmp); + } + + return sra_build_assignment (var, src); + } + + return sra_build_bf_assignment (dst, src); +} + +/* Generate a set of assignment statements in *LIST_P to copy all + instantiated elements under ELT to or from the equivalent structure + rooted at EXPR. COPY_OUT controls the direction of the copy, with + true meaning to copy out of EXPR into ELT. 
*/ + +static void +generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr, + gimple_seq *seq_p) +{ + struct sra_elt *c; + gimple_seq tmp_seq; + tree t; + + if (!copy_out && TREE_CODE (expr) == SSA_NAME + && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE) + { + tree r, i; + + c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT); + r = c->replacement; + c = lookup_element (elt, integer_one_node, NULL, NO_INSERT); + i = c->replacement; + + t = build2 (COMPLEX_EXPR, elt->type, r, i); + tmp_seq = sra_build_bf_assignment (expr, t); + SSA_NAME_DEF_STMT (expr) = gimple_seq_last_stmt (tmp_seq); + gimple_seq_add_seq (seq_p, tmp_seq); + } + else if (elt->replacement) + { + if (copy_out) + tmp_seq = sra_build_elt_assignment (elt, expr); + else + tmp_seq = sra_build_bf_assignment (expr, REPLDUP (elt->replacement)); + gimple_seq_add_seq (seq_p, tmp_seq); + } + else + { + FOR_EACH_ACTUAL_CHILD (c, elt) + { + t = generate_one_element_ref (c, unshare_expr (expr)); + generate_copy_inout (c, copy_out, t, seq_p); + } + } +} + +/* Generate a set of assignment statements in *LIST_P to copy all instantiated + elements under SRC to their counterparts under DST. There must be a 1-1 + correspondence of instantiated elements. */ + +static void +generate_element_copy (struct sra_elt *dst, struct sra_elt *src, gimple_seq *seq_p) +{ + struct sra_elt *dc, *sc; + + FOR_EACH_ACTUAL_CHILD (dc, dst) + { + sc = lookup_element (src, dc->element, NULL, NO_INSERT); + if (!sc && dc->in_bitfld_block == 2) + { + struct sra_elt *dcs; + + FOR_EACH_ACTUAL_CHILD (dcs, dc) + { + sc = lookup_element (src, dcs->element, NULL, NO_INSERT); + gcc_assert (sc); + generate_element_copy (dcs, sc, seq_p); + } + + continue; + } + + /* If DST and SRC are structs with the same elements, but do not have + the same TYPE_MAIN_VARIANT, then lookup of DST FIELD_DECL in SRC + will fail. Try harder by finding the corresponding FIELD_DECL + in SRC. */ + if (!sc) + { + tree f; + + gcc_assert (useless_type_conversion_p (dst->type, src->type)); + gcc_assert (TREE_CODE (dc->element) == FIELD_DECL); + for (f = TYPE_FIELDS (src->type); f ; f = TREE_CHAIN (f)) + if (simple_cst_equal (DECL_FIELD_OFFSET (f), + DECL_FIELD_OFFSET (dc->element)) > 0 + && simple_cst_equal (DECL_FIELD_BIT_OFFSET (f), + DECL_FIELD_BIT_OFFSET (dc->element)) > 0 + && simple_cst_equal (DECL_SIZE (f), + DECL_SIZE (dc->element)) > 0 + && (useless_type_conversion_p (TREE_TYPE (dc->element), + TREE_TYPE (f)) + || (POINTER_TYPE_P (TREE_TYPE (dc->element)) + && POINTER_TYPE_P (TREE_TYPE (f))))) + break; + gcc_assert (f != NULL_TREE); + sc = lookup_element (src, f, NULL, NO_INSERT); + } + + generate_element_copy (dc, sc, seq_p); + } + + if (dst->replacement) + { + gimple_seq tmp_seq; + + gcc_assert (src->replacement); + + tmp_seq = sra_build_elt_assignment (dst, REPLDUP (src->replacement)); + gimple_seq_add_seq (seq_p, tmp_seq); + } +} + +/* Generate a set of assignment statements in *LIST_P to zero all instantiated + elements under ELT. In addition, do not assign to elements that have been + marked VISITED but do reset the visited flag; this allows easy coordination + with generate_element_init. 
*/ + +static void +generate_element_zero (struct sra_elt *elt, gimple_seq *seq_p) +{ + struct sra_elt *c; + + if (elt->visited) + { + elt->visited = false; + return; + } + + if (!elt->in_bitfld_block) + FOR_EACH_ACTUAL_CHILD (c, elt) + generate_element_zero (c, seq_p); + + if (elt->replacement) + { + tree t; + gimple_seq tmp_seq; + + gcc_assert (elt->is_scalar); + t = fold_convert (elt->type, integer_zero_node); + + tmp_seq = sra_build_elt_assignment (elt, t); + gimple_seq_add_seq (seq_p, tmp_seq); + } +} + +/* Generate an assignment VAR = INIT, where INIT may need gimplification. + Add the result to *LIST_P. */ + +static void +generate_one_element_init (struct sra_elt *elt, tree init, gimple_seq *seq_p) +{ + gimple_seq tmp_seq = sra_build_elt_assignment (elt, init); + gimple_seq_add_seq (seq_p, tmp_seq); +} + +/* Generate a set of assignment statements in *LIST_P to set all instantiated + elements under ELT with the contents of the initializer INIT. In addition, + mark all assigned elements VISITED; this allows easy coordination with + generate_element_zero. Return false if we found a case we couldn't + handle. */ + +static bool +generate_element_init_1 (struct sra_elt *elt, tree init, gimple_seq *seq_p) +{ + bool result = true; + enum tree_code init_code; + struct sra_elt *sub; + tree t; + unsigned HOST_WIDE_INT idx; + tree value, purpose; + + /* We can be passed DECL_INITIAL of a static variable. It might have a + conversion, which we strip off here. */ + STRIP_USELESS_TYPE_CONVERSION (init); + init_code = TREE_CODE (init); + + if (elt->is_scalar) + { + if (elt->replacement) + { + generate_one_element_init (elt, init, seq_p); + elt->visited = true; + } + return result; + } + + switch (init_code) + { + case COMPLEX_CST: + case COMPLEX_EXPR: + FOR_EACH_ACTUAL_CHILD (sub, elt) + { + if (sub->element == integer_zero_node) + t = (init_code == COMPLEX_EXPR + ? TREE_OPERAND (init, 0) : TREE_REALPART (init)); + else + t = (init_code == COMPLEX_EXPR + ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init)); + result &= generate_element_init_1 (sub, t, seq_p); + } + break; + + case CONSTRUCTOR: + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value) + { + /* Array constructors are routinely created with NULL indices. */ + if (purpose == NULL_TREE) + { + result = false; + break; + } + if (TREE_CODE (purpose) == RANGE_EXPR) + { + tree lower = TREE_OPERAND (purpose, 0); + tree upper = TREE_OPERAND (purpose, 1); + + while (1) + { + sub = lookup_element (elt, lower, NULL, NO_INSERT); + if (sub != NULL) + result &= generate_element_init_1 (sub, value, seq_p); + if (tree_int_cst_equal (lower, upper)) + break; + lower = int_const_binop (PLUS_EXPR, lower, + integer_one_node, true); + } + } + else + { + sub = lookup_element (elt, purpose, NULL, NO_INSERT); + if (sub != NULL) + result &= generate_element_init_1 (sub, value, seq_p); + } + } + break; + + default: + elt->visited = true; + result = false; + } + + return result; +} + +/* A wrapper function for generate_element_init_1 that handles cleanup after + gimplification. */ + +static bool +generate_element_init (struct sra_elt *elt, tree init, gimple_seq *seq_p) +{ + bool ret; + struct gimplify_ctx gctx; + + push_gimplify_context (&gctx); + ret = generate_element_init_1 (elt, init, seq_p); + pop_gimplify_context (NULL); + + /* The replacement can expose previously unreferenced variables. 
*/ + if (ret && *seq_p) + { + gimple_stmt_iterator i; + + for (i = gsi_start (*seq_p); !gsi_end_p (i); gsi_next (&i)) + find_new_referenced_vars (gsi_stmt (i)); + } + + return ret; +} + +/* Insert a gimple_seq SEQ on all the outgoing edges out of BB. Note that + if BB has more than one edge, STMT will be replicated for each edge. + Also, abnormal edges will be ignored. */ + +void +insert_edge_copies_seq (gimple_seq seq, basic_block bb) +{ + edge e; + edge_iterator ei; + unsigned n_copies = -1; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (!(e->flags & EDGE_ABNORMAL)) + n_copies++; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (!(e->flags & EDGE_ABNORMAL)) + gsi_insert_seq_on_edge (e, n_copies-- > 0 ? gimple_seq_copy (seq) : seq); +} + +/* Helper function to insert LIST before GSI, and set up line number info. */ + +void +sra_insert_before (gimple_stmt_iterator *gsi, gimple_seq seq) +{ + gimple stmt = gsi_stmt (*gsi); + + if (gimple_has_location (stmt)) + annotate_all_with_location (seq, gimple_location (stmt)); + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); +} + +/* Similarly, but insert after GSI. Handles insertion onto edges as well. */ + +void +sra_insert_after (gimple_stmt_iterator *gsi, gimple_seq seq) +{ + gimple stmt = gsi_stmt (*gsi); + + if (gimple_has_location (stmt)) + annotate_all_with_location (seq, gimple_location (stmt)); + + if (stmt_ends_bb_p (stmt)) + insert_edge_copies_seq (seq, gsi_bb (*gsi)); + else + gsi_insert_seq_after (gsi, seq, GSI_SAME_STMT); +} + +/* Similarly, but replace the statement at GSI. */ + +static void +sra_replace (gimple_stmt_iterator *gsi, gimple_seq seq) +{ + sra_insert_before (gsi, seq); + gsi_remove (gsi, false); + if (gsi_end_p (*gsi)) + *gsi = gsi_last (gsi_seq (*gsi)); + else + gsi_prev (gsi); +} + +/* Data structure that bitfield_overlaps_p fills in with information + about the element passed in and how much of it overlaps with the + bit-range passed it to. */ + +struct bitfield_overlap_info +{ + /* The bit-length of an element. */ + tree field_len; + + /* The bit-position of the element in its parent. */ + tree field_pos; + + /* The number of bits of the element that overlap with the incoming + bit range. */ + tree overlap_len; + + /* The first bit of the element that overlaps with the incoming bit + range. */ + tree overlap_pos; +}; + +/* Return true if a BIT_FIELD_REF<(FLD->parent), BLEN, BPOS> + expression (referenced as BF below) accesses any of the bits in FLD, + false if it doesn't. If DATA is non-null, its field_len and + field_pos are filled in such that BIT_FIELD_REF<(FLD->parent), + field_len, field_pos> (referenced as BFLD below) represents the + entire field FLD->element, and BIT_FIELD_REF<BFLD, overlap_len, + overlap_pos> represents the portion of the entire field that + overlaps with BF. 
*/ + +static bool +bitfield_overlaps_p (tree blen, tree bpos, struct sra_elt *fld, + struct bitfield_overlap_info *data) +{ + tree flen, fpos; + bool ret; + + if (TREE_CODE (fld->element) == FIELD_DECL) + { + flen = fold_convert (bitsizetype, DECL_SIZE (fld->element)); + fpos = fold_convert (bitsizetype, DECL_FIELD_OFFSET (fld->element)); + fpos = size_binop (MULT_EXPR, fpos, bitsize_int (BITS_PER_UNIT)); + fpos = size_binop (PLUS_EXPR, fpos, DECL_FIELD_BIT_OFFSET (fld->element)); + } + else if (TREE_CODE (fld->element) == BIT_FIELD_REF) + { + flen = fold_convert (bitsizetype, TREE_OPERAND (fld->element, 1)); + fpos = fold_convert (bitsizetype, TREE_OPERAND (fld->element, 2)); + } + else if (TREE_CODE (fld->element) == INTEGER_CST) + { + tree domain_type = TYPE_DOMAIN (TREE_TYPE (fld->parent->element)); + flen = fold_convert (bitsizetype, TYPE_SIZE (fld->type)); + fpos = fold_convert (bitsizetype, fld->element); + if (domain_type && TYPE_MIN_VALUE (domain_type)) + fpos = size_binop (MINUS_EXPR, fpos, + fold_convert (bitsizetype, + TYPE_MIN_VALUE (domain_type))); + fpos = size_binop (MULT_EXPR, flen, fpos); + } + else + gcc_unreachable (); + + gcc_assert (host_integerp (blen, 1) + && host_integerp (bpos, 1) + && host_integerp (flen, 1) + && host_integerp (fpos, 1)); + + ret = ((!tree_int_cst_lt (fpos, bpos) + && tree_int_cst_lt (size_binop (MINUS_EXPR, fpos, bpos), + blen)) + || (!tree_int_cst_lt (bpos, fpos) + && tree_int_cst_lt (size_binop (MINUS_EXPR, bpos, fpos), + flen))); + + if (!ret) + return ret; + + if (data) + { + tree bend, fend; + + data->field_len = flen; + data->field_pos = fpos; + + fend = size_binop (PLUS_EXPR, fpos, flen); + bend = size_binop (PLUS_EXPR, bpos, blen); + + if (tree_int_cst_lt (bend, fend)) + data->overlap_len = size_binop (MINUS_EXPR, bend, fpos); + else + data->overlap_len = NULL; + + if (tree_int_cst_lt (fpos, bpos)) + { + data->overlap_pos = size_binop (MINUS_EXPR, bpos, fpos); + data->overlap_len = size_binop (MINUS_EXPR, + data->overlap_len + ? data->overlap_len + : data->field_len, + data->overlap_pos); + } + else + data->overlap_pos = NULL; + } + + return ret; +} + +/* Add to LISTP a sequence of statements that copies BLEN bits between + VAR and the scalarized elements of ELT, starting a bit VPOS of VAR + and at bit BPOS of ELT. The direction of the copy is given by + TO_VAR. */ + +static void +sra_explode_bitfield_assignment (tree var, tree vpos, bool to_var, + gimple_seq *seq_p, tree blen, tree bpos, + struct sra_elt *elt) +{ + struct sra_elt *fld; + struct bitfield_overlap_info flp; + + FOR_EACH_ACTUAL_CHILD (fld, elt) + { + tree flen, fpos; + + if (!bitfield_overlaps_p (blen, bpos, fld, &flp)) + continue; + + flen = flp.overlap_len ? flp.overlap_len : flp.field_len; + fpos = flp.overlap_pos ? 
flp.overlap_pos : bitsize_int (0); + + if (fld->replacement) + { + tree infld, invar, type; + gimple_seq st; + + infld = fld->replacement; + + type = unsigned_type_for (TREE_TYPE (infld)); + if (TYPE_PRECISION (type) != TREE_INT_CST_LOW (flen)) + type = build_nonstandard_integer_type (TREE_INT_CST_LOW (flen), 1); + + if (TREE_CODE (infld) == BIT_FIELD_REF) + { + fpos = size_binop (PLUS_EXPR, fpos, TREE_OPERAND (infld, 2)); + infld = TREE_OPERAND (infld, 0); + } + else if (BYTES_BIG_ENDIAN && DECL_P (fld->element) + && !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (infld)), + DECL_SIZE (fld->element))) + { + fpos = size_binop (PLUS_EXPR, fpos, + TYPE_SIZE (TREE_TYPE (infld))); + fpos = size_binop (MINUS_EXPR, fpos, + DECL_SIZE (fld->element)); + } + + infld = fold_build3 (BIT_FIELD_REF, type, infld, flen, fpos); + + invar = size_binop (MINUS_EXPR, flp.field_pos, bpos); + if (flp.overlap_pos) + invar = size_binop (PLUS_EXPR, invar, flp.overlap_pos); + invar = size_binop (PLUS_EXPR, invar, vpos); + + invar = fold_build3 (BIT_FIELD_REF, type, var, flen, invar); + + if (to_var) + st = sra_build_bf_assignment (invar, infld); + else + st = sra_build_bf_assignment (infld, invar); + + gimple_seq_add_seq (seq_p, st); + } + else + { + tree sub = size_binop (MINUS_EXPR, flp.field_pos, bpos); + sub = size_binop (PLUS_EXPR, vpos, sub); + if (flp.overlap_pos) + sub = size_binop (PLUS_EXPR, sub, flp.overlap_pos); + + sra_explode_bitfield_assignment (var, sub, to_var, seq_p, + flen, fpos, fld); + } + } +} + +/* Add to LISTBEFOREP statements that copy scalarized members of ELT + that overlap with BIT_FIELD_REF<(ELT->element), BLEN, BPOS> back + into the full variable, and to LISTAFTERP, if non-NULL, statements + that copy the (presumably modified) overlapping portions of the + full variable back to the scalarized variables. */ + +static void +sra_sync_for_bitfield_assignment (gimple_seq *seq_before_p, + gimple_seq *seq_after_p, + tree blen, tree bpos, + struct sra_elt *elt) +{ + struct sra_elt *fld; + struct bitfield_overlap_info flp; + + FOR_EACH_ACTUAL_CHILD (fld, elt) + if (bitfield_overlaps_p (blen, bpos, fld, &flp)) + { + if (fld->replacement || (!flp.overlap_len && !flp.overlap_pos)) + { + generate_copy_inout (fld, false, generate_element_ref (fld), + seq_before_p); + mark_no_warning (fld); + if (seq_after_p) + generate_copy_inout (fld, true, generate_element_ref (fld), + seq_after_p); + } + else + { + tree flen = flp.overlap_len ? flp.overlap_len : flp.field_len; + tree fpos = flp.overlap_pos ? flp.overlap_pos : bitsize_int (0); + + sra_sync_for_bitfield_assignment (seq_before_p, seq_after_p, + flen, fpos, fld); + } + } +} + +/* Scalarize a USE. To recap, this is either a simple reference to ELT, + if elt is scalar, or some occurrence of ELT that requires a complete + aggregate. IS_OUTPUT is true if ELT is being modified. */ + +static void +scalarize_use (struct sra_elt *elt, tree *expr_p, gimple_stmt_iterator *gsi, + bool is_output, bool use_all) +{ + gimple stmt = gsi_stmt (*gsi); + tree bfexpr; + + if (elt->replacement) + { + tree replacement = elt->replacement; + + /* If we have a replacement, then updating the reference is as + simple as modifying the existing statement in place. */ + if (is_output + && TREE_CODE (elt->replacement) == BIT_FIELD_REF + && is_gimple_reg (TREE_OPERAND (elt->replacement, 0)) + && is_gimple_assign (stmt) + && gimple_assign_lhs_ptr (stmt) == expr_p) + { + gimple_seq newseq; + /* RHS must be a single operand. 
*/ + gcc_assert (gimple_assign_single_p (stmt)); + newseq = sra_build_elt_assignment (elt, gimple_assign_rhs1 (stmt)); + sra_replace (gsi, newseq); + return; + } + else if (!is_output + && TREE_CODE (elt->replacement) == BIT_FIELD_REF + && is_gimple_assign (stmt) + && gimple_assign_rhs1_ptr (stmt) == expr_p) + { + tree tmp = make_rename_temp + (TREE_TYPE (gimple_assign_lhs (stmt)), "SR"); + gimple_seq newseq = sra_build_assignment (tmp, REPLDUP (elt->replacement)); + + sra_insert_before (gsi, newseq); + replacement = tmp; + } + if (is_output) + mark_all_v_defs_stmt (stmt); + *expr_p = REPLDUP (replacement); + update_stmt (stmt); + } + else if (use_all && is_output + && is_gimple_assign (stmt) + && TREE_CODE (bfexpr + = gimple_assign_lhs (stmt)) == BIT_FIELD_REF + && &TREE_OPERAND (bfexpr, 0) == expr_p + && INTEGRAL_TYPE_P (TREE_TYPE (bfexpr)) + && TREE_CODE (TREE_TYPE (*expr_p)) == RECORD_TYPE) + { + gimple_seq seq_before = NULL; + gimple_seq seq_after = NULL; + tree blen = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 1)); + tree bpos = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 2)); + bool update = false; + + if (!elt->use_block_copy) + { + tree type = TREE_TYPE (bfexpr); + tree var = make_rename_temp (type, "SR"), tmp, vpos; + gimple st; + + gimple_assign_set_lhs (stmt, var); + update = true; + + if (!TYPE_UNSIGNED (type)) + { + type = unsigned_type_for (type); + tmp = make_rename_temp (type, "SR"); + st = gimple_build_assign (tmp, fold_convert (type, var)); + gimple_seq_add_stmt (&seq_after, st); + var = tmp; + } + + /* If VAR is wider than BLEN bits, it is padded at the + most-significant end. We want to set VPOS such that + <BIT_FIELD_REF VAR BLEN VPOS> would refer to the + least-significant BLEN bits of VAR. */ + if (BYTES_BIG_ENDIAN) + vpos = size_binop (MINUS_EXPR, TYPE_SIZE (type), blen); + else + vpos = bitsize_int (0); + sra_explode_bitfield_assignment + (var, vpos, false, &seq_after, blen, bpos, elt); + } + else + sra_sync_for_bitfield_assignment + (&seq_before, &seq_after, blen, bpos, elt); + + if (seq_before) + { + mark_all_v_defs_seq (seq_before); + sra_insert_before (gsi, seq_before); + } + if (seq_after) + { + mark_all_v_defs_seq (seq_after); + sra_insert_after (gsi, seq_after); + } + + if (update) + update_stmt (stmt); + } + else if (use_all && !is_output + && is_gimple_assign (stmt) + && TREE_CODE (bfexpr + = gimple_assign_rhs1 (stmt)) == BIT_FIELD_REF + && &TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == expr_p + && INTEGRAL_TYPE_P (TREE_TYPE (bfexpr)) + && TREE_CODE (TREE_TYPE (*expr_p)) == RECORD_TYPE) + { + gimple_seq seq = NULL; + tree blen = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 1)); + tree bpos = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 2)); + bool update = false; + + if (!elt->use_block_copy) + { + tree type = TREE_TYPE (bfexpr); + tree var = make_rename_temp (type, "SR"), tmp, vpos; + gimple st = NULL; + + gimple_assign_set_rhs1 (stmt, var); + update = true; + + if (!TYPE_UNSIGNED (type)) + { + type = unsigned_type_for (type); + tmp = make_rename_temp (type, "SR"); + st = gimple_build_assign (var, + fold_convert (TREE_TYPE (var), tmp)); + var = tmp; + } + + gimple_seq_add_stmt (&seq, + gimple_build_assign + (var, build_int_cst_wide (type, 0, 0))); + + /* If VAR is wider than BLEN bits, it is padded at the + most-significant end. We want to set VPOS such that + <BIT_FIELD_REF VAR BLEN VPOS> would refer to the + least-significant BLEN bits of VAR. 
*/ + if (BYTES_BIG_ENDIAN) + vpos = size_binop (MINUS_EXPR, TYPE_SIZE (type), blen); + else + vpos = bitsize_int (0); + sra_explode_bitfield_assignment + (var, vpos, true, &seq, blen, bpos, elt); + + if (st) + gimple_seq_add_stmt (&seq, st); + } + else + sra_sync_for_bitfield_assignment + (&seq, NULL, blen, bpos, elt); + + if (seq) + { + mark_all_v_defs_seq (seq); + sra_insert_before (gsi, seq); + } + + if (update) + update_stmt (stmt); + } + else + { + gimple_seq seq = NULL; + + /* Otherwise we need some copies. If ELT is being read, then we + want to store all (modified) sub-elements back into the + structure before the reference takes place. If ELT is being + written, then we want to load the changed values back into + our shadow variables. */ + /* ??? We don't check modified for reads, we just always write all of + the values. We should be able to record the SSA number of the VOP + for which the values were last read. If that number matches the + SSA number of the VOP in the current statement, then we needn't + emit an assignment. This would also eliminate double writes when + a structure is passed as more than one argument to a function call. + This optimization would be most effective if sra_walk_function + processed the blocks in dominator order. */ + + generate_copy_inout (elt, is_output, generate_element_ref (elt), &seq); + if (seq == NULL) + return; + mark_all_v_defs_seq (seq); + if (is_output) + sra_insert_after (gsi, seq); + else + { + sra_insert_before (gsi, seq); + if (use_all) + mark_no_warning (elt); + } + } +} + +/* Scalarize a COPY. To recap, this is an assignment statement between + two scalarizable references, LHS_ELT and RHS_ELT. */ + +static void +scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + gimple_stmt_iterator *gsi) +{ + gimple_seq seq; + gimple stmt; + + if (lhs_elt->replacement && rhs_elt->replacement) + { + /* If we have two scalar operands, modify the existing statement. */ + stmt = gsi_stmt (*gsi); + + /* See the commentary in sra_walk_function concerning + RETURN_EXPR, and why we should never see one here. */ + gcc_assert (is_gimple_assign (stmt)); + gcc_assert (gimple_assign_copy_p (stmt)); + + + gimple_assign_set_lhs (stmt, lhs_elt->replacement); + gimple_assign_set_rhs1 (stmt, REPLDUP (rhs_elt->replacement)); + update_stmt (stmt); + } + else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy) + { + /* If either side requires a block copy, then sync the RHS back + to the original structure, leave the original assignment + statement (which will perform the block copy), then load the + LHS values out of its now-updated original structure. */ + /* ??? Could perform a modified pair-wise element copy. That + would at least allow those elements that are instantiated in + both structures to be optimized well. */ + + seq = NULL; + generate_copy_inout (rhs_elt, false, + generate_element_ref (rhs_elt), &seq); + if (seq) + { + mark_all_v_defs_seq (seq); + sra_insert_before (gsi, seq); + } + + seq = NULL; + generate_copy_inout (lhs_elt, true, + generate_element_ref (lhs_elt), &seq); + if (seq) + { + mark_all_v_defs_seq (seq); + sra_insert_after (gsi, seq); + } + } + else + { + /* Otherwise both sides must be fully instantiated. In which + case perform pair-wise element assignments and replace the + original block copy statement. 
*/ + + stmt = gsi_stmt (*gsi); + mark_all_v_defs_stmt (stmt); + + seq = NULL; + generate_element_copy (lhs_elt, rhs_elt, &seq); + gcc_assert (seq); + mark_all_v_defs_seq (seq); + sra_replace (gsi, seq); + } +} + +/* Scalarize an INIT. To recap, this is an assignment to a scalarizable + reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or + COMPLEX_EXPR. If RHS is NULL, it should be treated as an empty + CONSTRUCTOR. */ + +static void +scalarize_init (struct sra_elt *lhs_elt, tree rhs, gimple_stmt_iterator *gsi) +{ + bool result = true; + gimple_seq seq = NULL, init_seq = NULL; + + /* Generate initialization statements for all members extant in the RHS. */ + if (rhs) + { + /* Unshare the expression just in case this is from a decl's initial. */ + rhs = unshare_expr (rhs); + result = generate_element_init (lhs_elt, rhs, &init_seq); + } + + if (!result) + { + /* If we failed to convert the entire initializer, then we must + leave the structure assignment in place and must load values + from the structure into the slots for which we did not find + constants. The easiest way to do this is to generate a complete + copy-out, and then follow that with the constant assignments + that we were able to build. DCE will clean things up. */ + gimple_seq seq0 = NULL; + generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt), + &seq0); + gimple_seq_add_seq (&seq0, seq); + seq = seq0; + } + else + { + /* CONSTRUCTOR is defined such that any member not mentioned is assigned + a zero value. Initialize the rest of the instantiated elements. */ + generate_element_zero (lhs_elt, &seq); + gimple_seq_add_seq (&seq, init_seq); + } + + if (lhs_elt->use_block_copy || !result) + { + /* Since LHS is not fully instantiated, we must leave the structure + assignment in place. Treating this case differently from a USE + exposes constants to later optimizations. */ + if (seq) + { + mark_all_v_defs_seq (seq); + sra_insert_after (gsi, seq); + } + } + else + { + /* The LHS is fully instantiated. The list of initializations + replaces the original structure assignment. */ + gcc_assert (seq); + mark_all_v_defs_stmt (gsi_stmt (*gsi)); + mark_all_v_defs_seq (seq); + sra_replace (gsi, seq); + } +} + +/* A subroutine of scalarize_ldst called via walk_tree. Set TREE_NO_TRAP + on all INDIRECT_REFs. */ + +static tree +mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) +{ + tree t = *tp; + + if (TREE_CODE (t) == INDIRECT_REF) + { + TREE_THIS_NOTRAP (t) = 1; + *walk_subtrees = 0; + } + else if (IS_TYPE_OR_DECL_P (t)) + *walk_subtrees = 0; + + return NULL; +} + +/* Scalarize a LDST. To recap, this is an assignment between one scalarizable + reference ELT and one non-scalarizable reference OTHER. IS_OUTPUT is true + if ELT is on the left-hand side. */ + +static void +scalarize_ldst (struct sra_elt *elt, tree other, + gimple_stmt_iterator *gsi, bool is_output) +{ + /* Shouldn't have gotten called for a scalar. */ + gcc_assert (!elt->replacement); + + if (elt->use_block_copy) + { + /* Since ELT is not fully instantiated, we have to leave the + block copy in place. Treat this as a USE. */ + scalarize_use (elt, NULL, gsi, is_output, false); + } + else + { + /* The interesting case is when ELT is fully instantiated. In this + case we can have each element stored/loaded directly to/from the + corresponding slot in OTHER. This avoids a block copy. 
*/ + + gimple_seq seq = NULL; + gimple stmt = gsi_stmt (*gsi); + + mark_all_v_defs_stmt (stmt); + generate_copy_inout (elt, is_output, other, &seq); + gcc_assert (seq); + mark_all_v_defs_seq (seq); + + /* Preserve EH semantics. */ + if (stmt_ends_bb_p (stmt)) + { + gimple_stmt_iterator si; + gimple first; + gimple_seq blist = NULL; + bool thr = stmt_could_throw_p (stmt); + + /* If the last statement of this BB created an EH edge + before scalarization, we have to locate the first + statement that can throw in the new statement list and + use that as the last statement of this BB, such that EH + semantics is preserved. All statements up to this one + are added to the same BB. All other statements in the + list will be added to normal outgoing edges of the same + BB. If they access any memory, it's the same memory, so + we can assume they won't throw. */ + si = gsi_start (seq); + for (first = gsi_stmt (si); + thr && !gsi_end_p (si) && !stmt_could_throw_p (first); + first = gsi_stmt (si)) + { + gsi_remove (&si, false); + gimple_seq_add_stmt (&blist, first); + } + + /* Extract the first remaining statement from LIST, this is + the EH statement if there is one. */ + gsi_remove (&si, false); + + if (blist) + sra_insert_before (gsi, blist); + + /* Replace the old statement with this new representative. */ + gsi_replace (gsi, first, true); + + if (!gsi_end_p (si)) + { + /* If any reference would trap, then they all would. And more + to the point, the first would. Therefore none of the rest + will trap since the first didn't. Indicate this by + iterating over the remaining statements and set + TREE_THIS_NOTRAP in all INDIRECT_REFs. */ + do + { + walk_gimple_stmt (&si, NULL, mark_notrap, NULL); + gsi_next (&si); + } + while (!gsi_end_p (si)); + + insert_edge_copies_seq (seq, gsi_bb (*gsi)); + } + } + else + sra_replace (gsi, seq); + } +} + +/* Generate initializations for all scalarizable parameters. */ + +static void +scalarize_parms (void) +{ + gimple_seq seq = NULL; + unsigned i; + bitmap_iterator bi; + + EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi) + { + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + generate_copy_inout (elt, true, var, &seq); + } + + if (seq) + { + insert_edge_copies_seq (seq, ENTRY_BLOCK_PTR); + mark_all_v_defs_seq (seq); + } +} + +/* Entry point to phase 4. Update the function to match replacements. */ + +static void +scalarize_function (void) +{ + static const struct sra_walk_fns fns = { + scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false + }; + + sra_walk_function (&fns); + scalarize_parms (); + gsi_commit_edge_inserts (); +} + + +/* Debug helper function. Print ELT in a nice human-readable format. */ + +static void +dump_sra_elt_name (FILE *f, struct sra_elt *elt) +{ + if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE) + { + fputs (elt->element == integer_zero_node ? 
"__real__ " : "__imag__ ", f); + dump_sra_elt_name (f, elt->parent); + } + else + { + if (elt->parent) + dump_sra_elt_name (f, elt->parent); + if (DECL_P (elt->element)) + { + if (TREE_CODE (elt->element) == FIELD_DECL) + fputc ('.', f); + print_generic_expr (f, elt->element, dump_flags); + } + else if (TREE_CODE (elt->element) == BIT_FIELD_REF) + fprintf (f, "$B" HOST_WIDE_INT_PRINT_DEC "F" HOST_WIDE_INT_PRINT_DEC, + tree_low_cst (TREE_OPERAND (elt->element, 2), 1), + tree_low_cst (TREE_OPERAND (elt->element, 1), 1)); + else if (TREE_CODE (elt->element) == RANGE_EXPR) + fprintf (f, "["HOST_WIDE_INT_PRINT_DEC".."HOST_WIDE_INT_PRINT_DEC"]", + TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 0)), + TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 1))); + else + fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]", + TREE_INT_CST_LOW (elt->element)); + } +} + +/* Likewise, but callable from the debugger. */ + +void +debug_sra_elt_name (struct sra_elt *elt) +{ + dump_sra_elt_name (stderr, elt); + fputc ('\n', stderr); +} + +void +sra_init_cache (void) +{ + if (sra_type_decomp_cache) + return; + + sra_type_decomp_cache = BITMAP_ALLOC (NULL); + sra_type_inst_cache = BITMAP_ALLOC (NULL); +} + + +/* Main entry point. */ + +static unsigned int +tree_sra (void) +{ + /* Initialize local variables. */ + todoflags = 0; + gcc_obstack_init (&sra_obstack); + sra_candidates = BITMAP_ALLOC (NULL); + needs_copy_in = BITMAP_ALLOC (NULL); + sra_init_cache (); + sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL); + + /* Scan. If we find anything, instantiate and scalarize. */ + if (find_candidates_for_sra ()) + { + scan_function (); + decide_instantiations (); + scalarize_function (); + if (!bitmap_empty_p (sra_candidates)) + todoflags |= TODO_rebuild_alias; + } + + /* Free allocated memory. */ + htab_delete (sra_map); + sra_map = NULL; + BITMAP_FREE (sra_candidates); + BITMAP_FREE (needs_copy_in); + BITMAP_FREE (sra_type_decomp_cache); + BITMAP_FREE (sra_type_inst_cache); + obstack_free (&sra_obstack, NULL); + return todoflags; +} + +static unsigned int +tree_sra_early (void) +{ + unsigned int ret; + + early_sra = true; + ret = tree_sra (); + early_sra = false; + + return ret & ~TODO_rebuild_alias; +} + +static bool +gate_sra (void) +{ + return flag_tree_sra != 0; +} + +struct gimple_opt_pass pass_sra_early = +{ + { + GIMPLE_PASS, + "esra", /* name */ + gate_sra, /* gate */ + tree_sra_early, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_SRA, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func + | TODO_update_ssa + | TODO_ggc_collect + | TODO_verify_ssa /* todo_flags_finish */ + } +}; + +struct gimple_opt_pass pass_sra = +{ + { + GIMPLE_PASS, + "sra", /* name */ + gate_sra, /* gate */ + tree_sra, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_SRA, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func + | TODO_update_ssa + | TODO_ggc_collect + | TODO_verify_ssa /* todo_flags_finish */ + } +};