diff gcc/tree-ssa-loop-im.c @ 111:04ced10e8804
gcc 7
author:   kono
date:     Fri, 27 Oct 2017 22:46:09 +0900
parents:  f6334be47118
children: 84e7813d76e9
--- a/gcc/tree-ssa-loop-im.c	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/tree-ssa-loop-im.c	Fri Oct 27 22:46:09 2017 +0900
@@ -1,6 +1,5 @@
 /* Loop invariant motion.
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010
-   Free Software Foundation, Inc.
+   Copyright (C) 2003-2017 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -21,25 +20,31 @@
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
 #include "tree.h"
-#include "tm_p.h"
-#include "basic-block.h"
-#include "output.h"
-#include "tree-pretty-print.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "tree-pass.h"
+#include "ssa.h"
 #include "gimple-pretty-print.h"
-#include "tree-flow.h"
-#include "tree-dump.h"
-#include "timevar.h"
+#include "fold-const.h"
+#include "cfganal.h"
+#include "tree-eh.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "tree-cfg.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "tree-into-ssa.h"
 #include "cfgloop.h"
 #include "domwalk.h"
 #include "params.h"
-#include "tree-pass.h"
-#include "flags.h"
-#include "hashtab.h"
 #include "tree-affine.h"
-#include "pointer-set.h"
 #include "tree-ssa-propagate.h"
+#include "trans-mem.h"
+#include "gimple-fold.h"
+#include "tree-scalar-evolution.h"
+#include "tree-ssa-loop-niter.h"
 
 /* TODO:  Support for predicated code motion.  I.e.
 
@@ -52,7 +57,7 @@
      }
   }
 
-   Where COND and INV are is invariants, but evaluating INV may trap or be
+   Where COND and INV are invariants, but evaluating INV may trap or be
    invalid from some other reason if !COND.  This may be transformed to
 
    if (cond)
@@ -63,15 +68,6 @@
        something;
    }  */
 
-/* A type for the list of statements that have to be moved in order to be able
-   to hoist an invariant computation.  */
-
-struct depend
-{
-  gimple stmt;
-  struct depend *next;
-};
-
 /* The auxiliary data kept for each statement.  */
 
 struct lim_aux_data
@@ -90,134 +86,153 @@
 
   unsigned cost;		/* Cost of the computation performed by the
 				   statement.  */
 
-  struct depend *depends;	/* List of statements that must be also hoisted
-				   out of the loop when this statement is
-				   hoisted; i.e. those that define the operands
-				   of the statement and are inside of the
-				   MAX_LOOP loop.  */
+  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */
+
+  vec<gimple *> depends;	/* Vector of statements that must be also
+				   hoisted out of the loop when this statement
+				   is hoisted; i.e. those that define the
+				   operands of the statement and are inside of
+				   the MAX_LOOP loop.  */
 };
 
 /* Maps statements to their lim_aux_data.  */
 
-static struct pointer_map_t *lim_aux_data_map;
+static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
 
 /* Description of a memory reference location.  */
 
-typedef struct mem_ref_loc
+struct mem_ref_loc
 {
   tree *ref;			/* The reference itself.  */
-  gimple stmt;			/* The statement in that it occurs.  */
-} *mem_ref_loc_p;
-
-DEF_VEC_P(mem_ref_loc_p);
-DEF_VEC_ALLOC_P(mem_ref_loc_p, heap);
-
-/* The list of memory reference locations in a loop.  */
-
-typedef struct mem_ref_locs
-{
-  VEC (mem_ref_loc_p, heap) *locs;
-} *mem_ref_locs_p;
-
-DEF_VEC_P(mem_ref_locs_p);
-DEF_VEC_ALLOC_P(mem_ref_locs_p, heap);
+  gimple *stmt;			/* The statement in that it occurs.  */
+};
+
 
 /* Description of a memory reference.  */
 
-typedef struct mem_ref
+struct im_mem_ref
 {
-  tree mem;			/* The memory itself.  */
   unsigned id;			/* ID assigned to the memory reference
 				   (its index in memory_accesses.refs_list)  */
   hashval_t hash;		/* Its hash value.  */
+
+  /* The memory access itself and associated caching of alias-oracle
+     query meta-data.  */
+  ao_ref mem;
+
   bitmap stored;		/* The set of loops in that this memory
 				   location is stored to.  */
-  VEC (mem_ref_locs_p, heap) *accesses_in_loop;
+  vec<mem_ref_loc> accesses_in_loop;
 				/* The locations of the accesses.  Vector
 				   indexed by the loop number.  */
-  bitmap vops;			/* Vops corresponding to this memory
 				   location.  */
 
   /* The following sets are computed on demand.  We keep both set and its
      complement, so that we know whether the information was already
     computed or not.  */
-  bitmap indep_loop;		/* The set of loops in that the memory
+  bitmap_head indep_loop;	/* The set of loops in that the memory
 				   reference is independent, meaning:
 				   If it is stored in the loop, this store
 				     is independent on all other loads and
 				     stores.
 				   If it is only loaded, then it is independent
 				     on all stores in the loop.  */
-  bitmap dep_loop;		/* The complement of INDEP_LOOP.  */
-
-  bitmap indep_ref;		/* The set of memory references on that
-				   this reference is independent.  */
-  bitmap dep_ref;		/* The complement of DEP_REF.  */
-} *mem_ref_p;
-
-DEF_VEC_P(mem_ref_p);
-DEF_VEC_ALLOC_P(mem_ref_p, heap);
-
-DEF_VEC_P(bitmap);
-DEF_VEC_ALLOC_P(bitmap, heap);
-
-DEF_VEC_P(htab_t);
-DEF_VEC_ALLOC_P(htab_t, heap);
+  bitmap_head dep_loop;		/* The complement of INDEP_LOOP.  */
+};
+
+/* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
+   to record (in)dependence against stores in the loop and its subloops, the
+   second to record (in)dependence against all references in the loop
+   and its subloops.  */
+#define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
+
+/* Mem_ref hashtable helpers.  */
+
+struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
+{
+  typedef tree_node *compare_type;
+  static inline hashval_t hash (const im_mem_ref *);
+  static inline bool equal (const im_mem_ref *, const tree_node *);
+};
+
+/* A hash function for struct im_mem_ref object OBJ.  */
+
+inline hashval_t
+mem_ref_hasher::hash (const im_mem_ref *mem)
+{
+  return mem->hash;
+}
+
+/* An equality function for struct im_mem_ref object MEM1 with
+   memory reference OBJ2.  */
+
+inline bool
+mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
+{
+  return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
+}
+
 
 /* Description of memory accesses in loops.  */
 
 static struct
 {
   /* The hash table of memory references accessed in loops.  */
-  htab_t refs;
+  hash_table<mem_ref_hasher> *refs;
 
   /* The list of memory references.  */
-  VEC (mem_ref_p, heap) *refs_list;
+  vec<im_mem_ref *> refs_list;
 
   /* The set of memory references accessed in each loop.  */
-  VEC (bitmap, heap) *refs_in_loop;
-
-  /* The set of memory references accessed in each loop, including
-     subloops.  */
-  VEC (bitmap, heap) *all_refs_in_loop;
-
-  /* The set of virtual operands clobbered in a given loop.  */
-  VEC (bitmap, heap) *clobbered_vops;
-
-  /* Map from the pair (loop, virtual operand) to the set of refs that
-     touch the virtual operand in the loop.  */
-  VEC (htab_t, heap) *vop_ref_map;
+  vec<bitmap_head> refs_in_loop;
+
+  /* The set of memory references stored in each loop.  */
+  vec<bitmap_head> refs_stored_in_loop;
+
+  /* The set of memory references stored in each loop, including subloops .  */
+  vec<bitmap_head> all_refs_stored_in_loop;
 
   /* Cache for expanding memory addresses.  */
-  struct pointer_map_t *ttae_cache;
+  hash_map<tree, name_expansion *> *ttae_cache;
 } memory_accesses;
 
-static bool ref_indep_loop_p (struct loop *, mem_ref_p);
+/* Obstack for the bitmaps in the above data structures.  */
+static bitmap_obstack lim_bitmap_obstack;
+static obstack mem_ref_obstack;
+
+static bool ref_indep_loop_p (struct loop *, im_mem_ref *, struct loop *);
+static bool ref_always_accessed_p (struct loop *, im_mem_ref *, bool);
 
 /* Minimum cost of an expensive expression.  */
 
 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
 
-/* The outermost loop for that execution of the header guarantees that the
+/* The outermost loop for which execution of the header guarantees that the
    block will be executed.  */
 
 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
+#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
+
+/* ID of the shared unanalyzable mem.  */
+#define UNANALYZABLE_MEM_ID 0
+
+/* Whether the reference was analyzable.  */
+#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
 
 static struct lim_aux_data *
-init_lim_data (gimple stmt)
+init_lim_data (gimple *stmt)
 {
-  void **p = pointer_map_insert (lim_aux_data_map, stmt);
-
-  *p = XCNEW (struct lim_aux_data);
-  return (struct lim_aux_data *) *p;
+  lim_aux_data *p = XCNEW (struct lim_aux_data);
+  lim_aux_data_map->put (stmt, p);
+
+  return p;
 }
 
 static struct lim_aux_data *
-get_lim_data (gimple stmt)
+get_lim_data (gimple *stmt)
 {
-  void **p = pointer_map_contains (lim_aux_data_map, stmt);
+  lim_aux_data **p = lim_aux_data_map->get (stmt);
   if (!p)
     return NULL;
 
-  return (struct lim_aux_data *) *p;
+  return *p;
 }
 
 /* Releases the memory occupied by DATA.  */
@@ -225,118 +240,31 @@
 static void
 free_lim_aux_data (struct lim_aux_data *data)
 {
-  struct depend *dep, *next;
-
-  for (dep = data->depends; dep; dep = next)
-    {
-      next = dep->next;
-      free (dep);
-    }
+  data->depends.release ();
   free (data);
 }
 
 static void
-clear_lim_data (gimple stmt)
+clear_lim_data (gimple *stmt)
 {
-  void **p = pointer_map_contains (lim_aux_data_map, stmt);
+  lim_aux_data **p = lim_aux_data_map->get (stmt);
   if (!p)
     return;
 
-  free_lim_aux_data ((struct lim_aux_data *) *p);
+  free_lim_aux_data (*p);
   *p = NULL;
 }
 
-/* Calls CBCK for each index in memory reference ADDR_P.  There are two
-   kinds situations handled; in each of these cases, the memory reference
-   and DATA are passed to the callback:
-
-   Access to an array: ARRAY_{RANGE_}REF (base, index).  In this case we also
-   pass the pointer to the index to the callback.
-
-   Pointer dereference: INDIRECT_REF (addr).  In this case we also pass the
-   pointer to addr to the callback.
-
-   If the callback returns false, the whole search stops and false is returned.
-   Otherwise the function returns true after traversing through the whole
-   reference *ADDR_P.  */
-
-bool
-for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
-{
-  tree *nxt, *idx;
-
-  for (; ; addr_p = nxt)
-    {
-      switch (TREE_CODE (*addr_p))
-	{
-	case SSA_NAME:
-	  return cbck (*addr_p, addr_p, data);
-
-	case MEM_REF:
-	  nxt = &TREE_OPERAND (*addr_p, 0);
-	  return cbck (*addr_p, nxt, data);
-
-	case BIT_FIELD_REF:
-	case VIEW_CONVERT_EXPR:
-	case REALPART_EXPR:
-	case IMAGPART_EXPR:
-	  nxt = &TREE_OPERAND (*addr_p, 0);
-	  break;
-
-	case COMPONENT_REF:
-	  /* If the component has varying offset, it behaves like index
-	     as well.  */
-	  idx = &TREE_OPERAND (*addr_p, 2);
-	  if (*idx
-	      && !cbck (*addr_p, idx, data))
-	    return false;
-
-	  nxt = &TREE_OPERAND (*addr_p, 0);
-	  break;
-
-	case ARRAY_REF:
-	case ARRAY_RANGE_REF:
-	  nxt = &TREE_OPERAND (*addr_p, 0);
-	  if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
-	    return false;
-	  break;
-
-	case VAR_DECL:
-	case PARM_DECL:
-	case STRING_CST:
-	case RESULT_DECL:
-	case VECTOR_CST:
-	case COMPLEX_CST:
-	case INTEGER_CST:
-	case REAL_CST:
-	case FIXED_CST:
-	case CONSTRUCTOR:
-	  return true;
-
-	case ADDR_EXPR:
-	  gcc_assert (is_gimple_min_invariant (*addr_p));
-	  return true;
-
-	case TARGET_MEM_REF:
-	  idx = &TMR_BASE (*addr_p);
-	  if (*idx
-	      && !cbck (*addr_p, idx, data))
-	    return false;
-	  idx = &TMR_INDEX (*addr_p);
-	  if (*idx
-	      && !cbck (*addr_p, idx, data))
-	    return false;
-	  idx = &TMR_INDEX2 (*addr_p);
-	  if (*idx
-	      && !cbck (*addr_p, idx, data))
-	    return false;
-	  return true;
-
-	default:
-	  gcc_unreachable ();
-	}
-    }
-}
+
+/* The possibilities of statement movement.  */
+enum move_pos
+  {
+    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
+    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
+				   become executed -- memory accesses, ... */
+    MOVE_POSSIBLE		/* Unlimited movement.  */
+  };
+
 
 /* If it is possible to hoist the statement STMT unconditionally,
    returns MOVE_POSSIBLE.
@@ -346,7 +274,7 @@
    Otherwise return MOVE_IMPOSSIBLE.  */
 
 enum move_pos
-movement_possibility (gimple stmt)
+movement_possibility (gimple *stmt)
 {
   tree lhs;
   enum move_pos ret = MOVE_POSSIBLE;
@@ -361,7 +289,7 @@
 
   if (gimple_code (stmt) == GIMPLE_PHI
       && gimple_phi_num_args (stmt) <= 2
-      && is_gimple_reg (gimple_phi_result (stmt))
+      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;
 
@@ -413,6 +341,26 @@
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;
 
+  /* Non local loads in a transaction cannot be hoisted out.  Well,
+     unless the load happens on every path out of the loop, but we
+     don't take this into account yet.  */
+  if (flag_tm
+      && gimple_in_transaction (stmt)
+      && gimple_assign_single_p (stmt))
+    {
+      tree rhs = gimple_assign_rhs1 (stmt);
+      if (DECL_P (rhs) && is_global_var (rhs))
+	{
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, "Cannot hoist conditional load of ");
+	      print_generic_expr (dump_file, rhs, TDF_SLIM);
+	      fprintf (dump_file, " because it is in a transaction.\n");
+	    }
+	  return MOVE_IMPOSSIBLE;
+	}
+    }
+
   return ret;
 }
 
@@ -424,7 +372,7 @@
 static struct loop *
 outermost_invariant_loop (tree def, struct loop *loop)
 {
-  gimple def_stmt;
+  gimple *def_stmt;
   basic_block def_bb;
   struct loop *max_loop;
   struct lim_aux_data *lim_data;
@@ -472,10 +420,9 @@
 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
		bool add_cost)
 {
-  gimple def_stmt = SSA_NAME_DEF_STMT (def);
+  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
   basic_block def_bb = gimple_bb (def_stmt);
   struct loop *max_loop;
-  struct depend *dep;
   struct lim_aux_data *def_data;
 
   if (!def_bb)
@@ -500,36 +447,26 @@
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;
 
-  dep = XNEW (struct depend);
-  dep->stmt = def_stmt;
-  dep->next = data->depends;
-  data->depends = dep;
+  data->depends.safe_push (def_stmt);
 
   return true;
 }
 
-/* Returns an estimate for a cost of statement STMT.  TODO -- the values here
-   are just ad-hoc constants.  The estimates should be based on target-specific
-   values.  */
+/* Returns an estimate for a cost of statement STMT.  The values here
+   are just ad-hoc constants, similar to costs for inlining.  */
 
 static unsigned
-stmt_cost (gimple stmt)
+stmt_cost (gimple *stmt)
 {
-  tree fndecl;
-  unsigned cost = 1;
-
   /* Always try to create possibilities for unswitching.  */
   if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;
 
-  /* Hoisting memory references out should almost surely be a win.  */
-  if (gimple_references_memory_p (stmt))
-    cost += 20;
-
+  /* We should be hoisting calls if possible.  */
   if (is_gimple_call (stmt))
    {
-      /* We should be hoisting calls if possible.  */
+      tree fndecl;
 
      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
@@ -539,15 +476,24 @@
	  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
	return 0;
 
-      return cost + 20;
+      return LIM_EXPENSIVE;
    }
 
+  /* Hoisting memory references out should almost surely be a win.  */
+  if (gimple_references_memory_p (stmt))
+    return LIM_EXPENSIVE;
+
   if (gimple_code (stmt) != GIMPLE_ASSIGN)
-    return cost;
+    return 1;
 
  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
+    case WIDEN_MULT_EXPR:
+    case WIDEN_MULT_PLUS_EXPR:
+    case WIDEN_MULT_MINUS_EXPR:
+    case DOT_PROD_EXPR:
+    case FMA_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
@@ -559,19 +505,31 @@
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
-      cost += 20;
-      break;
+      return LIM_EXPENSIVE;
 
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
-      cost += 20;
-      break;
+    case WIDEN_LSHIFT_EXPR:
+    case LROTATE_EXPR:
+    case RROTATE_EXPR:
+      /* Shifts and rotates are usually expensive.  */
+      return LIM_EXPENSIVE;
+
+    case CONSTRUCTOR:
+      /* Make vector construction cost proportional to the number
+         of elements.  */
+      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
+
+    case SSA_NAME:
+    case PAREN_EXPR:
+      /* Whether or not something is wrapped inside a PAREN_EXPR
+         should not change move cost.  Nor should an intermediate
+	 unpropagated SSA name copy.  */
+      return 0;
 
    default:
-      break;
+      return 1;
    }
-
-  return cost;
 }
 
 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
@@ -579,21 +537,21 @@
   instead.  */
 
 static struct loop *
-outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
+outermost_indep_loop (struct loop *outer, struct loop *loop, im_mem_ref *ref)
 {
  struct loop *aloop;
 
-  if (bitmap_bit_p (ref->stored, loop->num))
+  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
    return NULL;
 
  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
-    if (!bitmap_bit_p (ref->stored, aloop->num)
-	&& ref_indep_loop_p (aloop, ref))
+    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
+	&& ref_indep_loop_p (aloop, ref, loop))
      return aloop;
 
-  if (ref_indep_loop_p (loop, ref))
+  if (ref_indep_loop_p (loop, ref, loop))
    return loop;
  else
    return NULL;
@@ -604,31 +562,24 @@
   it is a store or load.  Otherwise, returns NULL.  */
 
static tree *
-simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
+simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
{
-  tree *lhs;
-  enum tree_code code;
-
-  /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns.  */
-  if (gimple_code (stmt) != GIMPLE_ASSIGN)
+  tree *lhs, *rhs;
+
+  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
+  if (!gimple_assign_single_p (stmt))
    return NULL;
 
-  code = gimple_assign_rhs_code (stmt);
  lhs = gimple_assign_lhs_ptr (stmt);
-
-  if (TREE_CODE (*lhs) == SSA_NAME)
+  rhs = gimple_assign_rhs1_ptr (stmt);
+
+  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
-      if (get_gimple_rhs_class (code) != GIMPLE_SINGLE_RHS
-	  || !is_gimple_addressable (gimple_assign_rhs1 (stmt)))
-	return NULL;
-
      *is_store = false;
-      return gimple_assign_rhs1_ptr (stmt);
+      return rhs;
    }
-  else if (code == SSA_NAME
-	   || (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
-	       && is_gimple_min_invariant (gimple_assign_rhs1 (stmt))))
+  else if (gimple_vdef (stmt)
+	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      *is_store = true;
      return lhs;
@@ -637,27 +588,6 @@
 
  return NULL;
}
 
-/* Returns the memory reference contained in STMT.  */
-
-static mem_ref_p
-mem_ref_in_stmt (gimple stmt)
-{
-  bool store;
-  tree *mem = simple_mem_ref_in_stmt (stmt, &store);
-  hashval_t hash;
-  mem_ref_p ref;
-
-  if (!mem)
-    return NULL;
-  gcc_assert (!store);
-
-  hash = iterative_hash_expr (*mem, 0);
-  ref = (mem_ref_p) htab_find_with_hash (memory_accesses.refs, *mem, hash);
-
-  gcc_assert (ref != NULL);
-  return ref;
-}
-
 /* From a controlling predicate in DOM determine the arguments from
   the PHI node PHI that are chosen if the predicate evaluates to
   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
@@ -665,59 +595,18 @@
   else return false.  */
 
static bool
-extract_true_false_args_from_phi (basic_block dom, gimple phi,
+extract_true_false_args_from_phi (basic_block dom, gphi *phi,
				  tree *true_arg_p, tree *false_arg_p)
{
-  basic_block bb = gimple_bb (phi);
-  edge true_edge, false_edge, tem;
-  tree arg0 = NULL_TREE, arg1 = NULL_TREE;
-
-  /* We have to verify that one edge into the PHI node is dominated
-     by the true edge of the predicate block and the other edge
-     dominated by the false edge.  This ensures that the PHI argument
-     we are going to take is completely determined by the path we
-     take from the predicate block.
-     We can only use BB dominance checks below if the destination of
-     the true/false edges are dominated by their edge, thus only
-     have a single predecessor.  */
-  extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
-  tem = EDGE_PRED (bb, 0);
-  if (tem == true_edge
-      || (single_pred_p (true_edge->dest)
-	  && (tem->src == true_edge->dest
-	      || dominated_by_p (CDI_DOMINATORS,
				 tem->src, true_edge->dest))))
-    arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
-  else if (tem == false_edge
-	   || (single_pred_p (false_edge->dest)
-	       && (tem->src == false_edge->dest
		   || dominated_by_p (CDI_DOMINATORS,
				      tem->src, false_edge->dest))))
-    arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
-  else
-    return false;
-  tem = EDGE_PRED (bb, 1);
-  if (tem == true_edge
-      || (single_pred_p (true_edge->dest)
-	  && (tem->src == true_edge->dest
	      || dominated_by_p (CDI_DOMINATORS,
				 tem->src, true_edge->dest))))
-    arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
-  else if (tem == false_edge
-	   || (single_pred_p (false_edge->dest)
	       && (tem->src == false_edge->dest
		   || dominated_by_p (CDI_DOMINATORS,
				      tem->src, false_edge->dest))))
-    arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
-  else
-    return false;
-  if (!arg0 || !arg1)
+  edge te, fe;
+  if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
+					     &te, &fe))
    return false;
 
  if (true_arg_p)
-    *true_arg_p = arg0;
+    *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
  if (false_arg_p)
-    *false_arg_p = arg1;
+    *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
 
  return true;
}
 
@@ -733,7 +622,7 @@
   is defined in, and true otherwise.  */
 
static bool
-determine_max_movement (gimple stmt, bool must_preserve_exec)
+determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *loop = bb->loop_father;
@@ -748,7 +637,7 @@
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;
 
-  if (gimple_code (stmt) == GIMPLE_PHI)
+  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
@@ -759,27 +648,40 @@
	 evaluated.  For this reason the PHI cost (and thus the cost we remove
	 from the loop by doing the invariant motion) is that of the cheapest
	 PHI argument dependency chain.  */
-      FOR_EACH_PHI_ARG (use_p, stmt, iter, SSA_OP_USE)
+      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);
+
	  if (TREE_CODE (val) != SSA_NAME)
-	    continue;
+	    {
+	      /* Assign const 1 to constants.  */
+	      min_cost = MIN (min_cost, 1);
+	      total_cost += 1;
+	      continue;
+	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;
-	  def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
-	  if (def_data)
+
+	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
+	  if (gimple_bb (def_stmt)
+	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
-	      min_cost = MIN (min_cost, def_data->cost);
-	      total_cost += def_data->cost;
+	      def_data = get_lim_data (def_stmt);
+	      if (def_data)
+		{
+		  min_cost = MIN (min_cost, def_data->cost);
+		  total_cost += def_data->cost;
+		}
	    }
	}
 
+      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;
 
-      if (gimple_phi_num_args (stmt) > 1)
+      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
-	  gimple cond;
+	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
@@ -788,7 +690,7 @@
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
-	  if (!extract_true_false_args_from_phi (dom, stmt, NULL, NULL))
+	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;
 
	  /* Fold in dependencies and cost of the condition.  */
@@ -798,7 +700,7 @@
	    return false;
	  def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	  if (def_data)
-	    total_cost += def_data->cost;
+	    lim_data->cost += def_data->cost;
 
	  /* We want to avoid unconditionally executing very expensive
@@ -826,23 +728,18 @@
 
  if (gimple_vuse (stmt))
    {
-      mem_ref_p ref = mem_ref_in_stmt (stmt);
-
-      if (ref)
+      im_mem_ref *ref
+	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
+      if (ref
+	  && MEM_ANALYZABLE (ref))
	{
-	  lim_data->max_loop
-	    = outermost_indep_loop (lim_data->max_loop, loop, ref);
+	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
-      else
-	{
-	  if ((val = gimple_vuse (stmt)) != NULL_TREE)
-	    {
-	      if (!add_dependency (val, lim_data, loop, false))
-		return false;
-	    }
-	}
+      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
+	return false;
    }
 
  lim_data->cost += stmt_cost (stmt);
@@ -856,11 +753,12 @@
   operands) is hoisted at least out of the loop LEVEL.  */
 
static void
-set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
+set_level (gimple *stmt, struct loop *orig_loop, struct loop *level)
{
  struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
-  struct depend *dep;
  struct lim_aux_data *lim_data;
+  gimple *dep_stmt;
+  unsigned i;
 
  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
@@ -874,8 +772,8 @@
	      || flow_loop_nested_p (lim_data->max_loop, level));
 
  lim_data->tgt_loop = level;
-  for (dep = lim_data->depends; dep; dep = dep->next)
-    set_level (dep->stmt, orig_loop, level);
+  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
+    set_level (dep_stmt, orig_loop, level);
}
 
/* Determines an outermost loop from that we want to hoist the statement STMT.
@@ -883,7 +781,7 @@
   information to set it more sanely.  */
 
static void
-set_profitable_level (gimple stmt)
+set_profitable_level (gimple *stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}
 
@@ -891,7 +789,7 @@
/* Returns true if STMT is a call that has side effects.  */
 
static bool
-nonpure_call_p (gimple stmt)
+nonpure_call_p (gimple *stmt)
{
  if (gimple_code (stmt) != GIMPLE_CALL)
    return false;
@@ -901,31 +799,25 @@
 
/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
 
-static gimple
+static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
-  gimple stmt, stmt1, stmt2;
-  tree var, name, lhs, type;
+  gassign *stmt, *stmt1, *stmt2;
+  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;
 
-  stmt = gsi_stmt (*bsi);
+  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
 
-  var = create_tmp_var (type, "reciptmp");
-  add_referenced_var (var);
-  DECL_GIMPLE_REG_P (var) = 1;
-
  real_one = build_one_cst (type);
 
-  stmt1 = gimple_build_assign_with_ops (RDIV_EXPR,
-					var, real_one, gimple_assign_rhs2 (stmt));
-  name = make_ssa_name (var, stmt1);
-  gimple_assign_set_lhs (stmt1, name);
-
-  stmt2 = gimple_build_assign_with_ops (MULT_EXPR, lhs, name,
					gimple_assign_rhs1 (stmt));
+  name = make_temp_ssa_name (type, NULL, "reciptmp");
+  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
+			       gimple_assign_rhs2 (stmt));
+  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
+			       gimple_assign_rhs1 (stmt));
 
  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
@@ -941,25 +833,31 @@
/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
 
-static gimple
+static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
-  gimple stmt, use_stmt, stmt1, stmt2;
-  tree lhs, var, name, t, a, b;
+  gassign *stmt;
+  gimple *stmt1;
+  gassign *stmt2;
+  gimple *use_stmt;
+  gcond *cond_stmt;
+  tree lhs, name, t, a, b;
  use_operand_p use;
 
-  stmt = gsi_stmt (*bsi);
+  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
 
  /* Verify that the single use of lhs is a comparison against zero.  */
  if (TREE_CODE (lhs) != SSA_NAME
-      || !single_imm_use (lhs, &use, &use_stmt)
-      || gimple_code (use_stmt) != GIMPLE_COND)
+      || !single_imm_use (lhs, &use, &use_stmt))
+    return stmt;
+  cond_stmt = dyn_cast <gcond *> (use_stmt);
+  if (!cond_stmt)
    return stmt;
-  if (gimple_cond_lhs (use_stmt) != lhs
-      || (gimple_cond_code (use_stmt) != NE_EXPR
-	  && gimple_cond_code (use_stmt) != EQ_EXPR)
-      || !integer_zerop (gimple_cond_rhs (use_stmt)))
+  if (gimple_cond_lhs (cond_stmt) != lhs
+      || (gimple_cond_code (cond_stmt) != NE_EXPR
+	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
+      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;
 
  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
@@ -994,24 +892,22 @@
      gimple_stmt_iterator rsi;
 
      /* 1 << B */
-      var = create_tmp_var (TREE_TYPE (a), "shifttmp");
-      add_referenced_var (var);
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
-      stmt1 = gimple_build_assign (var, t);
-      name = make_ssa_name (var, stmt1);
-      gimple_assign_set_lhs (stmt1, name);
+      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
+      stmt1 = gimple_build_assign (name, t);
 
      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
-      stmt2 = gimple_build_assign (var, t);
-      name = make_ssa_name (var, stmt2);
-      gimple_assign_set_lhs (stmt2, name);
+      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
+      stmt2 = gimple_build_assign (name, t);
 
      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
-      gimple_cond_set_rhs (use_stmt, build_int_cst_type (TREE_TYPE (name), 0));
+      gimple_cond_set_rhs (cond_stmt,
+			   build_int_cst_type (TREE_TYPE (name),
					       0));
 
      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
@@ -1020,7 +916,9 @@
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
+      gimple *to_release = gsi_stmt (rsi);
      gsi_remove (&rsi, true);
+      release_defs (to_release);
 
      return stmt1;
    }
@@ -1028,24 +926,35 @@
  return stmt;
}
 
+/* For each statement determines the outermost loop in that it is invariant,
+   statements on whose motion it depends and the cost of the computation.
+   This information is stored to the LIM_DATA structure associated with
+   each statement.  */
+class invariantness_dom_walker : public dom_walker
+{
+public:
+  invariantness_dom_walker (cdi_direction direction)
+    : dom_walker (direction) {}
+
+  virtual edge before_dom_children (basic_block);
+};
 
/* Determine the outermost loops in that statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
-   Callback for walk_dominator_tree.  */
-
-static void
-determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
			      basic_block bb)
+   Callback for dom_walker.  */
+
+edge
+invariantness_dom_walker::before_dom_children (basic_block bb)
{
  enum move_pos pos;
  gimple_stmt_iterator bsi;
-  gimple stmt;
+  gimple *stmt;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
  struct lim_aux_data *lim_data;
 
  if (!loop_outer (bb->loop_father))
-    return;
+    return NULL;
 
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
@@ -1067,7 +976,9 @@
      if (pos == MOVE_IMPOSSIBLE)
	continue;
 
-      lim_data = init_lim_data (stmt);
+      lim_data = get_lim_data (stmt);
+      if (! lim_data)
+	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;
 
      if (!determine_max_movement (stmt, false))
@@ -1078,7 +989,7 @@
 
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
-	  print_gimple_stmt (dump_file, stmt, 2, 0);
+	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
@@ -1104,7 +1015,9 @@
	 store-motion work.  */
      else if (stmt_makes_single_store (stmt))
	{
-	  struct lim_aux_data *lim_data = init_lim_data (stmt);
+	  struct lim_aux_data *lim_data = get_lim_data (stmt);
+	  if (! lim_data)
+	    lim_data = init_lim_data (stmt);
	  lim_data->always_executed_in = outermost;
	}
      continue;
@@ -1140,7 +1053,9 @@
	    stmt = rewrite_bittest (&bsi);
	}
 
-      lim_data = init_lim_data (stmt);
+      lim_data = get_lim_data (stmt);
+      if (! lim_data)
+	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;
 
      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
@@ -1154,7 +1069,7 @@
 
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
-	  print_gimple_stmt (dump_file, stmt, 2, 0);
+	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
@@ -1163,48 +1078,39 @@
      if (lim_data->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }
+  return NULL;
}
 
-/* For each statement determines the outermost loop in that it is invariant,
-   statements on whose motion it depends and the cost of the computation.
-   This information is stored to the LIM_DATA structure associated with
-   each statement.  */
-
-static void
-determine_invariantness (void)
+class move_computations_dom_walker : public dom_walker
{
-  struct dom_walk_data walk_data;
-
-  memset (&walk_data, 0, sizeof (struct dom_walk_data));
-  walk_data.dom_direction = CDI_DOMINATORS;
-  walk_data.before_dom_children = determine_invariantness_stmt;
-
-  init_walk_dominator_tree (&walk_data);
-  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
-  fini_walk_dominator_tree (&walk_data);
-}
+public:
+  move_computations_dom_walker (cdi_direction direction)
+    : dom_walker (direction), todo_ (0) {}
+
+  virtual edge before_dom_children (basic_block);
+
+  unsigned int todo_;
+};
 
/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  */
 
-static void
-move_computations_stmt (struct dom_walk_data *dw_data,
			basic_block bb)
+unsigned int
+move_computations_worker (basic_block bb)
{
  struct loop *level;
-  gimple_stmt_iterator bsi;
-  gimple stmt;
  unsigned cost = 0;
  struct lim_aux_data *lim_data;
+  unsigned int todo = 0;
 
  if (!loop_outer (bb->loop_father))
-    return;
-
-  for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
+    return todo;
+
+  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    {
-      gimple new_stmt;
-      stmt = gsi_stmt (bsi);
+      gassign *new_stmt;
+      gphi *stmt = bsi.phi ();
 
      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
@@ -1226,7 +1132,7 @@
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving PHI node\n");
-	  print_gimple_stmt (dump_file, stmt, 0, 0);
+	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}
@@ -1234,15 +1140,13 @@
      if (gimple_phi_num_args (stmt) == 1)
	{
	  tree arg = PHI_ARG_DEF (stmt, 0);
-	  new_stmt = gimple_build_assign_with_ops (TREE_CODE (arg),
						   gimple_phi_result (stmt),
						   arg, NULL_TREE);
-	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
+	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  TREE_CODE (arg), arg);
	}
      else
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
-	  gimple cond = gsi_stmt (gsi_last_bb (dom));
+	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
@@ -1250,21 +1154,27 @@
	  gcc_assert (arg0 && arg1);
	  t = build2 (gimple_cond_code (cond), boolean_type_node,
		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
-	  t = build3 (COND_EXPR, TREE_TYPE (gimple_phi_result (stmt)),
		      t, arg0, arg1);
-	  new_stmt = gimple_build_assign_with_ops (COND_EXPR,
						   gimple_phi_result (stmt),
						   t, NULL_TREE);
-	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
-	  *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
+	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  COND_EXPR, t, arg0, arg1);
+	  todo |= TODO_cleanup_cfg;
	}
+      if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
+	  && (!ALWAYS_EXECUTED_IN (bb)
+	      || (ALWAYS_EXECUTED_IN (bb) != level
		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
+	{
+	  tree lhs = gimple_assign_lhs (new_stmt);
+	  SSA_NAME_RANGE_INFO (lhs) = NULL;
+	}
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }
 
-  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
+  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    {
-      stmt = gsi_stmt (bsi);
+      edge e;
+
+      gimple *stmt = gsi_stmt (bsi);
 
      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
@@ -1291,15 +1201,58 @@
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
-	  print_gimple_stmt (dump_file, stmt, 0, 0);
+	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}
 
-      mark_virtual_ops_for_renaming (stmt);
-      gsi_insert_on_edge (loop_preheader_edge (level), stmt);
+      e = loop_preheader_edge (level);
+      gcc_assert (!gimple_vdef (stmt));
+      if (gimple_vuse (stmt))
+	{
+	  /* The new VUSE is the one from the virtual PHI in the loop
+	     header or the one already present.  */
+	  gphi_iterator gsi2;
+	  for (gsi2 = gsi_start_phis (e->dest);
+	       !gsi_end_p (gsi2); gsi_next (&gsi2))
+	    {
+	      gphi *phi = gsi2.phi ();
+	      if (virtual_operand_p (gimple_phi_result (phi)))
+		{
+		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
+		  break;
+		}
+	    }
+	}
      gsi_remove (&bsi, false);
+      if (gimple_has_lhs (stmt)
+	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
+	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
+	  && (!ALWAYS_EXECUTED_IN (bb)
+	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
+	{
+	  tree lhs = gimple_get_lhs (stmt);
+	  SSA_NAME_RANGE_INFO (lhs) = NULL;
+	}
+      /* In case this is a stmt that is not unconditionally executed
         when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow rewrite it to
	 unsigned arithmetic.  */
+      if (is_gimple_assign (stmt)
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && arith_code_with_undefined_signed_overflow
	       (gimple_assign_rhs_code (stmt))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
+	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
+      else
+	gsi_insert_on_edge (e, stmt);
    }
+
+  return todo;
}
 
/* Hoist the statements out of the loops prescribed by data stored in
@@ -1308,17 +1261,14 @@
static unsigned int
move_computations (void)
{
-  struct dom_walk_data walk_data;
-  unsigned int todo = 0;
-
-  memset (&walk_data, 0, sizeof (struct dom_walk_data));
-  walk_data.global_data = &todo;
-  walk_data.dom_direction = CDI_DOMINATORS;
-  walk_data.before_dom_children = move_computations_stmt;
-
-  init_walk_dominator_tree (&walk_data);
-  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
-  fini_walk_dominator_tree (&walk_data);
+  int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
+  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
+  unsigned todo = 0;
+
+  for (int i = 0; i < n; ++i)
+    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));
+
+  free (rpo);
 
  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (cfun))
@@ -1364,7 +1314,7 @@
static void
force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
{
-  gimple stmt;
+  gimple *stmt;
 
  if (!op
      || is_gimple_min_invariant (op))
@@ -1408,136 +1358,63 @@
  return true;
}
 
-/* A hash function for struct mem_ref object OBJ.  */
-
-static hashval_t
-memref_hash (const void *obj)
-{
-  const struct mem_ref *const mem = (const struct mem_ref *) obj;
-
-  return mem->hash;
-}
-
-/* An equality function for struct mem_ref object OBJ1 with
-   memory reference OBJ2.  */
-
-static int
-memref_eq (const void *obj1, const void *obj2)
-{
-  const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
-
-  return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
-}
-
-/* Releases list of memory reference locations ACCS.  */
-
-static void
-free_mem_ref_locs (mem_ref_locs_p accs)
-{
-  unsigned i;
-  mem_ref_loc_p loc;
-
-  if (!accs)
-    return;
-
-  FOR_EACH_VEC_ELT (mem_ref_loc_p, accs->locs, i, loc)
-    free (loc);
-  VEC_free (mem_ref_loc_p, heap, accs->locs);
-  free (accs);
-}
-
/* A function to free the mem_ref object OBJ.  */
 
static void
-memref_free (void *obj)
+memref_free (struct im_mem_ref *mem)
{
-  struct mem_ref *const mem = (struct mem_ref *) obj;
-  unsigned i;
-  mem_ref_locs_p accs;
-
-  BITMAP_FREE (mem->stored);
-  BITMAP_FREE (mem->indep_loop);
-  BITMAP_FREE (mem->dep_loop);
-  BITMAP_FREE (mem->indep_ref);
-  BITMAP_FREE (mem->dep_ref);
-
-  FOR_EACH_VEC_ELT (mem_ref_locs_p, mem->accesses_in_loop, i, accs)
-    free_mem_ref_locs (accs);
-  VEC_free (mem_ref_locs_p, heap, mem->accesses_in_loop);
-
-  BITMAP_FREE (mem->vops);
-  free (mem);
+  mem->accesses_in_loop.release ();
}
 
/* Allocates and returns a memory reference description for MEM whose hash
   value is HASH and id is ID.  */
 
-static mem_ref_p
+static im_mem_ref *
mem_ref_alloc (tree mem, unsigned hash, unsigned id)
{
-  mem_ref_p ref = XNEW (struct mem_ref);
-  ref->mem = mem;
+  im_mem_ref *ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
+  ao_ref_init (&ref->mem, mem);
  ref->id = id;
  ref->hash = hash;
-  ref->stored = BITMAP_ALLOC (NULL);
-  ref->indep_loop = BITMAP_ALLOC (NULL);
-  ref->dep_loop = BITMAP_ALLOC (NULL);
-  ref->indep_ref = BITMAP_ALLOC (NULL);
-  ref->dep_ref = BITMAP_ALLOC (NULL);
-  ref->accesses_in_loop = NULL;
-  ref->vops = BITMAP_ALLOC (NULL);
+  ref->stored = NULL;
+  bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
+  bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
+  ref->accesses_in_loop.create (1);
 
  return ref;
}
 
-/* Allocates and returns the new list of locations.  */
-
-static mem_ref_locs_p
-mem_ref_locs_alloc (void)
-{
-  mem_ref_locs_p accs = XNEW (struct mem_ref_locs);
-  accs->locs = NULL;
-  return accs;
-}
-
/* Records memory reference location *LOC in LOOP to the memory reference
   description REF.  The reference occurs in statement STMT.  */
 
static void
-record_mem_ref_loc (mem_ref_p ref, struct loop *loop, gimple stmt, tree *loc)
+record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
{
-  mem_ref_loc_p aref = XNEW (struct mem_ref_loc);
-  mem_ref_locs_p accs;
-  bitmap ril = VEC_index (bitmap, memory_accesses.refs_in_loop, loop->num);
-
-  if (VEC_length (mem_ref_locs_p, ref->accesses_in_loop)
-      <= (unsigned) loop->num)
-    VEC_safe_grow_cleared (mem_ref_locs_p, heap, ref->accesses_in_loop,
			   loop->num + 1);
-  accs = VEC_index (mem_ref_locs_p, ref->accesses_in_loop, loop->num);
-  if (!accs)
-    {
-      accs = mem_ref_locs_alloc ();
-      VEC_replace (mem_ref_locs_p, ref->accesses_in_loop, loop->num, accs);
-    }
-
-  aref->stmt = stmt;
-  aref->ref = loc;
-
-  VEC_safe_push (mem_ref_loc_p, heap, accs->locs, aref);
-  bitmap_set_bit (ril, ref->id);
+  mem_ref_loc aref;
+  aref.stmt = stmt;
+  aref.ref = loc;
+  ref->accesses_in_loop.safe_push (aref);
+}
+
+/* Set the LOOP bit in REF stored bitmap and allocate that if
+   necessary.  Return whether a bit was changed.  */
+
+static bool
+set_ref_stored_in_loop (im_mem_ref *ref, struct loop *loop)
+{
+  if (!ref->stored)
+    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
+  return bitmap_set_bit (ref->stored, loop->num);
}
 
/* Marks reference REF as stored in LOOP.  */
 
static void
-mark_ref_stored (mem_ref_p ref, struct loop *loop)
+mark_ref_stored (im_mem_ref *ref, struct loop *loop)
{
-  for (;
-       loop != current_loops->tree_root
-       && !bitmap_bit_p (ref->stored, loop->num);
-       loop = loop_outer (loop))
-    bitmap_set_bit (ref->stored, loop->num);
+  while (loop != current_loops->tree_root
	 && set_ref_stored_in_loop (ref, loop))
+    loop = loop_outer (loop);
}
 
/* Gathers memory references in statement STMT in LOOP, storing the
@@ -1546,15 +1423,13 @@
   well.  */
 
static void
-gather_mem_refs_stmt (struct loop *loop, gimple stmt)
+gather_mem_refs_stmt (struct loop *loop, gimple *stmt)
{
  tree *mem = NULL;
  hashval_t hash;
-  PTR *slot;
-  mem_ref_p ref;
-  tree vname;
+  im_mem_ref **slot;
+  im_mem_ref *ref;
  bool is_stored;
-  bitmap clvops;
  unsigned id;
 
  if (!gimple_vuse (stmt))
@@ -1562,302 +1437,137 @@
 
  mem = simple_mem_ref_in_stmt (stmt, &is_stored);
  if (!mem)
-    goto fail;
-
-  hash = iterative_hash_expr (*mem, 0);
-  slot = htab_find_slot_with_hash (memory_accesses.refs, *mem, hash, INSERT);
-
-  if (*slot)
    {
-      ref = (mem_ref_p) *slot;
-      id = ref->id;
+      /* We use the shared mem_ref for all unanalyzable refs.  */
+      id = UNANALYZABLE_MEM_ID;
+      ref = memory_accesses.refs_list[id];
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
+	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	}
+      is_stored = gimple_vdef (stmt);
    }
  else
    {
-      id = VEC_length (mem_ref_p, memory_accesses.refs_list);
-      ref = mem_ref_alloc (*mem, hash, id);
-      VEC_safe_push (mem_ref_p, heap, memory_accesses.refs_list, ref);
-      *slot = ref;
-
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      hash = iterative_hash_expr (*mem, 0);
+      slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
+      if (*slot)
+	{
+	  ref = *slot;
+	  id = ref->id;
+	}
+      else
	{
-	  fprintf (dump_file, "Memory reference %u: ", id);
-	  print_generic_expr (dump_file, ref->mem, TDF_SLIM);
-	  fprintf (dump_file, "\n");
+	  id = memory_accesses.refs_list.length ();
+	  ref = mem_ref_alloc (*mem, hash, id);
+	  memory_accesses.refs_list.safe_push (ref);
+	  *slot = ref;
+
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Memory reference %u: ", id);
+	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
+	      fprintf (dump_file, "\n");
+	    }
	}
+
+      record_mem_ref_loc (ref, stmt, mem);
+    }
+  bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
+  if (is_stored)
+    {
+      bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
+      mark_ref_stored (ref, loop);
    }
-  if (is_stored)
-    mark_ref_stored (ref, loop);
-
-  if ((vname = gimple_vuse (stmt)) != NULL_TREE)
-    bitmap_set_bit (ref->vops, DECL_UID (SSA_NAME_VAR (vname)));
-  record_mem_ref_loc (ref, loop, stmt, mem);
-
+  init_lim_data (stmt)->ref = ref->id;
  return;
-
-fail:
-  clvops = VEC_index (bitmap, memory_accesses.clobbered_vops, loop->num);
-  if ((vname = gimple_vuse (stmt)) != NULL_TREE)
-    bitmap_set_bit (clvops, DECL_UID (SSA_NAME_VAR (vname)));
+}
+
+static unsigned *bb_loop_postorder;
+
+/* qsort sort function to sort blocks after their loop fathers postorder.  */
+
+static int
+sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
+{
+  basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
+  basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
+  struct loop *loop1 = bb1->loop_father;
+  struct loop *loop2 = bb2->loop_father;
+  if (loop1->num == loop2->num)
+    return 0;
+  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
+}
+
+/* qsort sort function to sort ref locs after their loop fathers postorder.  */
+
+static int
+sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
+{
+  mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
+  mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
+  struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
+  struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
+  if (loop1->num == loop2->num)
+    return 0;
+  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}
 
/* Gathers memory references in loops.  */
 
static void
-gather_mem_refs_in_loops (void)
+analyze_memory_references (void)
{
  gimple_stmt_iterator bsi;
-  basic_block bb;
-  struct loop *loop;
-  loop_iterator li;
-  bitmap clvo, clvi;
-  bitmap lrefs, alrefs, alrefso;
-
-  FOR_EACH_BB (bb)
-    {
-      loop = bb->loop_father;
-      if (loop == current_loops->tree_root)
	continue;
-
-      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
-	gather_mem_refs_stmt (loop, gsi_stmt (bsi));
-    }
-
-  /* Propagate the information about clobbered vops and accessed memory
-     references up the loop hierarchy.  */
-  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+  basic_block bb, *bbs;
+  struct loop *loop, *outer;
+  unsigned i, n;
+
+  /* Collect all basic-blocks in loops and sort them after their
+     loops postorder.  */
+  i = 0;
+  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
+  FOR_EACH_BB_FN (bb, cfun)
+    if (bb->loop_father != current_loops->tree_root)
+      bbs[i++] = bb;
+  n = i;
+  qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
+
+  /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
+     That results in better locality for all the bitmaps.  */
+  for (i = 0; i < n; ++i)
    {
-      lrefs = VEC_index (bitmap, memory_accesses.refs_in_loop, loop->num);
-      alrefs = VEC_index (bitmap, memory_accesses.all_refs_in_loop, loop->num);
-      bitmap_ior_into (alrefs, lrefs);
-
-      if (loop_outer (loop) == current_loops->tree_root)
	continue;
-
-      clvi = VEC_index (bitmap, memory_accesses.clobbered_vops, loop->num);
-      clvo = VEC_index (bitmap, memory_accesses.clobbered_vops,
			loop_outer (loop)->num);
-      bitmap_ior_into (clvo, clvi);
-
-      alrefso = VEC_index (bitmap, memory_accesses.all_refs_in_loop,
			   loop_outer (loop)->num);
-      bitmap_ior_into (alrefso, alrefs);
-    }
-}
-
-/* Element of the hash table that maps vops to memory references.  */
-
-struct vop_to_refs_elt
-{
-  /* DECL_UID of the vop.  */
-  unsigned uid;
-
-  /* List of the all references.  */
-  bitmap refs_all;
-
-  /* List of stored references.  */
-  bitmap refs_stored;
-};
-
-/* A hash function for struct vop_to_refs_elt object OBJ.  */
-
-static hashval_t
-vtoe_hash (const void *obj)
-{
-  const struct vop_to_refs_elt *const vtoe =
-    (const struct vop_to_refs_elt *) obj;
-
-  return vtoe->uid;
-}
-
-/* An equality function for struct vop_to_refs_elt object OBJ1 with
-   uid of a vop OBJ2.  */
-
-static int
-vtoe_eq (const void *obj1, const void *obj2)
-{
-  const struct vop_to_refs_elt *const vtoe =
-    (const struct vop_to_refs_elt *) obj1;
-  const unsigned *const uid = (const unsigned *) obj2;
-
-  return vtoe->uid == *uid;
-}
-
-/* A function to free the struct vop_to_refs_elt object.  */
-
-static void
-vtoe_free (void *obj)
-{
-  struct vop_to_refs_elt *const vtoe =
-    (struct vop_to_refs_elt *) obj;
-
-  BITMAP_FREE (vtoe->refs_all);
-  BITMAP_FREE (vtoe->refs_stored);
-  free (vtoe);
-}
-
-/* Records REF to hashtable VOP_TO_REFS for the index VOP.  STORED is true
-   if the reference REF is stored.  */
-
-static void
-record_vop_access (htab_t vop_to_refs, unsigned vop, unsigned ref, bool stored)
-{
-  void **slot = htab_find_slot_with_hash (vop_to_refs, &vop, vop, INSERT);
-  struct vop_to_refs_elt *vtoe;
-
-  if (!*slot)
-    {
-      vtoe = XNEW (struct vop_to_refs_elt);
-      vtoe->uid = vop;
-      vtoe->refs_all = BITMAP_ALLOC (NULL);
-      vtoe->refs_stored = BITMAP_ALLOC (NULL);
-      *slot = vtoe;
-    }
-  else
-    vtoe = (struct vop_to_refs_elt *) *slot;
-
-  bitmap_set_bit (vtoe->refs_all, ref);
-  if (stored)
-    bitmap_set_bit (vtoe->refs_stored, ref);
-}
-
-/* Returns the set of references that access VOP according to the table
-   VOP_TO_REFS.  */
-
-static bitmap
-get_vop_accesses (htab_t vop_to_refs, unsigned vop)
-{
-  struct vop_to_refs_elt *const vtoe =
-    (struct vop_to_refs_elt *) htab_find_with_hash (vop_to_refs, &vop, vop);
-  return vtoe->refs_all;
-}
-
-/* Returns the set of stores that access VOP according to the table
-   VOP_TO_REFS.  */
-
-static bitmap
-get_vop_stores (htab_t vop_to_refs, unsigned vop)
-{
-  struct vop_to_refs_elt *const vtoe =
-    (struct vop_to_refs_elt *) htab_find_with_hash (vop_to_refs, &vop, vop);
-  return vtoe->refs_stored;
-}
-
-/* Adds REF to mapping from virtual operands to references in LOOP.  */
-
-static void
-add_vop_ref_mapping (struct loop *loop, mem_ref_p ref)
-{
-  htab_t map = VEC_index (htab_t, memory_accesses.vop_ref_map, loop->num);
-  bool stored = bitmap_bit_p (ref->stored, loop->num);
-  bitmap clobbers = VEC_index (bitmap, memory_accesses.clobbered_vops,
			       loop->num);
-  bitmap_iterator bi;
-  unsigned vop;
-
-  EXECUTE_IF_AND_COMPL_IN_BITMAP (ref->vops, clobbers, 0, vop, bi)
-    {
-      record_vop_access (map, vop, ref->id, stored);
-    }
-}
-
-/* Create a mapping from virtual operands to references that touch them
-   in LOOP.  */
-
-static void
-create_vop_ref_mapping_loop (struct loop *loop)
-{
-  bitmap refs = VEC_index (bitmap, memory_accesses.refs_in_loop, loop->num);
-  struct loop *sloop;
-  bitmap_iterator bi;
-  unsigned i;
-  mem_ref_p ref;
-
-  EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
+      basic_block bb = bbs[i];
+      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+	gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
    }
-    {
-      ref = VEC_index (mem_ref_p, memory_accesses.refs_list, i);
-      for (sloop = loop; sloop != current_loops->tree_root;
	   sloop = loop_outer (sloop))
-	add_vop_ref_mapping (sloop, ref);
-    }
-}
-
-/* For each non-clobbered virtual operand and each loop, record the memory
-   references in this loop that touch the operand.  */
-
-static void
-create_vop_ref_mapping (void)
-{
-  loop_iterator li;
-  struct loop *loop;
-
-  FOR_EACH_LOOP (li, loop, 0)
-    {
-      create_vop_ref_mapping_loop (loop);
-    }
-}
-
-/* Gathers information about memory accesses in the loops.  */
-
-static void
-analyze_memory_references (void)
-{
-  unsigned i;
-  bitmap empty;
-  htab_t hempty;
-
-  memory_accesses.refs
-	  = htab_create (100, memref_hash, memref_eq, memref_free);
-  memory_accesses.refs_list = NULL;
-  memory_accesses.refs_in_loop = VEC_alloc (bitmap, heap,
					    number_of_loops ());
-  memory_accesses.all_refs_in_loop = VEC_alloc (bitmap, heap,
						number_of_loops ());
-  memory_accesses.clobbered_vops = VEC_alloc (bitmap, heap,
					      number_of_loops ());
-  memory_accesses.vop_ref_map = VEC_alloc (htab_t, heap,
					   number_of_loops ());
-
-  for (i = 0; i < number_of_loops (); i++)
-    {
-      empty = BITMAP_ALLOC (NULL);
-      VEC_quick_push (bitmap, memory_accesses.refs_in_loop, empty);
-      empty = BITMAP_ALLOC (NULL);
-      VEC_quick_push (bitmap, memory_accesses.all_refs_in_loop, empty);
-      empty = BITMAP_ALLOC (NULL);
-      VEC_quick_push (bitmap, memory_accesses.clobbered_vops, empty);
-      hempty = htab_create (10, vtoe_hash, vtoe_eq, vtoe_free);
-      VEC_quick_push (htab_t, memory_accesses.vop_ref_map, hempty);
-    }
-
-  memory_accesses.ttae_cache = NULL;
-
-  gather_mem_refs_in_loops ();
-  create_vop_ref_mapping ();
-}
-
-/* Returns true if a region of size SIZE1 at position 0 and a region of
-   size SIZE2 at position DIFF cannot overlap.  */
-
-static bool
-cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2)
-{
-  double_int d, bound;
-
-  /* Unless the difference is a constant, we fail.  */
-  if (diff->n != 0)
-    return false;
-
-  d = diff->offset;
-  if (double_int_negative_p (d))
-    {
-      /* The second object is before the first one, we succeed if the last
	 element of the second object is before the start of the first one.  */
-      bound = double_int_add (d, double_int_add (size2, double_int_minus_one));
-      return double_int_negative_p (bound);
-    }
-  else
-    {
-      /* We succeed if the second object starts after the first one ends.  */
-      return double_int_scmp (size1, d) <= 0;
+
+  /* Sort the location list of gathered memory references after their
+     loop postorder number.  */
+  im_mem_ref *ref;
+  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
+    ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
+
+  free (bbs);
+//  free (bb_loop_postorder);
+
+  /* Propagate the information about accessed memory references up
+     the loop hierarchy.  */
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+    {
+      /* Finalize the overall touched references (including subloops).  */
+      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
		       &memory_accesses.refs_stored_in_loop[loop->num]);
+
+      /* Propagate the information about accessed memory references up
	 the loop hierarchy.  */
+      outer = loop_outer (loop);
+      if (outer == current_loops->tree_root)
	continue;
+
+      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
    }
}
 
@@ -1865,16 +1575,17 @@
   tree_to_aff_combination_expand.  */
 
static bool
-mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache)
+mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
+		      hash_map<tree, name_expansion *> **ttae_cache)
{
  /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
     object and their offset differ in such a way that the locations cannot
     overlap, then they cannot alias.  */
-  double_int size1, size2;
+  widest_int size1, size2;
  aff_tree off1, off2;
 
  /* Perform basic offset and type-based disambiguation.  */
-  if (!refs_may_alias_p (mem1, mem2))
+  if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
    return false;
 
  /* The expansion of addresses may be a bit expensive, thus we only do
@@ -1882,191 +1593,385 @@
  if (optimize < 2)
    return true;
 
-  get_inner_reference_aff (mem1, &off1, &size1);
-  get_inner_reference_aff (mem2, &off2, &size2);
+  get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
+  get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
  aff_combination_expand (&off1, ttae_cache);
  aff_combination_expand (&off2, ttae_cache);
-  aff_combination_scale (&off1, double_int_minus_one);
+  aff_combination_scale (&off1, -1);
  aff_combination_add (&off2, &off1);
 
-  if (cannot_overlap_p (&off2, size1, size2))
+  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
    return false;
 
  return true;
}
 
+/* Compare function for bsearch searching for reference locations
+   in a loop.  */
+
+static int
+find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
+{
+  struct loop *loop = (struct loop *)const_cast<void *>(loop_);
+  mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
+  struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
+  if (loop->num == loc_loop->num
+      || flow_loop_nested_p (loop, loc_loop))
+    return 0;
+  return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
	  ? -1 : 1);
+}
+
+/* Iterates over all locations of REF in LOOP and its subloops calling
+   fn.operator() with the location as argument.  When that operator
+   returns true the iteration is stopped and true is returned.
+   Otherwise false is returned.  */
+
+template <typename FN>
+static bool
+for_all_locs_in_loop (struct loop *loop, im_mem_ref *ref, FN fn)
+{
+  unsigned i;
+  mem_ref_loc *loc;
+
+  /* Search for the cluster of locs in the accesses_in_loop vector
+     which is sorted after postorder index of the loop father.  */
+  loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
+  if (!loc)
+    return false;
+
+  /* We have found one location inside loop or its sub-loops.  Iterate
+     both forward and backward to cover the whole cluster.  */
+  i = loc - ref->accesses_in_loop.address ();
+  while (i > 0)
+    {
+      --i;
+      mem_ref_loc *l = &ref->accesses_in_loop[i];
+      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
+	break;
+      if (fn (l))
+	return true;
+    }
+  for (i = loc - ref->accesses_in_loop.address ();
+       i < ref->accesses_in_loop.length (); ++i)
+    {
+      mem_ref_loc *l = &ref->accesses_in_loop[i];
+      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
+	break;
+      if (fn (l))
+	return true;
+    }
+
+  return false;
+}
+
/* Rewrites location LOC by TMP_VAR.  */
 
-static void
-rewrite_mem_ref_loc (mem_ref_loc_p loc, tree tmp_var)
+struct rewrite_mem_ref_loc
{
-  mark_virtual_ops_for_renaming (loc->stmt);
+  rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
+  bool operator () (mem_ref_loc *loc);
+  tree tmp_var;
+};
+
+bool
+rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
+{
  *loc->ref = tmp_var;
  update_stmt (loc->stmt);
-}
-
-/* Adds all locations of REF in LOOP and its subloops to LOCS.  */
-
-static void
-get_all_locs_in_loop (struct loop *loop, mem_ref_p ref,
		      VEC (mem_ref_loc_p, heap) **locs)
-{
-  mem_ref_locs_p accs;
-  unsigned i;
-  mem_ref_loc_p loc;
-  bitmap refs = VEC_index (bitmap, memory_accesses.all_refs_in_loop,
			   loop->num);
-  struct loop *subloop;
-
-  if (!bitmap_bit_p (refs, ref->id))
-    return;
-
-  if (VEC_length (mem_ref_locs_p, ref->accesses_in_loop)
-      > (unsigned) loop->num)
-    {
-      accs = VEC_index (mem_ref_locs_p, ref->accesses_in_loop, loop->num);
-      if (accs)
-	{
-	  FOR_EACH_VEC_ELT (mem_ref_loc_p, accs->locs, i, loc)
-	    VEC_safe_push (mem_ref_loc_p, heap, *locs, loc);
-	}
-    }
-
-  for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
-    get_all_locs_in_loop (subloop, ref, locs);
+  return false;
}
 
/* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
 
static void
-rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
+rewrite_mem_refs (struct loop *loop, im_mem_ref *ref, tree tmp_var)
+{
+  for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
+}
+
+/* Stores the first reference location in LOCP.  */
+
+struct first_mem_ref_loc_1
{
-  unsigned i;
-  mem_ref_loc_p loc;
-  VEC (mem_ref_loc_p, heap) *locs = NULL;
-
-  get_all_locs_in_loop (loop, ref, &locs);
-  FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc)
-    rewrite_mem_ref_loc (loc, tmp_var);
-  VEC_free (mem_ref_loc_p, heap, locs);
+  first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
+  bool operator () (mem_ref_loc *loc);
+  mem_ref_loc **locp;
+};
+
+bool
+first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
+{
+  *locp = loc;
+  return true;
+}
+
+/* Returns the first reference location to REF in LOOP.  */
+
+static mem_ref_loc *
+first_mem_ref_loc (struct loop *loop, im_mem_ref *ref)
+{
+  mem_ref_loc *locp = NULL;
+  for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
+  return locp;
}
 
-/* The name and the length of the currently generated variable
-   for lsm.  */
-#define MAX_LSM_NAME_LENGTH 40
-static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
-static int lsm_tmp_name_length;
-
-/* Adds S to lsm_tmp_name.  */
-
-static void
-lsm_tmp_name_add (const char *s)
-{
-  int l = strlen (s) + lsm_tmp_name_length;
-  if (l > MAX_LSM_NAME_LENGTH)
-    return;
-
-  strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
-  lsm_tmp_name_length = l;
-}
-
-/* Stores the name for temporary variable that replaces REF to
-   lsm_tmp_name.  */
+struct prev_flag_edges {
+  /* Edge to insert new flag comparison code.  */
+  edge append_cond_position;
+
+  /* Edge for fall through from previous flag comparison.  */
+  edge last_cond_fallthru;
+};
+
+/* Helper function for execute_sm.  Emit code to store TMP_VAR into
+   MEM along edge EX.
+
+   The store is only done if MEM has changed.  We do this so no
+   changes to MEM occur on code paths that did not originally store
+   into it.
+
+   The common case for execute_sm will transform:
+
+     for (...) {
+       if (foo)
+         stuff;
+       else
+         MEM = TMP_VAR;
+     }
+
+   into:
+
+     lsm = MEM;
+     for (...) {
+       if (foo)
+         stuff;
+       else
+         lsm = TMP_VAR;
+     }
+     MEM = lsm;
+
+   This function will generate:
+
+     lsm = MEM;
+
+     lsm_flag = false;
+     ...
+     for (...) {
+       if (foo)
+         stuff;
+       else {
+         lsm = TMP_VAR;
+         lsm_flag = true;
+       }
+     }
+     if (lsm_flag)	<--
+       MEM = lsm;	<--
+*/
 
static void
-gen_lsm_tmp_name (tree ref)
+execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
+		       edge preheader, hash_set <basic_block> *flag_bbs)
{
-  const char *name;
-
-  switch (TREE_CODE (ref))
+  basic_block new_bb, then_bb, old_dest;
+  bool loop_has_only_one_exit;
+  edge then_old_edge, orig_ex = ex;
+  gimple_stmt_iterator gsi;
+  gimple *stmt;
+  struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
+  bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
+
+  int freq_sum = 0;
+  profile_count count_sum = profile_count::zero ();
+  int nbbs = 0, ncount = 0;
+  profile_probability flag_probability = profile_probability::uninitialized ();
+
+  /* Flag is set in FLAG_BBS.  Determine probability that flag will be true
+     at loop exit.
+
+     This code may look fancy, but it can not update profile very realistically
+     because we do not know the probability that flag will be true at given
+     loop exit.
+
+     We look for two interesting extremes
+       - when exit is dominated by block setting the flag, we know it will
+         always be true.  This is a common case.
+       - when all blocks setting the flag have very low frequency we know
+         it will likely be false.
+     In all other cases we default to 2/3 for flag being true.  */
+
+  for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
+       it != flag_bbs->end (); ++it)
+    {
+      freq_sum += (*it)->frequency;
+      if ((*it)->count.initialized_p ())
+	count_sum += (*it)->count, ncount ++;
+      if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
+	flag_probability = profile_probability::always ();
+      nbbs++;
+    }
+
+  profile_probability cap = profile_probability::always ().apply_scale (2, 3);
+
+  if (flag_probability.initialized_p ())
+    ;
+  else if (ncount == nbbs && count_sum > 0 && preheader->count () >= count_sum)
+    {
+      flag_probability = count_sum.probability_in (preheader->count ());
+      if (flag_probability > cap)
+	flag_probability = cap;
+    }
+  else if (freq_sum > 0 && EDGE_FREQUENCY (preheader) >= freq_sum)
+    {
+      flag_probability = profile_probability::from_reg_br_prob_base
+		(GCOV_COMPUTE_SCALE (freq_sum, EDGE_FREQUENCY (preheader)));
+      if (flag_probability > cap)
+	flag_probability = cap;
+    }
+  else
+    flag_probability = cap;
+
+  /* ?? Insert store after previous store if applicable.  See note
+     below.  */
+  if (prev_edges)
+    ex = prev_edges->append_cond_position;
+
+  loop_has_only_one_exit = single_pred_p (ex->dest);
+
+  if (loop_has_only_one_exit)
+    ex = split_block_after_labels (ex->dest);
+  else
    {
-    case MEM_REF:
-      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
-      lsm_tmp_name_add ("_");
-      break;
-
-    case ADDR_EXPR:
-      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
-      break;
-
-    case BIT_FIELD_REF:
-    case VIEW_CONVERT_EXPR:
-    case ARRAY_RANGE_REF:
-      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
-      break;
-
-    case REALPART_EXPR:
-      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
-      lsm_tmp_name_add ("_RE");
-      break;
-
-    case IMAGPART_EXPR:
-      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
-      lsm_tmp_name_add ("_IM");
-      break;
-
-    case COMPONENT_REF:
-      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
-      lsm_tmp_name_add ("_");
-      name = get_name (TREE_OPERAND (ref, 1));
-      if (!name)
-	name = "F";
-      lsm_tmp_name_add (name);
-      break;
-
-    case ARRAY_REF:
-      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
-      lsm_tmp_name_add ("_I");
-      break;
-
-    case SSA_NAME:
-      ref = SSA_NAME_VAR (ref);
-      /* Fallthru.
*/ - - case VAR_DECL: - case PARM_DECL: - name = get_name (ref); - if (!name) - name = "D"; - lsm_tmp_name_add (name); - break; - - case STRING_CST: - lsm_tmp_name_add ("S"); - break; - - case RESULT_DECL: - lsm_tmp_name_add ("R"); - break; - - case INTEGER_CST: - /* Nothing. */ - break; - - default: - gcc_unreachable (); + for (gphi_iterator gpi = gsi_start_phis (ex->dest); + !gsi_end_p (gpi); gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + if (virtual_operand_p (gimple_phi_result (phi))) + continue; + + /* When the destination has a non-virtual PHI node with multiple + predecessors make sure we preserve the PHI structure by + forcing a forwarder block so that hoisting of that PHI will + still work. */ + split_edge (ex); + break; + } + } + + old_dest = ex->dest; + new_bb = split_edge (ex); + then_bb = create_empty_bb (new_bb); + then_bb->frequency = flag_probability.apply (new_bb->frequency); + then_bb->count = new_bb->count.apply_probability (flag_probability); + if (irr) + then_bb->flags = BB_IRREDUCIBLE_LOOP; + add_bb_to_loop (then_bb, new_bb->loop_father); + + gsi = gsi_start_bb (new_bb); + stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node, + NULL_TREE, NULL_TREE); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + gsi = gsi_start_bb (then_bb); + /* Insert actual store. */ + stmt = gimple_build_assign (unshare_expr (mem), tmp_var); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + edge e1 = single_succ_edge (new_bb); + edge e2 = make_edge (new_bb, then_bb, + EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); + e2->probability = flag_probability; + + e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0); + e1->flags &= ~EDGE_FALLTHRU; + + e1->probability = flag_probability.invert (); + + then_old_edge = make_single_succ_edge (then_bb, old_dest, + EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); + + set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb); + + if (prev_edges) + { + basic_block prevbb = prev_edges->last_cond_fallthru->src; + redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb); + set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb); + set_immediate_dominator (CDI_DOMINATORS, old_dest, + recompute_dominator (CDI_DOMINATORS, old_dest)); } + + /* ?? Because stores may alias, they must happen in the exact + sequence they originally happened. Save the position right after + the (_lsm) store we just created so we can continue appending after + it and maintain the original order. */ + { + struct prev_flag_edges *p; + + if (orig_ex->aux) + orig_ex->aux = NULL; + alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges)); + p = (struct prev_flag_edges *) orig_ex->aux; + p->append_cond_position = then_old_edge; + p->last_cond_fallthru = find_edge (new_bb, old_dest); + orig_ex->aux = (void *) p; + } + + if (!loop_has_only_one_exit) + for (gphi_iterator gpi = gsi_start_phis (old_dest); + !gsi_end_p (gpi); gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + unsigned i; + + for (i = 0; i < gimple_phi_num_args (phi); i++) + if (gimple_phi_arg_edge (phi, i)->src == new_bb) + { + tree arg = gimple_phi_arg_def (phi, i); + add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION); + update_stmt (phi); + } + } } -/* Determines name for temporary variable that replaces REF. - The name is accumulated into the lsm_tmp_name variable. - N is added to the name of the temporary. */ - -char * -get_lsm_tmp_name (tree ref, unsigned n) +/* When REF is set on the location, set flag indicating the store. 
*/ + +struct sm_set_flag_if_changed { - char ns[2]; - - lsm_tmp_name_length = 0; - gen_lsm_tmp_name (ref); - lsm_tmp_name_add ("_lsm"); - if (n < 10) + sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_) + : flag (flag_), bbs (bbs_) {} + bool operator () (mem_ref_loc *loc); + tree flag; + hash_set <basic_block> *bbs; +}; + +bool +sm_set_flag_if_changed::operator () (mem_ref_loc *loc) +{ + /* Only set the flag for writes. */ + if (is_gimple_assign (loc->stmt) + && gimple_assign_lhs_ptr (loc->stmt) == loc->ref) { - ns[0] = '0' + n; - ns[1] = 0; - lsm_tmp_name_add (ns); + gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt); + gimple *stmt = gimple_build_assign (flag, boolean_true_node); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + bbs->add (gimple_bb (stmt)); } - return lsm_tmp_name; + return false; +} + +/* Helper function for execute_sm. On every location where REF is + set, set an appropriate flag indicating the store. */ + +static tree +execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref, + hash_set <basic_block> *bbs) +{ + tree flag; + char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag"); + flag = create_tmp_reg (boolean_type_node, str); + for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs)); + return flag; } /* Executes store motion of memory reference REF from LOOP. @@ -2075,46 +1980,76 @@ to the reference from the temporary variable are emitted to exits. */ static void -execute_sm (struct loop *loop, VEC (edge, heap) *exits, mem_ref_p ref) +execute_sm (struct loop *loop, vec<edge> exits, im_mem_ref *ref) { - tree tmp_var; + tree tmp_var, store_flag = NULL_TREE; unsigned i; - gimple load, store; + gassign *load; struct fmt_data fmt_data; edge ex; struct lim_aux_data *lim_data; + bool multi_threaded_model_p = false; + gimple_stmt_iterator gsi; + hash_set<basic_block> flag_bbs; if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Executing store motion of "); - print_generic_expr (dump_file, ref->mem, 0); + print_generic_expr (dump_file, ref->mem.ref); fprintf (dump_file, " from loop %d\n", loop->num); } - tmp_var = make_rename_temp (TREE_TYPE (ref->mem), - get_lsm_tmp_name (ref->mem, ~0)); + tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref), + get_lsm_tmp_name (ref->mem.ref, ~0)); fmt_data.loop = loop; fmt_data.orig_loop = loop; - for_each_index (&ref->mem, force_move_till, &fmt_data); + for_each_index (&ref->mem.ref, force_move_till, &fmt_data); + + if (bb_in_transaction (loop_preheader_edge (loop)->src) + || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) + && ! ref_always_accessed_p (loop, ref, true))) + multi_threaded_model_p = true; + + if (multi_threaded_model_p) + store_flag = execute_sm_if_changed_flag_set (loop, ref, &flag_bbs); rewrite_mem_refs (loop, ref, tmp_var); - /* Emit the load & stores. */ - load = gimple_build_assign (tmp_var, unshare_expr (ref->mem)); + /* Emit the load code on a random exit edge or into the latch if + the loop does not exit, so that we are sure it will be processed + by move_computations after all dependencies. */ + gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt); + + /* FIXME/TODO: For the multi-threaded variant, we could avoid this + load altogether, since the store is predicated by a flag. We + could, do the load only if it was originally in the loop. 
*/ + load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); lim_data = init_lim_data (load); lim_data->max_loop = loop; lim_data->tgt_loop = loop; - - /* Put this into the latch, so that we are sure it will be processed after - all dependencies. */ - gsi_insert_on_edge (loop_latch_edge (loop), load); - - FOR_EACH_VEC_ELT (edge, exits, i, ex) + gsi_insert_before (&gsi, load, GSI_SAME_STMT); + + if (multi_threaded_model_p) { - store = gimple_build_assign (unshare_expr (ref->mem), tmp_var); - gsi_insert_on_edge (ex, store); + load = gimple_build_assign (store_flag, boolean_false_node); + lim_data = init_lim_data (load); + lim_data->max_loop = loop; + lim_data->tgt_loop = loop; + gsi_insert_before (&gsi, load, GSI_SAME_STMT); } + + /* Sink the store to every exit from the loop. */ + FOR_EACH_VEC_ELT (exits, i, ex) + if (!multi_threaded_model_p) + { + gassign *store; + store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var); + gsi_insert_on_edge (ex, store); + } + else + execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag, + loop_preheader_edge (loop), &flag_bbs); } /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit @@ -2122,226 +2057,265 @@ static void hoist_memory_references (struct loop *loop, bitmap mem_refs, - VEC (edge, heap) *exits) + vec<edge> exits) { - mem_ref_p ref; + im_mem_ref *ref; unsigned i; bitmap_iterator bi; EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi) { - ref = VEC_index (mem_ref_p, memory_accesses.refs_list, i); + ref = memory_accesses.refs_list[i]; execute_sm (loop, exits, ref); } } +struct ref_always_accessed +{ + ref_always_accessed (struct loop *loop_, bool stored_p_) + : loop (loop_), stored_p (stored_p_) {} + bool operator () (mem_ref_loc *loc); + struct loop *loop; + bool stored_p; +}; + +bool +ref_always_accessed::operator () (mem_ref_loc *loc) +{ + struct loop *must_exec; + + if (!get_lim_data (loc->stmt)) + return false; + + /* If we require an always executed store make sure the statement + stores to the reference. */ + if (stored_p) + { + tree lhs = gimple_get_lhs (loc->stmt); + if (!lhs + || lhs != *loc->ref) + return false; + } + + must_exec = get_lim_data (loc->stmt)->always_executed_in; + if (!must_exec) + return false; + + if (must_exec == loop + || flow_loop_nested_p (must_exec, loop)) + return true; + + return false; +} + /* Returns true if REF is always accessed in LOOP. If STORED_P is true make sure REF is always stored to in LOOP. */ static bool -ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p) +ref_always_accessed_p (struct loop *loop, im_mem_ref *ref, bool stored_p) { - VEC (mem_ref_loc_p, heap) *locs = NULL; - unsigned i; - mem_ref_loc_p loc; - bool ret = false; - struct loop *must_exec; - tree base; - - base = get_base_address (ref->mem); - if (INDIRECT_REF_P (base) - || TREE_CODE (base) == MEM_REF) - base = TREE_OPERAND (base, 0); - - get_all_locs_in_loop (loop, ref, &locs); - FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc) - { - if (!get_lim_data (loc->stmt)) - continue; - - /* If we require an always executed store make sure the statement - stores to the reference. 
*/ - if (stored_p) - { - tree lhs; - if (!gimple_get_lhs (loc->stmt)) - continue; - lhs = get_base_address (gimple_get_lhs (loc->stmt)); - if (!lhs) - continue; - if (INDIRECT_REF_P (lhs) - || TREE_CODE (lhs) == MEM_REF) - lhs = TREE_OPERAND (lhs, 0); - if (lhs != base) - continue; - } - - must_exec = get_lim_data (loc->stmt)->always_executed_in; - if (!must_exec) - continue; - - if (must_exec == loop - || flow_loop_nested_p (must_exec, loop)) - { - ret = true; - break; - } - } - VEC_free (mem_ref_loc_p, heap, locs); - - return ret; + return for_all_locs_in_loop (loop, ref, + ref_always_accessed (loop, stored_p)); } /* Returns true if REF1 and REF2 are independent. */ static bool -refs_independent_p (mem_ref_p ref1, mem_ref_p ref2) +refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2) { - if (ref1 == ref2 - || bitmap_bit_p (ref1->indep_ref, ref2->id)) + if (ref1 == ref2) return true; - if (bitmap_bit_p (ref1->dep_ref, ref2->id)) - return false; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Querying dependency of refs %u and %u: ", ref1->id, ref2->id); - if (mem_refs_may_alias_p (ref1->mem, ref2->mem, - &memory_accesses.ttae_cache)) + if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache)) { - bitmap_set_bit (ref1->dep_ref, ref2->id); - bitmap_set_bit (ref2->dep_ref, ref1->id); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "dependent.\n"); return false; } else { - bitmap_set_bit (ref1->indep_ref, ref2->id); - bitmap_set_bit (ref2->indep_ref, ref1->id); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "independent.\n"); return true; } } -/* Records the information whether REF is independent in LOOP (according - to INDEP). */ +/* Mark REF dependent on stores or loads (according to STORED_P) in LOOP + and its super-loops. */ static void -record_indep_loop (struct loop *loop, mem_ref_p ref, bool indep) +record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p) +{ + /* We can propagate dependent-in-loop bits up the loop + hierarchy to all outer loops. */ + while (loop != current_loops->tree_root + && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p))) + loop = loop_outer (loop); +} + +/* Returns true if REF is independent on all other memory + references in LOOP. REF_LOOP is where REF is accessed, SAFELEN is the + safelen to apply. */ + +static bool +ref_indep_loop_p_1 (int safelen, struct loop *loop, im_mem_ref *ref, + bool stored_p, struct loop *ref_loop) { - if (indep) - bitmap_set_bit (ref->indep_loop, loop->num); + stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num)); + + if (loop->safelen > safelen + /* Check that REF is accessed inside LOOP. */ + && (loop == ref_loop || flow_loop_nested_p (loop, ref_loop))) + safelen = loop->safelen; + + bool indep_p = true; + bitmap refs_to_check; + + if (stored_p) + refs_to_check = &memory_accesses.refs_in_loop[loop->num]; + else + refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num]; + + if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)) + indep_p = false; + else if (safelen > 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file,"REF is independent due to safelen %d\n", + safelen); + print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM); + fprintf (dump_file, "\n"); + } + + /* We need to recurse to properly handle UNANALYZABLE_MEM_ID. 
*/ + struct loop *inner = loop->inner; + while (inner) + { + if (!ref_indep_loop_p_1 (safelen, inner, ref, stored_p, ref_loop)) + { + indep_p = false; + break; + } + inner = inner->next; + } + + /* Avoid caching here as safelen depends on context and refs + are shared between different contexts. */ + return indep_p; + } else - bitmap_set_bit (ref->dep_loop, loop->num); + { + if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))) + return true; + if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p))) + return false; + + struct loop *inner = loop->inner; + while (inner) + { + if (!ref_indep_loop_p_1 (safelen, inner, ref, stored_p, ref_loop)) + { + indep_p = false; + break; + } + inner = inner->next; + } + + if (indep_p) + { + unsigned i; + bitmap_iterator bi; + EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi) + { + im_mem_ref *aref = memory_accesses.refs_list[i]; + if (!refs_independent_p (ref, aref)) + { + indep_p = false; + break; + } + } + } + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n", + ref->id, loop->num, indep_p ? "independent" : "dependent"); + + /* Record the computed result in the cache. */ + if (indep_p) + { + if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)) + && stored_p) + { + /* If it's independend against all refs then it's independent + against stores, too. */ + bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false)); + } + } + else + { + record_dep_loop (loop, ref, stored_p); + if (!stored_p) + { + /* If it's dependent against stores it's dependent against + all refs, too. */ + record_dep_loop (loop, ref, true); + } + } + + return indep_p; } /* Returns true if REF is independent on all other memory references in - LOOP. */ + LOOP. REF_LOOP is the loop where REF is accessed. */ static bool -ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref) +ref_indep_loop_p (struct loop *loop, im_mem_ref *ref, struct loop *ref_loop) { - bitmap clobbers, refs_to_check, refs; - unsigned i; - bitmap_iterator bi; - bool ret = true, stored = bitmap_bit_p (ref->stored, loop->num); - htab_t map; - mem_ref_p aref; - - /* If the reference is clobbered, it is not independent. */ - clobbers = VEC_index (bitmap, memory_accesses.clobbered_vops, loop->num); - if (bitmap_intersect_p (ref->vops, clobbers)) - return false; - - refs_to_check = BITMAP_ALLOC (NULL); - - map = VEC_index (htab_t, memory_accesses.vop_ref_map, loop->num); - EXECUTE_IF_AND_COMPL_IN_BITMAP (ref->vops, clobbers, 0, i, bi) - { - if (stored) - refs = get_vop_accesses (map, i); - else - refs = get_vop_stores (map, i); - - bitmap_ior_into (refs_to_check, refs); - } - - EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi) - { - aref = VEC_index (mem_ref_p, memory_accesses.refs_list, i); - if (!refs_independent_p (ref, aref)) - { - ret = false; - record_indep_loop (loop, aref, false); - break; - } - } - - BITMAP_FREE (refs_to_check); - return ret; -} - -/* Returns true if REF is independent on all other memory references in - LOOP. Wrapper over ref_indep_loop_p_1, caching its results. */ - -static bool -ref_indep_loop_p (struct loop *loop, mem_ref_p ref) -{ - bool ret; - - if (bitmap_bit_p (ref->indep_loop, loop->num)) - return true; - if (bitmap_bit_p (ref->dep_loop, loop->num)) - return false; - - ret = ref_indep_loop_p_1 (loop, ref); - - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n", - ref->id, loop->num, ret ? 
"independent" : "dependent"); - - record_indep_loop (loop, ref, ret); - - return ret; + gcc_checking_assert (MEM_ANALYZABLE (ref)); + + return ref_indep_loop_p_1 (0, loop, ref, false, ref_loop); } /* Returns true if we can perform store motion of REF from LOOP. */ static bool -can_sm_ref_p (struct loop *loop, mem_ref_p ref) +can_sm_ref_p (struct loop *loop, im_mem_ref *ref) { tree base; - /* Unless the reference is stored in the loop, there is nothing to do. */ - if (!bitmap_bit_p (ref->stored, loop->num)) + /* Can't hoist unanalyzable refs. */ + if (!MEM_ANALYZABLE (ref)) return false; /* It should be movable. */ - if (!is_gimple_reg_type (TREE_TYPE (ref->mem)) - || TREE_THIS_VOLATILE (ref->mem) - || !for_each_index (&ref->mem, may_move_till, loop)) + if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref)) + || TREE_THIS_VOLATILE (ref->mem.ref) + || !for_each_index (&ref->mem.ref, may_move_till, loop)) return false; /* If it can throw fail, we do not properly update EH info. */ - if (tree_could_throw_p (ref->mem)) + if (tree_could_throw_p (ref->mem.ref)) return false; /* If it can trap, it must be always executed in LOOP. Readonly memory locations may trap when storing to them, but tree_could_trap_p is a predicate for rvalues, so check that explicitly. */ - base = get_base_address (ref->mem); - if ((tree_could_trap_p (ref->mem) + base = get_base_address (ref->mem.ref); + if ((tree_could_trap_p (ref->mem.ref) || (DECL_P (base) && TREE_READONLY (base))) && !ref_always_accessed_p (loop, ref, true)) return false; /* And it must be independent on all other memory references in LOOP. */ - if (!ref_indep_loop_p (loop, ref)) + if (!ref_indep_loop_p (loop, ref, loop)) return false; return true; @@ -2354,15 +2328,14 @@ static void find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm) { - bitmap refs = VEC_index (bitmap, memory_accesses.all_refs_in_loop, - loop->num); + bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num]; unsigned i; bitmap_iterator bi; - mem_ref_p ref; + im_mem_ref *ref; EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi) { - ref = VEC_index (mem_ref_p, memory_accesses.refs_list, i); + ref = memory_accesses.refs_list[i]; if (can_sm_ref_p (loop, ref)) bitmap_set_bit (refs_to_sm, i); } @@ -2374,12 +2347,12 @@ static bool loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, - VEC (edge, heap) *exits) + vec<edge> exits) { unsigned i; edge ex; - FOR_EACH_VEC_ELT (edge, exits, i, ex) + FOR_EACH_VEC_ELT (exits, i, ex) if (ex->flags & (EDGE_ABNORMAL | EDGE_EH)) return false; @@ -2393,16 +2366,16 @@ static void store_motion_loop (struct loop *loop, bitmap sm_executed) { - VEC (edge, heap) *exits = get_loop_exit_edges (loop); + vec<edge> exits = get_loop_exit_edges (loop); struct loop *subloop; - bitmap sm_in_loop = BITMAP_ALLOC (NULL); + bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack); if (loop_suitable_for_sm (loop, exits)) { find_refs_for_sm (loop, sm_executed, sm_in_loop); hoist_memory_references (loop, sm_in_loop, exits); } - VEC_free (edge, heap, exits); + exits.release (); bitmap_ior_into (sm_executed, sm_in_loop); for (subloop = loop->inner; subloop != NULL; subloop = subloop->next) @@ -2418,7 +2391,7 @@ store_motion (void) { struct loop *loop; - bitmap sm_executed = BITMAP_ALLOC (NULL); + bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack); for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next) store_motion_loop (loop, sm_executed); @@ -2433,14 +2406,14 @@ blocks that contain a nonpure call. 
*/ static void -fill_always_executed_in (struct loop *loop, sbitmap contains_call) +fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call) { basic_block bb = NULL, *bbs, last = NULL; unsigned i; edge e; struct loop *inn_loop = loop; - if (!loop->header->aux) + if (ALWAYS_EXECUTED_IN (loop->header) == NULL) { bbs = get_loop_body_in_dom_order (loop); @@ -2452,12 +2425,20 @@ if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) last = bb; - if (TEST_BIT (contains_call, bb->index)) + if (bitmap_bit_p (contains_call, bb->index)) break; FOR_EACH_EDGE (e, ei, bb->succs) - if (!flow_bb_inside_loop_p (loop, e->dest)) - break; + { + /* If there is an exit from this BB. */ + if (!flow_bb_inside_loop_p (loop, e->dest)) + break; + /* Or we enter a possibly non-finite loop. */ + if (flow_loop_nested_p (bb->loop_father, + e->dest->loop_father) + && ! finite_loop_p (e->dest->loop_father)) + break; + } if (e) break; @@ -2482,7 +2463,7 @@ while (1) { - last->aux = loop; + SET_ALWAYS_EXECUTED_IN (last, loop); if (last == loop->header) break; last = get_immediate_dominator (CDI_DOMINATORS, last); @@ -2492,38 +2473,87 @@ } for (loop = loop->inner; loop; loop = loop->next) - fill_always_executed_in (loop, contains_call); + fill_always_executed_in_1 (loop, contains_call); } +/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e. + for each such basic block bb records the outermost loop for that execution + of its header implies execution of bb. */ + +static void +fill_always_executed_in (void) +{ + basic_block bb; + struct loop *loop; + + auto_sbitmap contains_call (last_basic_block_for_fn (cfun)); + bitmap_clear (contains_call); + FOR_EACH_BB_FN (bb, cfun) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + if (nonpure_call_p (gsi_stmt (gsi))) + break; + } + + if (!gsi_end_p (gsi)) + bitmap_set_bit (contains_call, bb->index); + } + + for (loop = current_loops->tree_root->inner; loop; loop = loop->next) + fill_always_executed_in_1 (loop, contains_call); +} + + /* Compute the global information needed by the loop invariant motion pass. */ static void tree_ssa_lim_initialize (void) { - sbitmap contains_call = sbitmap_alloc (last_basic_block); - gimple_stmt_iterator bsi; struct loop *loop; - basic_block bb; - - sbitmap_zero (contains_call); - FOR_EACH_BB (bb) + unsigned i; + + bitmap_obstack_initialize (&lim_bitmap_obstack); + gcc_obstack_init (&mem_ref_obstack); + lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>; + + if (flag_tm) + compute_transaction_bits (); + + alloc_aux_for_edges (0); + + memory_accesses.refs = new hash_table<mem_ref_hasher> (100); + memory_accesses.refs_list.create (100); + /* Allocate a special, unanalyzable mem-ref with ID zero. 
*/ + memory_accesses.refs_list.quick_push + (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID)); + + memory_accesses.refs_in_loop.create (number_of_loops (cfun)); + memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun)); + memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun)); + memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun)); + memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun)); + memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun)); + + for (i = 0; i < number_of_loops (cfun); i++) { - for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) - { - if (nonpure_call_p (gsi_stmt (bsi))) - break; - } - - if (!gsi_end_p (bsi)) - SET_BIT (contains_call, bb->index); + bitmap_initialize (&memory_accesses.refs_in_loop[i], + &lim_bitmap_obstack); + bitmap_initialize (&memory_accesses.refs_stored_in_loop[i], + &lim_bitmap_obstack); + bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i], + &lim_bitmap_obstack); } - for (loop = current_loops->tree_root->inner; loop; loop = loop->next) - fill_always_executed_in (loop, contains_call); - - sbitmap_free (contains_call); - - lim_aux_data_map = pointer_map_create (); + memory_accesses.ttae_cache = NULL; + + /* Initialize bb_loop_postorder with a mapping from loop->num to + its postorder index. */ + i = 0; + bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun)); + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) + bb_loop_postorder[loop->num] = i++; } /* Cleans up after the invariant motion pass. */ @@ -2533,43 +2563,38 @@ { basic_block bb; unsigned i; - bitmap b; - htab_t h; - - FOR_EACH_BB (bb) - { - bb->aux = NULL; - } - - pointer_map_destroy (lim_aux_data_map); - - VEC_free (mem_ref_p, heap, memory_accesses.refs_list); - htab_delete (memory_accesses.refs); - - FOR_EACH_VEC_ELT (bitmap, memory_accesses.refs_in_loop, i, b) - BITMAP_FREE (b); - VEC_free (bitmap, heap, memory_accesses.refs_in_loop); - - FOR_EACH_VEC_ELT (bitmap, memory_accesses.all_refs_in_loop, i, b) - BITMAP_FREE (b); - VEC_free (bitmap, heap, memory_accesses.all_refs_in_loop); - - FOR_EACH_VEC_ELT (bitmap, memory_accesses.clobbered_vops, i, b) - BITMAP_FREE (b); - VEC_free (bitmap, heap, memory_accesses.clobbered_vops); - - FOR_EACH_VEC_ELT (htab_t, memory_accesses.vop_ref_map, i, h) - htab_delete (h); - VEC_free (htab_t, heap, memory_accesses.vop_ref_map); + im_mem_ref *ref; + + free_aux_for_edges (); + + FOR_EACH_BB_FN (bb, cfun) + SET_ALWAYS_EXECUTED_IN (bb, NULL); + + bitmap_obstack_release (&lim_bitmap_obstack); + delete lim_aux_data_map; + + delete memory_accesses.refs; + memory_accesses.refs = NULL; + + FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref) + memref_free (ref); + memory_accesses.refs_list.release (); + obstack_free (&mem_ref_obstack, NULL); + + memory_accesses.refs_in_loop.release (); + memory_accesses.refs_stored_in_loop.release (); + memory_accesses.all_refs_stored_in_loop.release (); if (memory_accesses.ttae_cache) - pointer_map_destroy (memory_accesses.ttae_cache); + free_affine_expand_cache (&memory_accesses.ttae_cache); + + free (bb_loop_postorder); } /* Moves invariants from loops. Only "expensive" invariants are moved out -- i.e. those that are likely to be win regardless of the register pressure. */ -unsigned int +static unsigned int tree_ssa_lim (void) { unsigned int todo; @@ -2579,9 +2604,13 @@ /* Gathers information about memory accesses in the loops. 
*/ analyze_memory_references (); + /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */ + fill_always_executed_in (); + /* For each statement determine the outermost loop in that it is invariant and cost for computing the invariant. */ - determine_invariantness (); + invariantness_dom_walker (CDI_DOMINATORS) + .walk (cfun->cfg->x_entry_block_ptr); /* Execute store motion. Force the necessary invariants to be moved out of the loops as well. */ @@ -2594,3 +2623,60 @@ return todo; } + +/* Loop invariant motion pass. */ + +namespace { + +const pass_data pass_data_lim = +{ + GIMPLE_PASS, /* type */ + "lim", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_LIM, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_lim : public gimple_opt_pass +{ +public: + pass_lim (gcc::context *ctxt) + : gimple_opt_pass (pass_data_lim, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_lim (m_ctxt); } + virtual bool gate (function *) { return flag_tree_loop_im != 0; } + virtual unsigned int execute (function *); + +}; // class pass_lim + +unsigned int +pass_lim::execute (function *fun) +{ + bool in_loop_pipeline = scev_initialized_p (); + if (!in_loop_pipeline) + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); + + if (number_of_loops (fun) <= 1) + return 0; + unsigned int todo = tree_ssa_lim (); + + if (!in_loop_pipeline) + loop_optimizer_finalize (); + return todo; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_lim (gcc::context *ctxt) +{ + return new pass_lim (ctxt); +} + +
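
Note: the overlap test that mem_refs_may_alias_p applies after affine expansion (the removed cannot_overlap_p, which this changeset renames to aff_comb_cannot_overlap_p and rewrites from double_int to widest_int) reduces to simple interval arithmetic: region A = [0, size1) and region B = [diff, diff + size2) cannot overlap when B ends before A starts or starts at or past A's end. A minimal standalone sketch, with plain 64-bit integers standing in for aff_tree/widest_int; the function and driver below are illustrative only, not GCC API.

#include <cstdint>
#include <cstdio>

/* Regions: A = [0, size1), B = [diff, diff + size2).  Returns true
   when the two regions provably cannot overlap.  */
static bool
cannot_overlap (int64_t diff, int64_t size1, int64_t size2)
{
  if (diff < 0)
    /* B lies before A: its last unit must end before position 0.  */
    return diff + size2 - 1 < 0;
  /* B lies after A: it must start at or past the end of A.  */
  return size1 <= diff;
}

int
main ()
{
  printf ("%d\n", cannot_overlap (8, 4, 4));    /* 1: [0,4) vs [8,12) */
  printf ("%d\n", cannot_overlap (2, 4, 4));    /* 0: [0,4) vs [2,6)  */
  printf ("%d\n", cannot_overlap (-4, 4, 4));   /* 1: [0,4) vs [-4,0) */
  printf ("%d\n", cannot_overlap (-2, 4, 4));   /* 0: [0,4) vs [-2,2) */
  return 0;
}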
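
Note: the changeset replaces the old collect-into-vector scheme (get_all_locs_in_loop plus an explicit FOR_EACH_VEC_ELT walk) with the for_all_locs_in_loop template, which invokes a functor on each location and stops as soon as the functor returns true; first_mem_ref_loc_1 and sm_set_flag_if_changed above are such functors. A standalone sketch of the pattern under simplified assumptions: it omits the real code's bsearch over the postorder-sorted accesses_in_loop vector, and mem_ref_loc here is a stand-in type.

#include <cstdio>
#include <vector>

struct mem_ref_loc { int stmt_uid; };

/* Call fn (loc) for every location; an early "true" from the functor
   stops the walk and is reported to the caller.  */
template <typename FN>
static bool
for_all_locs (std::vector<mem_ref_loc> &locs, FN fn)
{
  for (mem_ref_loc &l : locs)
    if (fn (&l))
      return true;
  return false;
}

/* Early-stopping functor in the style of first_mem_ref_loc_1.  */
struct first_loc
{
  first_loc (mem_ref_loc **locp_) : locp (locp_) {}
  bool operator () (mem_ref_loc *loc) { *locp = loc; return true; }
  mem_ref_loc **locp;
};

int
main ()
{
  std::vector<mem_ref_loc> locs = { {3}, {7}, {11} };
  mem_ref_loc *first = nullptr;
  if (for_all_locs (locs, first_loc (&first)))
    printf ("first loc: %d\n", first->stmt_uid);   /* prints 3 */
  return 0;
}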
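
Note: the transformation described in the block comment before execute_sm_if_changed can be written out as compilable before/after code. This is a hand-made sketch of what the pass conceptually produces for the multi-threaded model (store predicated by a flag, so no new store happens on paths that did not store originally); MEM, before_sm, and after_sm are illustrative names.

#include <cstdio>

static int MEM;   /* stands for the memory location *REF */

/* Before store motion: the store to MEM sits inside the loop.  */
void
before_sm (int n)
{
  for (int i = 0; i < n; i++)
    if (i & 1)
      MEM = i;
}

/* After store motion with a store flag: the loop works on a register
   copy, and MEM is written once at the exit, only if the loop really
   stored -- the code execute_sm_if_changed emits on the exit edge.  */
void
after_sm (int n)
{
  int lsm = MEM;           /* load hoisted to the preheader */
  bool lsm_flag = false;   /* set by sm_set_flag_if_changed's stores */

  for (int i = 0; i < n; i++)
    if (i & 1)
      {
        lsm = i;
        lsm_flag = true;
      }

  if (lsm_flag)
    MEM = lsm;
}

int
main ()
{
  before_sm (10);
  int a = MEM;
  MEM = 0;
  after_sm (10);
  printf ("%d %d\n", a, MEM);   /* both print 9 */
  return 0;
}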
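
Note: ref_indep_loop_p_1 caches its verdicts in two bitmaps per ref (indep_loop, dep_loop) with two bits per loop, selected by LOOP_DEP_BIT (loop->num, stored_p), and applies two implications when recording: independent of all refs implies independent of the stores, and dependent on the stores implies dependent on all refs. A sketch of that cache discipline follows; the LOOP_DEP_BIT encoding below is an assumption for illustration (the macro is defined in a part of the file not shown in these hunks), std::set stands in for the GCC bitmaps, and the upward propagation of record_dep_loop through outer loops is omitted.

#include <cstdio>
#include <set>

/* Hypothetical encoding: two cache bits per loop number.  */
static unsigned
loop_dep_bit (unsigned loopnum, bool stored_p)
{
  return 2 * loopnum + (stored_p ? 1 : 0);
}

struct ref_cache
{
  std::set<unsigned> indep_loop, dep_loop;

  void record_indep (unsigned loop, bool stored_p)
  {
    indep_loop.insert (loop_dep_bit (loop, stored_p));
    /* Independent of all refs implies independent of stores too.  */
    if (stored_p)
      indep_loop.insert (loop_dep_bit (loop, false));
  }

  void record_dep (unsigned loop, bool stored_p)
  {
    dep_loop.insert (loop_dep_bit (loop, stored_p));
    /* Dependent on the stores implies dependent on all refs too.  */
    if (!stored_p)
      dep_loop.insert (loop_dep_bit (loop, true));
  }
};

int
main ()
{
  ref_cache c;
  c.record_indep (5, true);
  printf ("%zu %zu\n", c.indep_loop.size (), c.dep_loop.size ());  /* 2 0 */
  return 0;
}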
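
Note: the new pass boilerplate at the end of the changeset (pass_data_lim, class pass_lim, make_pass_lim) follows the opt_pass pattern: a static pass_data descriptor, a pass class with gate () and execute () hooks, and a make_pass_* factory the pass manager calls. A toy, self-contained imitation of that shape under stand-in types -- context, function, opt_pass, and pass_data here are simplified mock-ups, not the GCC classes.

#include <cstdio>

struct context {};
struct function { int num_loops; };
struct pass_data { const char *name; };

class opt_pass
{
public:
  opt_pass (const pass_data &d, context *c) : data (d), ctxt (c) {}
  virtual ~opt_pass () {}
  virtual bool gate (function *) { return true; }
  virtual unsigned execute (function *) = 0;
  const pass_data &data;
  context *ctxt;
};

static const pass_data pass_data_lim = { "lim" };

class pass_lim : public opt_pass
{
public:
  pass_lim (context *c) : opt_pass (pass_data_lim, c) {}
  /* In the real pass the gate tests flag_tree_loop_im.  */
  bool gate (function *) override { return true; }
  unsigned execute (function *fun) override
  {
    if (fun->num_loops <= 1)
      return 0;   /* nothing to do, as in pass_lim::execute */
    return 1;
  }
};

opt_pass *
make_pass_lim (context *ctxt)
{
  return new pass_lim (ctxt);
}

int
main ()
{
  context ctxt;
  function fn = { 2 };
  opt_pass *p = make_pass_lim (&ctxt);
  printf ("%s -> %u\n", p->data.name, p->execute (&fn));  /* lim -> 1 */
  delete p;
  return 0;
}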