Mercurial > hg > CbC > CbC_gcc
diff gcc/fwprop.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
line wrap: on
line diff
--- a/gcc/fwprop.c Sun Aug 21 07:07:55 2011 +0900 +++ b/gcc/fwprop.c Fri Oct 27 22:46:09 2017 +0900 @@ -1,6 +1,5 @@ /* RTL-based forward propagation pass for GNU compiler. - Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 - Free Software Foundation, Inc. + Copyright (C) 2005-2017 Free Software Foundation, Inc. Contributed by Paolo Bonzini and Steven Bosscher. This file is part of GCC. @@ -22,25 +21,24 @@ #include "config.h" #include "system.h" #include "coretypes.h" -#include "tm.h" -#include "diagnostic-core.h" - -#include "sparseset.h" -#include "timevar.h" +#include "backend.h" +#include "target.h" #include "rtl.h" +#include "predict.h" +#include "df.h" +#include "memmodel.h" #include "tm_p.h" #include "insn-config.h" +#include "emit-rtl.h" #include "recog.h" -#include "flags.h" -#include "obstack.h" -#include "basic-block.h" -#include "output.h" -#include "df.h" -#include "target.h" + +#include "sparseset.h" +#include "cfgrtl.h" +#include "cfgcleanup.h" #include "cfgloop.h" #include "tree-pass.h" #include "domwalk.h" -#include "emit-rtl.h" +#include "rtl-iter.h" /* This pass does simple forward propagation and simplification when an @@ -118,11 +116,16 @@ static int num_changes; -DEF_VEC_P(df_ref); -DEF_VEC_ALLOC_P(df_ref,heap); -static VEC(df_ref,heap) *use_def_ref; -static VEC(df_ref,heap) *reg_defs; -static VEC(df_ref,heap) *reg_defs_stack; +static vec<df_ref> use_def_ref; +static vec<df_ref> reg_defs; +static vec<df_ref> reg_defs_stack; + +/* The maximum number of propagations that are still allowed. If we do + more propagations than originally we had uses, we must have ended up + in a propagation loop, as in PR79405. Until the algorithm fwprop + uses can obviously not get into such loops we need a workaround like + this. */ +static int propagations_left; /* The MD bitmaps are trimmed to include only live registers to cut memory usage on testcases like insn-recog.c. Track live registers @@ -137,7 +140,7 @@ static inline df_ref get_def_for_use (df_ref use) { - return VEC_index (df_ref, use_def_ref, DF_REF_ID (use)); + return use_def_ref[DF_REF_ID (use)]; } @@ -151,12 +154,11 @@ (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER) static void -process_defs (df_ref *def_rec, int top_flag) +process_defs (df_ref def, int top_flag) { - df_ref def; - while ((def = *def_rec++) != NULL) + for (; def; def = DF_REF_NEXT_LOC (def)) { - df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def)); + df_ref curr_def = reg_defs[DF_REF_REGNO (def)]; unsigned int dregno; if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag) @@ -164,7 +166,7 @@ dregno = DF_REF_REGNO (def); if (curr_def) - VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def); + reg_defs_stack.safe_push (curr_def); else { /* Do not store anything if "transitioning" from NULL to NULL. But @@ -173,18 +175,18 @@ if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS) ; else - VEC_safe_push (df_ref, heap, reg_defs_stack, def); + reg_defs_stack.safe_push (def); } if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS) { bitmap_set_bit (local_md, dregno); - VEC_replace (df_ref, reg_defs, dregno, NULL); + reg_defs[dregno] = NULL; } else { bitmap_clear_bit (local_md, dregno); - VEC_replace (df_ref, reg_defs, dregno, def); + reg_defs[dregno] = def; } } } @@ -196,36 +198,41 @@ is an artificial use vector. */ static void -process_uses (df_ref *use_rec, int top_flag) +process_uses (df_ref use, int top_flag) { - df_ref use; - while ((use = *use_rec++) != NULL) + for (; use; use = DF_REF_NEXT_LOC (use)) if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag) { unsigned int uregno = DF_REF_REGNO (use); - if (VEC_index (df_ref, reg_defs, uregno) + if (reg_defs[uregno] && !bitmap_bit_p (local_md, uregno) && bitmap_bit_p (local_lr, uregno)) - VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), - VEC_index (df_ref, reg_defs, uregno)); + use_def_ref[DF_REF_ID (use)] = reg_defs[uregno]; } } +class single_def_use_dom_walker : public dom_walker +{ +public: + single_def_use_dom_walker (cdi_direction direction) + : dom_walker (direction) {} + virtual edge before_dom_children (basic_block); + virtual void after_dom_children (basic_block); +}; -static void -single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED, - basic_block bb) +edge +single_def_use_dom_walker::before_dom_children (basic_block bb) { int bb_index = bb->index; struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index); struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index); - rtx insn; + rtx_insn *insn; bitmap_copy (local_md, &md_bb_info->in); bitmap_copy (local_lr, &lr_bb_info->in); /* Push a marker for the leave_block callback. */ - VEC_safe_push (df_ref, heap, reg_defs_stack, NULL); + reg_defs_stack.safe_push (NULL); process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP); process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP); @@ -246,25 +253,26 @@ process_uses (df_get_artificial_uses (bb_index), 0); process_defs (df_get_artificial_defs (bb_index), 0); + + return NULL; } /* Pop the definitions created in this basic block when leaving its dominated parts. */ -static void -single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED, - basic_block bb ATTRIBUTE_UNUSED) +void +single_def_use_dom_walker::after_dom_children (basic_block bb ATTRIBUTE_UNUSED) { df_ref saved_def; - while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL) + while ((saved_def = reg_defs_stack.pop ()) != NULL) { unsigned int dregno = DF_REF_REGNO (saved_def); /* See also process_defs. */ - if (saved_def == VEC_index (df_ref, reg_defs, dregno)) - VEC_replace (df_ref, reg_defs, dregno, NULL); + if (saved_def == reg_defs[dregno]) + reg_defs[dregno] = NULL; else - VEC_replace (df_ref, reg_defs, dregno, saved_def); + reg_defs[dregno] = saved_def; } } @@ -275,8 +283,6 @@ static void build_single_def_use_links (void) { - struct dom_walk_data walk_data; - /* We use the multiple definitions problem to compute our restricted use-def chains. */ df_set_flags (DF_EQ_NOTES); @@ -285,31 +291,25 @@ df_analyze (); df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES); - use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ()); - VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ()); + use_def_ref.create (DF_USES_TABLE_SIZE ()); + use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE ()); - reg_defs = VEC_alloc (df_ref, heap, max_reg_num ()); - VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ()); + reg_defs.create (max_reg_num ()); + reg_defs.safe_grow_cleared (max_reg_num ()); - reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10); + reg_defs_stack.create (n_basic_blocks_for_fn (cfun) * 10); local_md = BITMAP_ALLOC (NULL); local_lr = BITMAP_ALLOC (NULL); /* Walk the dominator tree looking for single reaching definitions dominating the uses. This is similar to how SSA form is built. */ - walk_data.dom_direction = CDI_DOMINATORS; - walk_data.initialize_block_local_data = NULL; - walk_data.before_dom_children = single_def_use_enter_block; - walk_data.after_dom_children = single_def_use_leave_block; - - init_walk_dominator_tree (&walk_data); - walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); - fini_walk_dominator_tree (&walk_data); + single_def_use_dom_walker (CDI_DOMINATORS) + .walk (cfun->cfg->x_entry_block_ptr); BITMAP_FREE (local_lr); BITMAP_FREE (local_md); - VEC_free (df_ref, heap, reg_defs); - VEC_free (df_ref, heap, reg_defs_stack); + reg_defs.release (); + reg_defs_stack.release (); } @@ -357,12 +357,12 @@ { case ASHIFT: if (CONST_INT_P (XEXP (x, 1)) - && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x)) - && INTVAL (XEXP (x, 1)) >= 0) + && INTVAL (XEXP (x, 1)) < GET_MODE_UNIT_BITSIZE (GET_MODE (x)) + && INTVAL (XEXP (x, 1)) >= 0) { HOST_WIDE_INT shift = INTVAL (XEXP (x, 1)); PUT_CODE (x, MULT); - XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift, + XEXP (x, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift, GET_MODE (x)); } @@ -391,7 +391,7 @@ for a memory access in the given MODE. */ static bool -should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode, +should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode, addr_space_t as, bool speed) { int gain; @@ -409,11 +409,12 @@ - address_cost (new_rtx, mode, as, speed)); /* If the addresses have equivalent cost, prefer the new address - if it has the highest `rtx_cost'. That has the potential of + if it has the highest `set_src_cost'. That has the potential of eliminating the most insns without additional costs, and it is the same that cse.c used to do. */ if (gain == 0) - gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed); + gain = (set_src_cost (new_rtx, VOIDmode, speed) + - set_src_cost (old_rtx, VOIDmode, speed)); return (gain > 0); } @@ -461,8 +462,8 @@ { rtx x = *px, tem = NULL_RTX, op0, op1, op2; enum rtx_code code = GET_CODE (x); - enum machine_mode mode = GET_MODE (x); - enum machine_mode op_mode; + machine_mode mode = GET_MODE (x); + machine_mode op_mode; bool can_appear = (flags & PR_CAN_APPEAR) != 0; bool valid_ops = true; @@ -626,6 +627,15 @@ *px = tem; + /* Allow replacements that simplify operations on a vector or complex + value to a component. The most prominent case is + (subreg ([vec_]concat ...)). */ + if (REG_P (tem) && !HARD_REGISTER_P (tem) + && (VECTOR_MODE_P (GET_MODE (new_rtx)) + || COMPLEX_MODE_P (GET_MODE (new_rtx))) + && GET_MODE (tem) == GET_MODE_INNER (GET_MODE (new_rtx))) + return true; + /* The replacement we made so far is valid, if all of the recursive replacements were valid, or we could simplify everything to a constant. */ @@ -633,14 +643,16 @@ } -/* for_each_rtx traversal function that returns 1 if BODY points to - a non-constant mem. */ +/* Return true if X constains a non-constant mem. */ -static int -varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED) +static bool +varying_mem_p (const_rtx x) { - rtx x = *body; - return MEM_P (x) && !MEM_READONLY_P (x); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, x, NONCONST) + if (MEM_P (*iter) && !MEM_READONLY_P (*iter)) + return true; + return false; } @@ -653,7 +665,7 @@ Otherwise, we accept simplifications that have a lower or equal cost. */ static rtx -propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx, +propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx, bool speed) { rtx tem; @@ -664,9 +676,13 @@ return NULL_RTX; flags = 0; - if (REG_P (new_rtx) || CONSTANT_P (new_rtx)) + if (REG_P (new_rtx) + || CONSTANT_P (new_rtx) + || (GET_CODE (new_rtx) == SUBREG + && REG_P (SUBREG_REG (new_rtx)) + && !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (new_rtx))))) flags |= PR_CAN_APPEAR; - if (!for_each_rtx (&new_rtx, varying_mem_p, NULL)) + if (!varying_mem_p (new_rtx)) flags |= PR_HANDLE_MEM; if (speed) @@ -697,22 +713,19 @@ between FROM to (but not including) TO. */ static bool -local_ref_killed_between_p (df_ref ref, rtx from, rtx to) +local_ref_killed_between_p (df_ref ref, rtx_insn *from, rtx_insn *to) { - rtx insn; + rtx_insn *insn; for (insn = from; insn != to; insn = NEXT_INSN (insn)) { - df_ref *def_rec; + df_ref def; if (!INSN_P (insn)) continue; - for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) - { - df_ref def = *def_rec; - if (DF_REF_REGNO (ref) == DF_REF_REGNO (def)) - return true; - } + FOR_EACH_INSN_DEF (def, insn) + if (DF_REF_REGNO (ref) == DF_REF_REGNO (def)) + return true; } return false; } @@ -727,7 +740,7 @@ we check if the definition is killed after DEF_INSN or before TARGET_INSN insn, in their respective basic blocks. */ static bool -use_killed_between (df_ref use, rtx def_insn, rtx target_insn) +use_killed_between (df_ref use, rtx_insn *def_insn, rtx_insn *target_insn) { basic_block def_bb = BLOCK_FOR_INSN (def_insn); basic_block target_bb = BLOCK_FOR_INSN (target_insn); @@ -791,35 +804,33 @@ would require full computation of available expressions; we check only restricted conditions, see use_killed_between. */ static bool -all_uses_available_at (rtx def_insn, rtx target_insn) +all_uses_available_at (rtx_insn *def_insn, rtx_insn *target_insn) { - df_ref *use_rec; + df_ref use; struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn); rtx def_set = single_set (def_insn); + rtx_insn *next; gcc_assert (def_set); /* If target_insn comes right after def_insn, which is very common - for addresses, we can use a quicker test. */ - if (NEXT_INSN (def_insn) == target_insn - && REG_P (SET_DEST (def_set))) + for addresses, we can use a quicker test. Ignore debug insns + other than target insns for this. */ + next = NEXT_INSN (def_insn); + while (next && next != target_insn && DEBUG_INSN_P (next)) + next = NEXT_INSN (next); + if (next == target_insn && REG_P (SET_DEST (def_set))) { rtx def_reg = SET_DEST (def_set); /* If the insn uses the reg that it defines, the substitution is invalid. */ - for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++) - { - df_ref use = *use_rec; - if (rtx_equal_p (DF_REF_REG (use), def_reg)) - return false; - } - for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++) - { - df_ref use = *use_rec; - if (rtx_equal_p (DF_REF_REG (use), def_reg)) - return false; - } + FOR_EACH_INSN_INFO_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), def_reg)) + return false; + FOR_EACH_INSN_INFO_EQ_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), def_reg)) + return false; } else { @@ -827,17 +838,15 @@ /* Look at all the uses of DEF_INSN, and see if they are not killed between DEF_INSN and TARGET_INSN. */ - for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++) + FOR_EACH_INSN_INFO_USE (use, insn_info) { - df_ref use = *use_rec; if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg)) return false; if (use_killed_between (use, def_insn, target_insn)) return false; } - for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++) + FOR_EACH_INSN_INFO_EQ_USE (use, insn_info) { - df_ref use = *use_rec; if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg)) return false; if (use_killed_between (use, def_insn, target_insn)) @@ -850,26 +859,22 @@ static df_ref *active_defs; -#ifdef ENABLE_CHECKING static sparseset active_defs_check; -#endif /* Fill the ACTIVE_DEFS array with the use->def link for the registers mentioned in USE_REC. Register the valid entries in ACTIVE_DEFS_CHECK too, for checking purposes. */ static void -register_active_defs (df_ref *use_rec) +register_active_defs (df_ref use) { - while (*use_rec) + for (; use; use = DF_REF_NEXT_LOC (use)) { - df_ref use = *use_rec++; df_ref def = get_def_for_use (use); int regno = DF_REF_REGNO (use); -#ifdef ENABLE_CHECKING - sparseset_set_bit (active_defs_check, regno); -#endif + if (flag_checking) + sparseset_set_bit (active_defs_check, regno); active_defs[regno] = def; } } @@ -882,11 +887,10 @@ I'm not doing this yet, though. */ static void -update_df_init (rtx def_insn, rtx insn) +update_df_init (rtx_insn *def_insn, rtx_insn *insn) { -#ifdef ENABLE_CHECKING - sparseset_clear (active_defs_check); -#endif + if (flag_checking) + sparseset_clear (active_defs_check); register_active_defs (DF_INSN_USES (def_insn)); register_active_defs (DF_INSN_USES (insn)); register_active_defs (DF_INSN_EQ_USES (insn)); @@ -897,22 +901,19 @@ in the ACTIVE_DEFS array to match pseudos to their def. */ static inline void -update_uses (df_ref *use_rec) +update_uses (df_ref use) { - while (*use_rec) + for (; use; use = DF_REF_NEXT_LOC (use)) { - df_ref use = *use_rec++; int regno = DF_REF_REGNO (use); /* Set up the use-def chain. */ - if (DF_REF_ID (use) >= (int) VEC_length (df_ref, use_def_ref)) - VEC_safe_grow_cleared (df_ref, heap, use_def_ref, - DF_REF_ID (use) + 1); + if (DF_REF_ID (use) >= (int) use_def_ref.length ()) + use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1); -#ifdef ENABLE_CHECKING - gcc_assert (sparseset_bit_p (active_defs_check, regno)); -#endif - VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), active_defs[regno]); + if (flag_checking) + gcc_assert (sparseset_bit_p (active_defs_check, regno)); + use_def_ref[DF_REF_ID (use)] = active_defs[regno]; } } @@ -921,7 +922,7 @@ uses if NOTES_ONLY is true. */ static void -update_df (rtx insn, rtx note) +update_df (rtx_insn *insn, rtx note) { struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); @@ -948,9 +949,10 @@ performed. */ static bool -try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal) +try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn, + bool set_reg_equal) { - rtx insn = DF_REF_INSN (use); + rtx_insn *insn = DF_REF_INSN (use); rtx set = single_set (insn); rtx note = NULL_RTX; bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); @@ -963,7 +965,7 @@ multiple sets. If so, assume the cost of the new instruction is not greater than the old one. */ if (set) - old_cost = rtx_cost (SET_SRC (set), SET, speed); + old_cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed); if (dump_file) { fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn)); @@ -984,7 +986,8 @@ else if (DF_REF_TYPE (use) == DF_REF_REG_USE && set - && rtx_cost (SET_SRC (set), SET, speed) > old_cost) + && (set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed) + > old_cost)) { if (dump_file) fprintf (dump_file, "Changes to insn %d not profitable\n", @@ -1012,10 +1015,27 @@ making a new one if one does not already exist. */ if (set_reg_equal) { - if (dump_file) - fprintf (dump_file, " Setting REG_EQUAL note\n"); + /* If there are any paradoxical SUBREGs, don't add REG_EQUAL note, + because the bits in there can be anything and so might not + match the REG_EQUAL note content. See PR70574. */ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST) + { + rtx x = *iter; + if (SUBREG_P (x) && paradoxical_subreg_p (x)) + { + set_reg_equal = false; + break; + } + } - note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx)); + if (set_reg_equal) + { + if (dump_file) + fprintf (dump_file, " Setting REG_EQUAL note\n"); + + note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx)); + } } } @@ -1031,27 +1051,20 @@ load from memory. */ static bool -free_load_extend (rtx src, rtx insn) +free_load_extend (rtx src, rtx_insn *insn) { rtx reg; - df_ref *use_vec; - df_ref use = 0, def; + df_ref def, use; reg = XEXP (src, 0); -#ifdef LOAD_EXTEND_OP - if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src)) -#endif + if (load_extend_op (GET_MODE (reg)) != GET_CODE (src)) return false; - for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++) - { - use = *use_vec; - - if (!DF_REF_IS_ARTIFICIAL (use) - && DF_REF_TYPE (use) == DF_REF_REG_USE - && DF_REF_REG (use) == reg) - break; - } + FOR_EACH_INSN_USE (use, insn) + if (!DF_REF_IS_ARTIFICIAL (use) + && DF_REF_TYPE (use) == DF_REF_REG_USE + && DF_REF_REG (use) == reg) + break; if (!use) return false; @@ -1077,20 +1090,20 @@ /* If USE is a subreg, see if it can be replaced by a pseudo. */ static bool -forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set) +forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set) { rtx use_reg = DF_REF_REG (use); - rtx use_insn, src; + rtx_insn *use_insn; + rtx src; + scalar_int_mode int_use_mode, src_mode; /* Only consider subregs... */ - enum machine_mode use_mode = GET_MODE (use_reg); + machine_mode use_mode = GET_MODE (use_reg); if (GET_CODE (use_reg) != SUBREG || !REG_P (SET_DEST (def_set))) return false; - /* If this is a paradoxical SUBREG... */ - if (GET_MODE_SIZE (use_mode) - > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg)))) + if (paradoxical_subreg_p (use_reg)) { /* If this is a paradoxical SUBREG, we have no idea what value the extra bits would have. However, if the operand is equivalent to @@ -1101,6 +1114,7 @@ src = SET_SRC (def_set); if (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src)) + && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER && GET_MODE (SUBREG_REG (src)) == use_mode && subreg_lowpart_p (src) && all_uses_available_at (def_insn, use_insn)) @@ -1111,16 +1125,32 @@ /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG is the low part of the reg being extended then just use the inner operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will - be removed due to it matching a LOAD_EXTEND_OP load from memory. */ - else if (subreg_lowpart_p (use_reg)) + be removed due to it matching a LOAD_EXTEND_OP load from memory, + or due to the operation being a no-op when applied to registers. + For example, if we have: + + A: (set (reg:DI X) (sign_extend:DI (reg:SI Y))) + B: (... (subreg:SI (reg:DI X)) ...) + + and mode_rep_extended says that Y is already sign-extended, + the backend will typically allow A to be combined with the + definition of Y or, failing that, allow A to be deleted after + reload through register tying. Introducing more uses of Y + prevents both optimisations. */ + else if (is_a <scalar_int_mode> (use_mode, &int_use_mode) + && subreg_lowpart_p (use_reg)) { use_insn = DF_REF_INSN (use); src = SET_SRC (def_set); if ((GET_CODE (src) == ZERO_EXTEND || GET_CODE (src) == SIGN_EXTEND) + && is_a <scalar_int_mode> (GET_MODE (src), &src_mode) && REG_P (XEXP (src, 0)) + && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER && GET_MODE (XEXP (src, 0)) == use_mode && !free_load_extend (src, def_insn) + && (targetm.mode_rep_extended (int_use_mode, src_mode) + != (int) GET_CODE (src)) && all_uses_available_at (def_insn, use_insn)) return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0), def_insn, false); @@ -1132,11 +1162,12 @@ /* Try to replace USE with SRC (defined in DEF_INSN) in __asm. */ static bool -forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg) +forward_propagate_asm (df_ref use, rtx_insn *def_insn, rtx def_set, rtx reg) { - rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc; + rtx_insn *use_insn = DF_REF_INSN (use); + rtx src, use_pat, asm_operands, new_rtx, *loc; int speed_p, i; - df_ref *use_vec; + df_ref uses; gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0); @@ -1145,8 +1176,8 @@ /* In __asm don't replace if src might need more registers than reg, as that could increase register pressure on the __asm. */ - use_vec = DF_INSN_USES (def_insn); - if (use_vec[0] && use_vec[1]) + uses = DF_INSN_USES (def_insn); + if (uses && DF_REF_NEXT_LOC (uses)) return false; update_df_init (def_insn, use_insn); @@ -1209,13 +1240,13 @@ result. */ static bool -forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set) +forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set) { - rtx use_insn = DF_REF_INSN (use); + rtx_insn *use_insn = DF_REF_INSN (use); rtx use_set = single_set (use_insn); rtx src, reg, new_rtx, *loc; bool set_reg_equal; - enum machine_mode mode; + machine_mode mode; int asm_use = -1; if (INSN_CODE (use_insn) < 0) @@ -1230,21 +1261,24 @@ /* If def and use are subreg, check if they match. */ reg = DF_REF_REG (use); - if (GET_CODE (reg) == SUBREG - && GET_CODE (SET_DEST (def_set)) == SUBREG - && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg) - || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))) + if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG) + { + if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)) + return false; + } + /* Check if the def had a subreg, but the use has the whole reg. */ + else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG) return false; - - /* Check if the def had a subreg, but the use has the whole reg. */ - if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG) - return false; - /* Check if the use has a subreg, but the def had the whole reg. Unlike the previous case, the optimization is possible and often useful indeed. */ - if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set))) + else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set))) reg = SUBREG_REG (reg); + /* Make sure that we can treat REG as having the same mode as the + source of DEF_SET. */ + if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)) + return false; + /* Check if the substitution is valid (last, because it's the most expensive check!). */ src = SET_SRC (def_set); @@ -1294,9 +1328,23 @@ /* Do not replace an existing REG_EQUAL note if the insn is not recognized. Either we're already replacing in the note, or we'll separately try plugging the definition in the note and simplifying. - And only install a REQ_EQUAL note when the destination is a REG, - as the note would be invalid otherwise. */ - set_reg_equal = (note == NULL_RTX && REG_P (SET_DEST (use_set))); + And only install a REQ_EQUAL note when the destination is a REG + that isn't mentioned in USE_SET, as the note would be invalid + otherwise. We also don't want to install a note if we are merely + propagating a pseudo since verifying that this pseudo isn't dead + is a pain; moreover such a note won't help anything. + If the use is a paradoxical subreg, make sure we don't add a + REG_EQUAL note for it, because it is not equivalent, it is one + possible value for it, but we can't rely on it holding that value. + See PR70574. */ + set_reg_equal = (note == NULL_RTX + && REG_P (SET_DEST (use_set)) + && !REG_P (src) + && !(GET_CODE (src) == SUBREG + && REG_P (SUBREG_REG (src))) + && !reg_mentioned_p (SET_DEST (use_set), + SET_SRC (use_set)) + && !paradoxical_subreg_p (DF_REF_REG (use))); } if (GET_MODE (*loc) == VOIDmode) @@ -1322,7 +1370,8 @@ forward_propagate_into (df_ref use) { df_ref def; - rtx def_insn, def_set, use_insn; + rtx_insn *def_insn, *use_insn; + rtx def_set; rtx parent; if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) @@ -1365,6 +1414,8 @@ if (forward_propagate_and_simplify (use, def_insn, def_set) || forward_propagate_subreg (use, def_insn, def_set)) { + propagations_left--; + if (cfun->can_throw_non_call_exceptions && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX) && purge_dead_edges (DF_REF_BB (use))) @@ -1381,18 +1432,19 @@ calculate_dominance_info (CDI_DOMINATORS); /* We do not always want to propagate into loops, so we have to find - loops and be careful about them. But we have to call flow_loops_find - before df_analyze, because flow_loops_find may introduce new jump - insns (sadly) if we are not working in cfglayout mode. */ - loop_optimizer_init (0); + loops and be careful about them. Avoid CFG modifications so that + we don't have to update dominance information afterwards for + build_single_def_use_links. */ + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); build_single_def_use_links (); df_set_flags (DF_DEFER_INSN_RESCAN); active_defs = XNEWVEC (df_ref, max_reg_num ()); -#ifdef ENABLE_CHECKING - active_defs_check = sparseset_alloc (max_reg_num ()); -#endif + if (flag_checking) + active_defs_check = sparseset_alloc (max_reg_num ()); + + propagations_left = DF_USES_TABLE_SIZE (); } static void @@ -1400,11 +1452,10 @@ { loop_optimizer_finalize (); - VEC_free (df_ref, heap, use_def_ref); + use_def_ref.release (); free (active_defs); -#ifdef ENABLE_CHECKING - sparseset_free (active_defs_check); -#endif + if (flag_checking) + sparseset_free (active_defs_check); free_dominance_info (CDI_DOMINATORS); cleanup_cfg (0); @@ -1429,7 +1480,6 @@ fwprop (void) { unsigned i; - bool need_cleanup = false; fwprop_init (); @@ -1441,48 +1491,62 @@ for (i = 0; i < DF_USES_TABLE_SIZE (); i++) { + if (!propagations_left) + break; + df_ref use = DF_USES_GET (i); if (use) if (DF_REF_TYPE (use) == DF_REF_REG_USE || DF_REF_BB (use)->loop_father == NULL /* The outer most loop is not really a loop. */ || loop_outer (DF_REF_BB (use)->loop_father) == NULL) - need_cleanup |= forward_propagate_into (use); + forward_propagate_into (use); } fwprop_done (); - if (need_cleanup) - cleanup_cfg (0); return 0; } -struct rtl_opt_pass pass_rtl_fwprop = +namespace { + +const pass_data pass_data_rtl_fwprop = +{ + RTL_PASS, /* type */ + "fwprop1", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_FWPROP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_rtl_fwprop : public rtl_opt_pass { - { - RTL_PASS, - "fwprop1", /* name */ - gate_fwprop, /* gate */ - fwprop, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_FWPROP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish - | TODO_verify_flow - | TODO_verify_rtl_sharing - | TODO_dump_func /* todo_flags_finish */ - } -}; +public: + pass_rtl_fwprop (gcc::context *ctxt) + : rtl_opt_pass (pass_data_rtl_fwprop, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) { return gate_fwprop (); } + virtual unsigned int execute (function *) { return fwprop (); } + +}; // class pass_rtl_fwprop + +} // anon namespace + +rtl_opt_pass * +make_pass_rtl_fwprop (gcc::context *ctxt) +{ + return new pass_rtl_fwprop (ctxt); +} static unsigned int fwprop_addr (void) { unsigned i; - bool need_cleanup = false; fwprop_init (); @@ -1490,38 +1554,54 @@ end, and we'll go through them as well. */ for (i = 0; i < DF_USES_TABLE_SIZE (); i++) { + if (!propagations_left) + break; + df_ref use = DF_USES_GET (i); if (use) if (DF_REF_TYPE (use) != DF_REF_REG_USE && DF_REF_BB (use)->loop_father != NULL /* The outer most loop is not really a loop. */ && loop_outer (DF_REF_BB (use)->loop_father) != NULL) - need_cleanup |= forward_propagate_into (use); + forward_propagate_into (use); } fwprop_done (); - - if (need_cleanup) - cleanup_cfg (0); return 0; } -struct rtl_opt_pass pass_rtl_fwprop_addr = +namespace { + +const pass_data pass_data_rtl_fwprop_addr = +{ + RTL_PASS, /* type */ + "fwprop2", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_FWPROP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_rtl_fwprop_addr : public rtl_opt_pass { - { - RTL_PASS, - "fwprop2", /* name */ - gate_fwprop, /* gate */ - fwprop_addr, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_FWPROP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing | - TODO_dump_func /* todo_flags_finish */ - } -}; +public: + pass_rtl_fwprop_addr (gcc::context *ctxt) + : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) { return gate_fwprop (); } + virtual unsigned int execute (function *) { return fwprop_addr (); } + +}; // class pass_rtl_fwprop_addr + +} // anon namespace + +rtl_opt_pass * +make_pass_rtl_fwprop_addr (gcc::context *ctxt) +{ + return new pass_rtl_fwprop_addr (ctxt); +}