diff gcc/tree-predcom.c @ 16:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
line wrap: on
line diff
--- a/gcc/tree-predcom.c	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/tree-predcom.c	Fri Oct 27 22:46:09 2017 +0900
@@ -1,6 +1,5 @@
 /* Predictive commoning.
-   Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
-   Free Software Foundation, Inc.
+   Copyright (C) 2005-2017 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -100,7 +99,7 @@
       and we can combine the chains for e and f into one chain.
 
    5) For each root reference (end of the chain) R, let N be maximum distance
-      of a reference reusing its value.  Variables R0 upto RN are created,
+      of a reference reusing its value.  Variables R0 up to RN are created,
       together with phi nodes that transfer values from R1 .. RN to
       R0 .. R(N-1).
       Initial values are loaded to R0..R(N-1) (in case not all references
@@ -157,29 +156,45 @@
           b[10] = x;
        }
 
-   TODO -- stores killing other stores can be taken into account, e.g.,
-   for (i = 0; i < n; i++)
-     {
-       a[i] = 1;
-       a[i+2] = 2;
-     }
-
-   can be replaced with
-
-   t0 = a[0];
-   t1 = a[1];
-   for (i = 0; i < n; i++)
-     {
-       a[i] = 1;
-       t2 = 2;
-       t0 = t1;
-       t1 = t2;
-     }
-   a[n] = t0;
-   a[n+1] = t1;
-
-   The interesting part is that this would generalize store motion; still, since
-   sm is performed elsewhere, it does not seem that important.
+   Apart from predictive commoning on Load-Load and Store-Load chains, we
+   also support Store-Store chains -- stores killed by other store can be
+   eliminated.  Given below example:
+
+     for (i = 0; i < n; i++)
+       {
+	 a[i] = 1;
+	 a[i+2] = 2;
+       }
+
+   It can be replaced with:
+
+     t0 = a[0];
+     t1 = a[1];
+     for (i = 0; i < n; i++)
+       {
+	 a[i] = 1;
+	 t2 = 2;
+	 t0 = t1;
+	 t1 = t2;
+       }
+     a[n] = t0;
+     a[n+1] = t1;
+
+   If the loop runs more than 1 iterations, it can be further simplified into:
+
+     for (i = 0; i < n; i++)
+       {
+	 a[i] = 1;
+       }
+     a[n] = 2;
+     a[n+1] = 2;
+
+   The interesting part is this can be viewed either as general store motion
+   or general dead store elimination in either intra/inter-iterations way.
+
+   TODO: For now, we don't support store-store chains in multi-exit loops.  We
+   force to not unroll in case of store-store chain even if other chains might
+   ask for unroll.
 
    Predictive commoning can be generalized for arbitrary computations (not
    just memory loads), and also nontrivial transfer functions (e.g., replacing
@@ -188,21 +203,33 @@
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
 #include "tree.h"
-#include "tm_p.h"
+#include "gimple.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-pretty-print.h"
+#include "alias.h"
+#include "fold-const.h"
 #include "cfgloop.h"
-#include "tree-flow.h"
-#include "ggc.h"
+#include "tree-eh.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "tree-into-ssa.h"
+#include "tree-dfa.h"
+#include "tree-ssa.h"
 #include "tree-data-ref.h"
 #include "tree-scalar-evolution.h"
-#include "tree-chrec.h"
 #include "params.h"
-#include "tree-pretty-print.h"
-#include "gimple-pretty-print.h"
-#include "tree-pass.h"
 #include "tree-affine.h"
-#include "tree-inline.h"
+#include "builtins.h"
 
 /* The maximum number of iterations between the considered memory
    references.  */
@@ -218,7 +245,7 @@
   struct data_reference *ref;
 
   /* The statement in that the reference appears.  */
-  gimple stmt;
+  gimple *stmt;
 
   /* In case that STMT is a phi node, this field is set to the SSA name
      defined by it in replace_phis_by_defined_names (in order to avoid
@@ -230,7 +257,7 @@
   unsigned distance;
 
   /* Number of iterations offset from the first reference in the component.  */
-  double_int offset;
+  widest_int offset;
 
   /* Number of the reference in a component, in dominance ordering.  */
   unsigned pos;
@@ -240,8 +267,6 @@
   unsigned always_accessed : 1;
 } *dref;
 
-DEF_VEC_P (dref);
-DEF_VEC_ALLOC_P (dref, heap);
 
 /* Type of the chain of the references.  */
 
@@ -256,6 +281,9 @@
   /* Root of the chain is store, the rest are loads.  */
   CT_STORE_LOAD,
 
+  /* There are only stores in the chain.  */
+  CT_STORE_STORE,
+
   /* A combination of two chains.  */
   CT_COMBINATION
 };
@@ -274,16 +302,25 @@
   struct chain *ch1, *ch2;
 
   /* The references in the chain.  */
-  VEC(dref,heap) *refs;
+  vec<dref> refs;
 
   /* The maximum distance of the reference in the chain from the root.  */
   unsigned length;
 
   /* The variables used to copy the value throughout iterations.  */
-  VEC(tree,heap) *vars;
+  vec<tree> vars;
 
   /* Initializers for the variables.  */
-  VEC(tree,heap) *inits;
+  vec<tree> inits;
+
+  /* Finalizers for the eliminated stores.  */
+  vec<tree> finis;
+
+  /* gimple stmts intializing the initial variables of the chain.  */
+  gimple_seq init_seq;
+
+  /* gimple stmts finalizing the eliminated stores of the chain.  */
+  gimple_seq fini_seq;
 
   /* True if there is a use of a variable with the maximal distance
      that comes after the root in the loop.  */
@@ -294,10 +331,12 @@
 
   /* True if this chain was combined together with some other chain.  */
   unsigned combined : 1;
+
+  /* True if this is store elimination chain and eliminated stores store
+     loop invariant value into memory.  */
+  unsigned inv_store_elimination : 1;
 } *chain_p;
 
-DEF_VEC_P (chain_p);
-DEF_VEC_ALLOC_P (chain_p, heap);
 
 /* Describes the knowledge about the step of the memory references in
    the component.  */
@@ -319,11 +358,15 @@
 struct component
 {
   /* The references in the component.  */
-  VEC(dref,heap) *refs;
+  vec<dref> refs;
 
   /* What we know about the step of the references in the component.  */
   enum ref_step_type comp_step;
 
+  /* True if all references in component are stores and we try to do
+     intra/inter loop iteration dead store elimination.  */
+  bool eliminate_store_p;
+
   /* Next component in the list.  */
   struct component *next;
 };
@@ -334,7 +377,7 @@
 
 /* Cache used by tree_to_aff_combination_expand.  */
 
-static struct pointer_map_t *name_expansions;
+static hash_map<tree, name_expansion *> *name_expansions;
 
 /* Dumps data reference REF to FILE.  */
 
@@ -350,7 +393,7 @@
 	       DR_IS_READ (ref->ref) ? "" : ", write");
 
       fprintf (file, "      offset ");
-      dump_double_int (file, ref->offset, false);
+      print_decs (ref->offset, file);
       fprintf (file, "\n");
 
       fprintf (file, "      distance %u\n", ref->distance);
@@ -394,6 +437,10 @@
       chain_type = "Store-loads";
       break;
 
+    case CT_STORE_STORE:
+      chain_type = "Store-stores";
+      break;
+
     case CT_COMBINATION:
       chain_type = "Combination";
       break;
@@ -417,10 +464,10 @@
       fprintf (file, "\n");
     }
 
-  if (chain->vars)
+  if (chain->vars.exists ())
     {
       fprintf (file, "  vars");
-      FOR_EACH_VEC_ELT (tree, chain->vars, i, var)
+      FOR_EACH_VEC_ELT (chain->vars, i, var)
 	{
 	  fprintf (file, " ");
 	  print_generic_expr (file, var, TDF_SLIM);
@@ -428,10 +475,10 @@
       fprintf (file, "\n");
     }
 
-  if (chain->inits)
+  if (chain->inits.exists ())
     {
       fprintf (file, "  inits");
-      FOR_EACH_VEC_ELT (tree, chain->inits, i, var)
+      FOR_EACH_VEC_ELT (chain->inits, i, var)
 	{
 	  fprintf (file, " ");
 	  print_generic_expr (file, var, TDF_SLIM);
@@ -440,7 +487,7 @@
     }
 
   fprintf (file, "  references:\n");
-  FOR_EACH_VEC_ELT (dref, chain->refs, i, a)
+  FOR_EACH_VEC_ELT (chain->refs, i, a)
     dump_dref (file, a);
 
   fprintf (file, "\n");
@@ -448,14 +495,14 @@
 
 /* Dumps CHAINS to FILE.  */
 
-extern void dump_chains (FILE *, VEC (chain_p, heap) *);
+extern void dump_chains (FILE *, vec<chain_p> );
 void
-dump_chains (FILE *file, VEC (chain_p, heap) *chains)
+dump_chains (FILE *file, vec<chain_p> chains)
 {
   chain_p chain;
   unsigned i;
 
-  FOR_EACH_VEC_ELT (chain_p, chains, i, chain)
+  FOR_EACH_VEC_ELT (chains, i, chain)
     dump_chain (file, chain);
 }
 
@@ -470,7 +517,7 @@
 
   fprintf (file, "Component%s:\n",
 	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
-  FOR_EACH_VEC_ELT (dref, comp->refs, i, a)
+  FOR_EACH_VEC_ELT (comp->refs, i, a)
     dump_dref (file, a);
   fprintf (file, "\n");
 }
@@ -498,12 +545,18 @@
   if (chain == NULL)
     return;
 
-  FOR_EACH_VEC_ELT (dref, chain->refs, i, ref)
+  FOR_EACH_VEC_ELT (chain->refs, i, ref)
     free (ref);
 
-  VEC_free (dref, heap, chain->refs);
-  VEC_free (tree, heap, chain->vars);
-  VEC_free (tree, heap, chain->inits);
+  chain->refs.release ();
+  chain->vars.release ();
+  chain->inits.release ();
+  if (chain->init_seq)
+    gimple_seq_discard (chain->init_seq);
+
+  chain->finis.release ();
+  if (chain->fini_seq)
+    gimple_seq_discard (chain->fini_seq);
 
   free (chain);
 }
@@ -511,14 +564,14 @@
 /* Frees CHAINS.  */
 
 static void
-release_chains (VEC (chain_p, heap) *chains)
+release_chains (vec<chain_p> chains)
 {
   unsigned i;
   chain_p chain;
 
-  FOR_EACH_VEC_ELT (chain_p, chains, i, chain)
+  FOR_EACH_VEC_ELT (chains, i, chain)
     release_chain (chain);
-  VEC_free (chain_p, heap, chains);
+  chains.release ();
 }
 
 /* Frees a component COMP.  */
@@ -526,7 +579,7 @@
 static void
 release_component (struct component *comp)
 {
-  VEC_free (dref, heap, comp->refs);
+  comp->refs.release ();
   free (comp);
 }
 
@@ -598,6 +651,7 @@
   tree ref = DR_REF (a), step = DR_STEP (a);
 
   if (!step
+      || TREE_THIS_VOLATILE (ref)
       || !is_gimple_reg_type (TREE_TYPE (ref))
       || tree_could_throw_p (ref))
     return false;
@@ -617,11 +671,12 @@
 static void
 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
 {
+  tree type = TREE_TYPE (DR_OFFSET (dr));
   aff_tree delta;
 
-  tree_to_aff_combination_expand (DR_OFFSET (dr), sizetype, offset,
+  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
 				  &name_expansions);
-  aff_combination_const (&delta, sizetype, tree_to_double_int (DR_INIT (dr)));
+  aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
   aff_combination_add (offset, &delta);
 }
 
@@ -633,7 +688,7 @@
 
 static bool
 determine_offset (struct data_reference *a, struct data_reference *b,
-		  double_int *off)
+		  widest_int *off)
 {
   aff_tree diff, baseb, step;
   tree typea, typeb;
@@ -654,7 +709,7 @@
     {
       /* If the references have loop invariant address, check that they access
 	 exactly the same location.  */
-      *off = double_int_zero;
+      *off = 0;
       return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
 	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
     }
@@ -663,10 +718,10 @@
      is a multiple of step.  */
   aff_combination_dr_offset (a, &diff);
   aff_combination_dr_offset (b, &baseb);
-  aff_combination_scale (&baseb, double_int_minus_one);
+  aff_combination_scale (&baseb, -1);
   aff_combination_add (&diff, &baseb);
 
-  tree_to_aff_combination_expand (DR_STEP (a), sizetype,
+  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
 				  &step, &name_expansions);
   return aff_combination_constant_multiple_p (&diff, &step, off);
 }
@@ -678,13 +733,13 @@
 last_always_executed_block (struct loop *loop)
 {
   unsigned i;
-  VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+  vec<edge> exits = get_loop_exit_edges (loop);
   edge ex;
   basic_block last = loop->latch;
 
-  FOR_EACH_VEC_ELT (edge, exits, i, ex)
+  FOR_EACH_VEC_ELT (exits, i, ex)
     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
-  VEC_free (edge, heap, exits);
+  exits.release ();
 
   return last;
 }
@@ -693,10 +748,10 @@
 
 static struct component *
 split_data_refs_to_components (struct loop *loop,
-			       VEC (data_reference_p, heap) *datarefs,
-			       VEC (ddr_p, heap) *depends)
+			       vec<data_reference_p> datarefs,
+			       vec<ddr_p> depends)
 {
-  unsigned i, n = VEC_length (data_reference_p, datarefs);
+  unsigned i, n = datarefs.length ();
   unsigned ca, ia, ib, bad;
   unsigned *comp_father = XNEWVEC (unsigned, n + 1);
   unsigned *comp_size = XNEWVEC (unsigned, n + 1);
@@ -705,9 +760,11 @@
   struct data_dependence_relation *ddr;
   struct component *comp_list = NULL, *comp;
   dref dataref;
+  /* Don't do store elimination if loop has multiple exit edges.  */
+  bool eliminate_store_p = single_exit (loop) != NULL;
   basic_block last_always_executed = last_always_executed_block (loop);
 
-  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       if (!DR_REF (dr))
 	{
@@ -715,6 +772,9 @@
 	     just fail.  */
 	  goto end;
 	}
+      /* predcom pass isn't prepared to handle calls with data references.  */
+      if (is_gimple_call (DR_STMT (dr)))
+	goto end;
       dr->aux = (void *) (size_t) i;
       comp_father[i] = i;
       comp_size[i] = 1;
@@ -724,7 +784,7 @@
   comp_father[n] = n;
   comp_size[n] = 1;
 
-  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       enum ref_step_type dummy;
 
@@ -735,15 +795,23 @@
 	}
     }
 
-  FOR_EACH_VEC_ELT (ddr_p, depends, i, ddr)
+  FOR_EACH_VEC_ELT (depends, i, ddr)
     {
-      double_int dummy_off;
+      widest_int dummy_off;
 
       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 	continue;
 
       dra = DDR_A (ddr);
       drb = DDR_B (ddr);
+
+      /* Don't do store elimination if there is any unknown dependence for
+	 any store data reference.  */
+      if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
+	  && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
+	      || DDR_NUM_DIST_VECTS (ddr) == 0))
+	eliminate_store_p = false;
+
       ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
       ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
       if (ia == ib)
@@ -752,17 +820,62 @@
       bad = component_of (comp_father, n);
 
       /* If both A and B are reads, we may ignore unsuitable dependences.  */
-      if (DR_IS_READ (dra) && DR_IS_READ (drb)
-	  && (ia == bad || ib == bad
-	      || !determine_offset (dra, drb, &dummy_off)))
-	continue;
+      if (DR_IS_READ (dra) && DR_IS_READ (drb))
+	{
+	  if (ia == bad || ib == bad
+	      || !determine_offset (dra, drb, &dummy_off))
+	    continue;
+	}
+      /* If A is read and B write or vice versa and there is unsuitable
+	 dependence, instead of merging both components into a component
+	 that will certainly not pass suitable_component_p, just put the
+	 read into bad component, perhaps at least the write together with
+	 all the other data refs in it's component will be optimizable.  */
+      else if (DR_IS_READ (dra) && ib != bad)
+	{
+	  if (ia == bad)
+	    continue;
+	  else if (!determine_offset (dra, drb, &dummy_off))
+	    {
+	      merge_comps (comp_father, comp_size, bad, ia);
+	      continue;
+	    }
+	}
+      else if (DR_IS_READ (drb) && ia != bad)
+	{
+	  if (ib == bad)
+	    continue;
+	  else if (!determine_offset (dra, drb, &dummy_off))
+	    {
+	      merge_comps (comp_father, comp_size, bad, ib);
+	      continue;
+	    }
+	}
+      else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
+	       && ia != bad && ib != bad
+	       && !determine_offset (dra, drb, &dummy_off))
+	{
+	  merge_comps (comp_father, comp_size, bad, ia);
+	  merge_comps (comp_father, comp_size, bad, ib);
+	  continue;
+	}
 
       merge_comps (comp_father, comp_size, ia, ib);
     }
 
+  if (eliminate_store_p)
+    {
+      tree niters = number_of_latch_executions (loop);
+
+      /* Don't do store elimination if niters info is unknown because stores
+	 in the last iteration can't be eliminated and we need to recover it
+	 after loop.  */
+      eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
+    }
+
   comps = XCNEWVEC (struct component *, n);
   bad = component_of (comp_father, n);
-  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       ia = (unsigned) (size_t) dr->aux;
       ca = component_of (comp_father, ia);
@@ -773,21 +886,24 @@
       if (!comp)
 	{
 	  comp = XCNEW (struct component);
-	  comp->refs = VEC_alloc (dref, heap, comp_size[ca]);
+	  comp->refs.create (comp_size[ca]);
+	  comp->eliminate_store_p = eliminate_store_p;
 	  comps[ca] = comp;
 	}
 
       dataref = XCNEW (struct dref_d);
       dataref->ref = dr;
       dataref->stmt = DR_STMT (dr);
-      dataref->offset = double_int_zero;
+      dataref->offset = 0;
       dataref->distance = 0;
 
       dataref->always_accessed
 	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
 				gimple_bb (dataref->stmt));
-      dataref->pos = VEC_length (dref, comp->refs);
-      VEC_quick_push (dref, comp->refs, dataref);
+      dataref->pos = comp->refs.length ();
+      comp->refs.quick_push (dataref);
+      if (DR_IS_READ (dr))
+	comp->eliminate_store_p = false;
     }
 
   for (i = 0; i < n; i++)
@@ -819,7 +935,7 @@
   basic_block ba, bp = loop->header;
   bool ok, has_write = false;
 
-  FOR_EACH_VEC_ELT (dref, comp->refs, i, a)
+  FOR_EACH_VEC_ELT (comp->refs, i, a)
     {
       ba = gimple_bb (a->stmt);
 
@@ -833,23 +949,19 @@
 	has_write = true;
     }
 
-  first = VEC_index (dref, comp->refs, 0);
+  first = comp->refs[0];
   ok = suitable_reference_p (first->ref, &comp->comp_step);
   gcc_assert (ok);
-  first->offset = double_int_zero;
-
-  for (i = 1; VEC_iterate (dref, comp->refs, i, a); i++)
+  first->offset = 0;
+
+  for (i = 1; comp->refs.iterate (i, &a); i++)
     {
       if (!determine_offset (first->ref, a->ref, &a->offset))
 	return false;
 
-#ifdef ENABLE_CHECKING
-      {
-	enum ref_step_type a_step;
-	ok = suitable_reference_p (a->ref, &a_step);
-	gcc_assert (ok && a_step == comp->comp_step);
-      }
-#endif
+      enum ref_step_type a_step;
+      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
+			   && a_step == comp->comp_step);
     }
 
   /* If there is a write inside the component, we must know whether the
@@ -883,7 +995,7 @@
 	  unsigned i;
 
 	  *comp = act->next;
-	  FOR_EACH_VEC_ELT (dref, act->refs, i, ref)
+	  FOR_EACH_VEC_ELT (act->refs, i, ref)
 	    free (ref);
 	  release_component (act);
 	}
@@ -900,7 +1012,7 @@
 {
   const dref *const da = (const dref *) a;
   const dref *const db = (const dref *) b;
-  int offcmp = double_int_scmp ((*da)->offset, (*db)->offset);
+  int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
 
   if (offcmp != 0)
     return offcmp;
@@ -913,7 +1025,22 @@
 static inline dref
 get_chain_root (chain_p chain)
 {
-  return VEC_index (dref, chain->refs, 0);
+  return chain->refs[0];
+}
+
+/* Given CHAIN, returns the last ref at DISTANCE, or NULL if it doesn't
+   exist.  */
+
+static inline dref
+get_chain_last_ref_at (chain_p chain, unsigned distance)
+{
+  unsigned i;
+
+  for (i = chain->refs.length (); i > 0; i--)
+    if (distance == chain->refs[i - 1]->distance)
+      break;
+
+  return (i > 0) ? chain->refs[i - 1] : NULL;
 }
 
 /* Adds REF to the chain CHAIN.  */
@@ -922,20 +1049,19 @@
 add_ref_to_chain (chain_p chain, dref ref)
 {
   dref root = get_chain_root (chain);
-  double_int dist;
-
-  gcc_assert (double_int_scmp (root->offset, ref->offset) <= 0);
-  dist = double_int_sub (ref->offset, root->offset);
-  if (double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), dist) <= 0)
+
+  gcc_assert (wi::les_p (root->offset, ref->offset));
+  widest_int dist = ref->offset - root->offset;
+  if (wi::leu_p (MAX_DISTANCE, dist))
     {
       free (ref);
       return;
     }
-  gcc_assert (double_int_fits_in_uhwi_p (dist));
-
-  VEC_safe_push (dref, heap, chain->refs, ref);
-
-  ref->distance = double_int_to_uhwi (dist);
+  gcc_assert (wi::fits_uhwi_p (dist));
+
+  chain->refs.safe_push (ref);
+
+  ref->distance = dist.to_uhwi ();
 
   if (ref->distance >= chain->length)
     {
@@ -943,7 +1069,9 @@
       chain->has_max_use_after = false;
     }
 
-  if (ref->distance == chain->length
+  /* Don't set the flag for store-store chain since there is no use.  */
+  if (chain->type != CT_STORE_STORE
+      && ref->distance == chain->length
       && ref->pos > root->pos)
     chain->has_max_use_after = true;
 
@@ -963,29 +1091,33 @@
 
   chain->all_always_accessed = true;
 
-  FOR_EACH_VEC_ELT (dref, comp->refs, i, ref)
+  FOR_EACH_VEC_ELT (comp->refs, i, ref)
     {
-      VEC_safe_push (dref, heap, chain->refs, ref);
+      chain->refs.safe_push (ref);
       chain->all_always_accessed &= ref->always_accessed;
     }
 
+  chain->inits = vNULL;
+  chain->finis = vNULL;
+
   return chain;
 }
 
-/* Make a new chain rooted at REF.  */
+/* Make a new chain of type TYPE rooted at REF.  */
 
 static chain_p
-make_rooted_chain (dref ref)
+make_rooted_chain (dref ref, enum chain_type type)
 {
   chain_p chain = XCNEW (struct chain);
 
-  chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
-
-  VEC_safe_push (dref, heap, chain->refs, ref);
+  chain->type = type;
+  chain->refs.safe_push (ref);
   chain->all_always_accessed = ref->always_accessed;
-
   ref->distance = 0;
 
+  chain->inits = vNULL;
+  chain->finis = vNULL;
+
   return chain;
 }
 
@@ -994,7 +1126,7 @@
 static bool
 nontrivial_chain_p (chain_p chain)
 {
-  return chain != NULL && VEC_length (dref, chain->refs) > 1;
+  return chain != NULL && chain->refs.length () > 1;
 }
 
 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
@@ -1026,7 +1158,7 @@
 		     unsigned distance, struct data_reference *root)
 {
   aff_tree diff, base, step;
-  double_int off;
+  widest_int off;
 
   /* Both REF and ROOT must be accessing the same object.  */
   if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
@@ -1046,15 +1178,15 @@
      -DISTANCE-th iteration.  */
   aff_combination_dr_offset (root, &diff);
   aff_combination_dr_offset (ref, &base);
-  aff_combination_scale (&base, double_int_minus_one);
+  aff_combination_scale (&base, -1);
   aff_combination_add (&diff, &base);
 
-  tree_to_aff_combination_expand (DR_STEP (root), sizetype, &step,
-				  &name_expansions);
+  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
+				  &step, &name_expansions);
   if (!aff_combination_constant_multiple_p (&diff, &step, &off))
     return false;
 
-  if (!double_int_equal_p (off, uhwi_to_double_int (distance)))
+  if (off != distance)
     return false;
 
   return true;
@@ -1065,14 +1197,15 @@
    iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
    is the root of the current chain.  */
 
-static gimple
+static gphi *
 find_looparound_phi (struct loop *loop, dref ref, dref root)
 {
   tree name, init, init_ref;
-  gimple phi = NULL, init_stmt;
+  gphi *phi = NULL;
+  gimple *init_stmt;
   edge latch = loop_latch_edge (loop);
   struct data_reference init_dr;
-  gimple_stmt_iterator psi;
+  gphi_iterator psi;
 
   if (is_gimple_assign (ref->stmt))
     {
@@ -1088,7 +1221,7 @@
 
   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
     {
-      phi = gsi_stmt (psi);
+      phi = psi.phi ();
       if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
 	break;
     }
@@ -1114,7 +1247,7 @@
   memset (&init_dr, 0, sizeof (struct data_reference));
   DR_REF (&init_dr) = init_ref;
   DR_STMT (&init_dr) = phi;
-  if (!dr_analyze_innermost (&init_dr))
+  if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop))
     return NULL;
 
   if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
@@ -1126,7 +1259,7 @@
 /* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
 
 static void
-insert_looparound_copy (chain_p chain, dref ref, gimple phi)
+insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
 {
   dref nw = XCNEW (struct dref_d), aref;
   unsigned i;
@@ -1135,10 +1268,10 @@
   nw->distance = ref->distance + 1;
   nw->always_accessed = 1;
 
-  FOR_EACH_VEC_ELT (dref, chain->refs, i, aref)
+  FOR_EACH_VEC_ELT (chain->refs, i, aref)
     if (aref->distance >= nw->distance)
       break;
-  VEC_safe_insert (dref, heap, chain->refs, i, nw);
+  chain->refs.safe_insert (i, nw);
 
   if (nw->distance > chain->length)
     {
@@ -1157,9 +1290,12 @@
 {
   unsigned i;
   dref ref, root = get_chain_root (chain);
-  gimple phi;
-
-  FOR_EACH_VEC_ELT (dref, chain->refs, i, ref)
+  gphi *phi;
+
+  if (chain->type == CT_STORE_STORE)
+    return;
+
+  FOR_EACH_VEC_ELT (chain->refs, i, ref)
     {
       phi = find_looparound_phi (loop, ref, root);
       if (!phi)
@@ -1177,37 +1313,47 @@
 static void
 determine_roots_comp (struct loop *loop,
 		      struct component *comp,
-		      VEC (chain_p, heap) **chains)
+		      vec<chain_p> *chains)
 {
   unsigned i;
   dref a;
   chain_p chain = NULL;
-  double_int last_ofs = double_int_zero;
+  widest_int last_ofs = 0;
+  enum chain_type type;
 
   /* Invariants are handled specially.  */
   if (comp->comp_step == RS_INVARIANT)
     {
       chain = make_invariant_chain (comp);
-      VEC_safe_push (chain_p, heap, *chains, chain);
+      chains->safe_push (chain);
       return;
     }
 
-  VEC_qsort (dref, comp->refs, order_drefs);
-
-  FOR_EACH_VEC_ELT (dref, comp->refs, i, a)
+  /* Trivial component.  */
+  if (comp->refs.length () <= 1)
+    return;
+
+  comp->refs.qsort (order_drefs);
+  FOR_EACH_VEC_ELT (comp->refs, i, a)
     {
-      if (!chain || DR_IS_WRITE (a->ref)
-	  || double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE),
-			      double_int_sub (a->offset, last_ofs)) <= 0)
+      if (!chain
+	  || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
+	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
 	{
 	  if (nontrivial_chain_p (chain))
 	    {
 	      add_looparound_copies (loop, chain);
-	      VEC_safe_push (chain_p, heap, *chains, chain);
+	      chains->safe_push (chain);
 	    }
 	  else
 	    release_chain (chain);
-	  chain = make_rooted_chain (a);
+
+	  if (DR_IS_READ (a->ref))
+	    type = CT_LOAD;
+	  else
+	    type = comp->eliminate_store_p ? CT_STORE_STORE : CT_STORE_LOAD;
+
+	  chain = make_rooted_chain (a, type);
 	  last_ofs = a->offset;
 	  continue;
 	}
@@ -1218,7 +1364,7 @@
   if (nontrivial_chain_p (chain))
     {
       add_looparound_copies (loop, chain);
-      VEC_safe_push (chain_p, heap, *chains, chain);
+      chains->safe_push (chain);
     }
   else
     release_chain (chain);
@@ -1229,7 +1375,7 @@
 
 static void
 determine_roots (struct loop *loop,
-		 struct component *comps, VEC (chain_p, heap) **chains)
+		 struct component *comps, vec<chain_p> *chains)
 {
   struct component *comp;
 
@@ -1243,10 +1389,10 @@
    is in the lhs of STMT, false if it is in rhs.  */
 
 static void
-replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
+replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
 {
   tree val;
-  gimple new_stmt;
+  gassign *new_stmt;
   gimple_stmt_iterator bsi, psi;
 
   if (gimple_code (stmt) == GIMPLE_PHI)
@@ -1304,8 +1450,12 @@
       val = gimple_assign_lhs (stmt);
       if (TREE_CODE (val) != SSA_NAME)
 	{
-	  gcc_assert (gimple_assign_copy_p (stmt));
 	  val = gimple_assign_rhs1 (stmt);
+	  gcc_assert (gimple_assign_single_p (stmt));
+	  if (TREE_CLOBBER_P (val))
+	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
+	  else
+	    gcc_assert (gimple_assign_copy_p (stmt));
 	}
     }
   else
@@ -1324,90 +1474,85 @@
   gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
 }
 
-/* Returns the reference to the address of REF in the ITER-th iteration of
-   LOOP, or NULL if we fail to determine it (ITER may be negative).  We
-   try to preserve the original shape of the reference (not rewrite it
-   as an indirect ref to the address), to make tree_could_trap_p in
-   prepare_initializers_chain return false more often.  */
+/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
+   of the loop it was analyzed in.  Append init stmts to STMTS.  */
 
 static tree
-ref_at_iteration (struct loop *loop, tree ref, int iter)
+ref_at_iteration (data_reference_p dr, int iter,
+		  gimple_seq *stmts, tree niters = NULL_TREE)
 {
-  tree idx, *idx_p, type, val, op0 = NULL_TREE, ret;
-  affine_iv iv;
-  bool ok;
-
-  if (handled_component_p (ref))
+  tree off = DR_OFFSET (dr);
+  tree coff = DR_INIT (dr);
+  tree ref = DR_REF (dr);
+  enum tree_code ref_code = ERROR_MARK;
+  tree ref_type = NULL_TREE;
+  tree ref_op1 = NULL_TREE;
+  tree ref_op2 = NULL_TREE;
+  tree new_offset;
+
+  if (iter != 0)
     {
-      op0 = ref_at_iteration (loop, TREE_OPERAND (ref, 0), iter);
-      if (!op0)
-	return NULL_TREE;
-    }
-  else if (!INDIRECT_REF_P (ref)
-	   && TREE_CODE (ref) != MEM_REF)
-    return unshare_expr (ref);
-
-  if (TREE_CODE (ref) == MEM_REF)
-    {
-      ret = unshare_expr (ref);
-      idx = TREE_OPERAND (ref, 0);
-      idx_p = &TREE_OPERAND (ret, 0);
-    }
-  else if (TREE_CODE (ref) == COMPONENT_REF)
-    {
-      /* Check that the offset is loop invariant.  */
-      if (TREE_OPERAND (ref, 2)
-	  && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
-	return NULL_TREE;
-
-      return build3 (COMPONENT_REF, TREE_TYPE (ref), op0,
-		     unshare_expr (TREE_OPERAND (ref, 1)),
-		     unshare_expr (TREE_OPERAND (ref, 2)));
+      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
+      if (TREE_CODE (new_offset) == INTEGER_CST)
+	coff = size_binop (PLUS_EXPR, coff, new_offset);
+      else
+	off = size_binop (PLUS_EXPR, off, new_offset);
     }
-  else if (TREE_CODE (ref) == ARRAY_REF)
+
+  if (niters != NULL_TREE)
     {
-      /* Check that the lower bound and the step are loop invariant.  */
-      if (TREE_OPERAND (ref, 2)
-	  && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
-	return NULL_TREE;
-      if (TREE_OPERAND (ref, 3)
-	  && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 3)))
-	return NULL_TREE;
-
-      ret = build4 (ARRAY_REF, TREE_TYPE (ref), op0, NULL_TREE,
-		    unshare_expr (TREE_OPERAND (ref, 2)),
-		    unshare_expr (TREE_OPERAND (ref, 3)));
-      idx = TREE_OPERAND (ref, 1);
-      idx_p = &TREE_OPERAND (ret, 1);
+      niters = fold_convert (ssizetype, niters);
+      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
+      if (TREE_CODE (niters) == INTEGER_CST)
+	coff = size_binop (PLUS_EXPR, coff, new_offset);
+      else
+	off = size_binop (PLUS_EXPR, off, new_offset);
     }
-  else
-    return NULL_TREE;
-
-  ok = simple_iv (loop, loop, idx, &iv, true);
-  if (!ok)
-    return NULL_TREE;
-  iv.base = expand_simple_operations (iv.base);
-  if (integer_zerop (iv.step))
-    *idx_p = unshare_expr (iv.base);
-  else
+
+  /* While data-ref analysis punts on bit offsets it still handles
+     bitfield accesses at byte boundaries.  Cope with that.  Note that
+     if the bitfield object also starts at a byte-boundary we can simply
+     replicate the COMPONENT_REF, but we have to subtract the component's
+     byte-offset from the MEM_REF address first.
+     Otherwise we simply build a BIT_FIELD_REF knowing that the bits
+     start at offset zero.  */
+  if (TREE_CODE (ref) == COMPONENT_REF
+      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
     {
-      type = TREE_TYPE (iv.base);
-      if (POINTER_TYPE_P (type))
+      unsigned HOST_WIDE_INT boff;
+      tree field = TREE_OPERAND (ref, 1);
+      tree offset = component_ref_field_offset (ref);
+      ref_type = TREE_TYPE (ref);
+      boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
+      /* This can occur in Ada.  See the comment in get_bit_range.  */
+      if (boff % BITS_PER_UNIT != 0
+	  || !tree_fits_uhwi_p (offset))
 	{
-	  val = fold_build2 (MULT_EXPR, sizetype, iv.step,
-			     size_int (iter));
-	  val = fold_build2 (POINTER_PLUS_EXPR, type, iv.base, val);
+	  ref_code = BIT_FIELD_REF;
+	  ref_op1 = DECL_SIZE (field);
+	  ref_op2 = bitsize_zero_node;
 	}
       else
 	{
-	  val = fold_build2 (MULT_EXPR, type, iv.step,
-			     build_int_cst_type (type, iter));
-	  val = fold_build2 (PLUS_EXPR, type, iv.base, val);
+	  boff >>= LOG2_BITS_PER_UNIT;
+	  boff += tree_to_uhwi (offset);
+	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
+	  ref_code = COMPONENT_REF;
+	  ref_op1 = field;
+	  ref_op2 = TREE_OPERAND (ref, 2);
+	  ref = TREE_OPERAND (ref, 0);
 	}
-      *idx_p = unshare_expr (val);
     }
-
-  return ret;
+  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
+  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
+				 is_gimple_mem_ref_addr, NULL_TREE);
+  tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
+  tree type = build_aligned_type (TREE_TYPE (ref),
+				  get_object_alignment (ref));
+  ref = build2 (MEM_REF, type, addr, alias_ptr);
+  if (ref_type)
+    ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
+  return ref;
 }
 
 /* Get the initialization expression for the INDEX-th temporary variable
@@ -1424,31 +1569,7 @@
       return fold_build2 (chain->op, chain->rslt_type, e1, e2);
     }
   else
-    return VEC_index (tree, chain->inits, index);
-}
-
-/* Marks all virtual operands of statement STMT for renaming.  */
-
-void
-mark_virtual_ops_for_renaming (gimple stmt)
-{
-  tree var;
-
-  if (gimple_code (stmt) == GIMPLE_PHI)
-    {
-      var = PHI_RESULT (stmt);
-      if (is_gimple_reg (var))
-	return;
-
-      if (TREE_CODE (var) == SSA_NAME)
-	var = SSA_NAME_VAR (var);
-      mark_sym_for_renaming (var);
-      return;
-    }
-
-  update_stmt (stmt);
-  if (gimple_vuse (stmt))
-    mark_sym_for_renaming (gimple_vop (cfun));
+    return chain->inits[index];
 }
 
 /* Returns a new temporary variable used for the I-th variable carrying
@@ -1461,8 +1582,6 @@
   /* We never access the components of the temporary variable in predictive
      commoning.  */
   tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
-
-  add_referenced_var (var);
   bitmap_set_bit (tmp_vars, DECL_UID (var));
   return var;
 }
@@ -1479,7 +1598,7 @@
   dref root = get_chain_root (chain);
   bool reuse_first = !chain->has_max_use_after;
   tree ref, init, var, next;
-  gimple phi;
+  gphi *phi;
   gimple_seq stmts;
   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
 
@@ -1487,7 +1606,7 @@
      since this is an nonempty chain, reuse_first cannot be true.  */
   gcc_assert (n > 0 || !reuse_first);
 
-  chain->vars = VEC_alloc (tree, heap, n + 1);
+  chain->vars.create (n + 1);
 
   if (chain->type == CT_COMBINATION)
     ref = gimple_assign_lhs (root->stmt);
@@ -1497,18 +1616,18 @@
   for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
     {
       var = predcom_tmp_var (ref, i, tmp_vars);
-      VEC_quick_push (tree, chain->vars, var);
+      chain->vars.quick_push (var);
     }
   if (reuse_first)
-    VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0));
-
-  FOR_EACH_VEC_ELT (tree, chain->vars, i, var)
-    VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL));
+    chain->vars.quick_push (chain->vars[0]);
+
+  FOR_EACH_VEC_ELT (chain->vars, i, var)
+    chain->vars[i] = make_ssa_name (var);
 
   for (i = 0; i < n; i++)
     {
-      var = VEC_index (tree, chain->vars, i);
-      next = VEC_index (tree, chain->vars, i + 1);
+      var = chain->vars[i];
+      next = chain->vars[i + 1];
       init = get_init_expr (chain, i);
 
       init = force_gimple_operand (init, &stmts, true, NULL_TREE);
@@ -1516,27 +1635,210 @@
 	gsi_insert_seq_on_edge_immediate (entry, stmts);
 
       phi = create_phi_node (var, loop->header);
-      SSA_NAME_DEF_STMT (var) = phi;
       add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
       add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
     }
 }
 
-/* Create the variables and initialization statement for root of chain
-   CHAIN.  Uids of the newly created temporary variables are marked
-   in TMP_VARS.  */
+/* For inter-iteration store elimination CHAIN in LOOP, returns true if
+   all stores to be eliminated store loop invariant values into memory.
+   In this case, we can use these invariant values directly after LOOP.  */
+
+static bool
+is_inv_store_elimination_chain (struct loop *loop, chain_p chain)
+{
+  if (chain->length == 0 || chain->type != CT_STORE_STORE)
+    return false;
+
+  gcc_assert (!chain->has_max_use_after);
+
+  /* If loop iterates for unknown times or fewer times than chain->lenght,
+     we still need to setup root variable and propagate it with PHI node.  */
+  tree niters = number_of_latch_executions (loop);
+  if (TREE_CODE (niters) != INTEGER_CST
+      || wi::leu_p (wi::to_wide (niters), chain->length))
+    return false;
+
+  /* Check stores in chain for elimination if they only store loop invariant
+     values.  */
+  for (unsigned i = 0; i < chain->length; i++)
+    {
+      dref a = get_chain_last_ref_at (chain, i);
+      if (a == NULL)
+	continue;
+
+      gimple *def_stmt, *stmt = a->stmt;
+      if (!gimple_assign_single_p (stmt))
+	return false;
+
+      tree val = gimple_assign_rhs1 (stmt);
+      if (TREE_CLOBBER_P (val))
+	return false;
+
+      if (CONSTANT_CLASS_P (val))
+	continue;
+
+      if (TREE_CODE (val) != SSA_NAME)
+	return false;
+
+      def_stmt = SSA_NAME_DEF_STMT (val);
+      if (gimple_nop_p (def_stmt))
+	continue;
+
+      if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
+	return false;
+    }
+  return true;
+}
+
+/* Creates root variables for store elimination CHAIN in which stores for
+   elimination only store loop invariant values.  In this case, we neither
+   need to load root variables before loop nor propagate it with PHI nodes.  */
+
+static void
+initialize_root_vars_store_elim_1 (chain_p chain)
+{
+  tree var;
+  unsigned i, n = chain->length;
+
+  chain->vars.create (n);
+  chain->vars.safe_grow_cleared (n);
+
+  /* Initialize root value for eliminated stores at each distance.  */
+  for (i = 0; i < n; i++)
+    {
+      dref a = get_chain_last_ref_at (chain, i);
+      if (a == NULL)
+	continue;
+
+      var = gimple_assign_rhs1 (a->stmt);
+      chain->vars[a->distance] = var;
+    }
+
+  /* We don't propagate values with PHI nodes, so manually propagate value
+     to bubble positions.  */
+  var = chain->vars[0];
+  for (i = 1; i < n; i++)
+    {
+      if (chain->vars[i] != NULL_TREE)
+	{
+	  var = chain->vars[i];
+	  continue;
+	}
+      chain->vars[i] = var;
+    }
+
+  /* Revert the vector.  */
+  for (i = 0; i < n / 2; i++)
+    std::swap (chain->vars[i], chain->vars[n - i - 1]);
+}
+
+/* Creates root variables for store elimination CHAIN in which stores for
+   elimination store loop variant values.  In this case, we may need to
+   load root variables before LOOP and propagate it with PHI nodes.  Uids
+   of the newly created root variables are marked in TMP_VARS.  */
 
 static void
-initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
+initialize_root_vars_store_elim_2 (struct loop *loop,
+				   chain_p chain, bitmap tmp_vars)
 {
-  dref root = get_chain_root (chain);
-  bool in_lhs = (chain->type == CT_STORE_LOAD
-		 || chain->type == CT_COMBINATION);
-
-  initialize_root_vars (loop, chain, tmp_vars);
-  replace_ref_with (root->stmt,
-		    VEC_index (tree, chain->vars, chain->length),
-		    true, in_lhs);
+  unsigned i, n = chain->length;
+  tree ref, init, var, next, val, phi_result;
+  gimple *stmt;
+  gimple_seq stmts;
+
+  chain->vars.create (n);
+
+  ref = DR_REF (get_chain_root (chain)->ref);
+  for (i = 0; i < n; i++)
+    {
+      var = predcom_tmp_var (ref, i, tmp_vars);
+      chain->vars.quick_push (var);
+    }
+
+  FOR_EACH_VEC_ELT (chain->vars, i, var)
+    chain->vars[i] = make_ssa_name (var);
+
+  /* Root values are either rhs operand of stores to be eliminated, or
+     loaded from memory before loop.  */
+  auto_vec<tree> vtemps;
+  vtemps.safe_grow_cleared (n);
+  for (i = 0; i < n; i++)
+    {
+      init = get_init_expr (chain, i);
+      if (init == NULL_TREE)
+	{
+	  /* Root value is rhs operand of the store to be eliminated if
+	     it isn't loaded from memory before loop.  */
+	  dref a = get_chain_last_ref_at (chain, i);
+	  val = gimple_assign_rhs1 (a->stmt);
+	  if (TREE_CLOBBER_P (val))
+	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
+
+	  vtemps[n - i - 1] = val;
+	}
+      else
+	{
+	  edge latch = loop_latch_edge (loop);
+	  edge entry = loop_preheader_edge (loop);
+
+	  /* Root value is loaded from memory before loop, we also need
+	     to add PHI nodes to propagate the value across iterations.  */
+	  init = force_gimple_operand (init, &stmts, true, NULL_TREE);
+	  if (stmts)
+	    gsi_insert_seq_on_edge_immediate (entry, stmts);
+
+	  next = chain->vars[n - i];
+	  phi_result = copy_ssa_name (next);
+	  gphi *phi = create_phi_node (phi_result, loop->header);
+	  add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
+	  add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
+	  vtemps[n - i - 1] = phi_result;
+	}
+    }
+
+  /* Find the insertion position.  */
+  dref last = get_chain_root (chain);
+  for (i = 0; i < chain->refs.length (); i++)
+    {
+      if (chain->refs[i]->pos > last->pos)
+	last = chain->refs[i];
+    }
+
+  gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
+
+  /* Insert statements copying root value to root variable.  */
+  for (i = 0; i < n; i++)
+    {
+      var = chain->vars[i];
+      val = vtemps[i];
+      stmt = gimple_build_assign (var, val);
+      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
+    }
+}
+
+/* Generates stores for CHAIN's eliminated stores in LOOP's last
+   (CHAIN->length - 1) iterations.  */
+
+static void
+finalize_eliminated_stores (struct loop *loop, chain_p chain)
+{
+  unsigned i, n = chain->length;
+
+  for (i = 0; i < n; i++)
+    {
+      tree var = chain->vars[i];
+      tree fini = chain->finis[n - i - 1];
+      gimple *stmt = gimple_build_assign (fini, var);
+
+      gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
+    }
+
+  if (chain->fini_seq)
+    {
+      gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
+      chain->fini_seq = NULL;
+    }
 }
 
 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
@@ -1548,29 +1850,29 @@
 
 static void
 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
-			 VEC(tree, heap) **vars, VEC(tree, heap) *inits,
+			 vec<tree> *vars, vec<tree> inits,
 			 bitmap tmp_vars)
 {
   unsigned i;
   tree ref = DR_REF (root->ref), init, var, next;
   gimple_seq stmts;
-  gimple phi;
+  gphi *phi;
   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
 
   /* Find the initializer for the variable, and check that it cannot
      trap.  */
-  init = VEC_index (tree, inits, 0);
-
-  *vars = VEC_alloc (tree, heap, written ? 2 : 1);
+  init = inits[0];
+
+  vars->create (written ? 2 : 1);
   var = predcom_tmp_var (ref, 0, tmp_vars);
-  VEC_quick_push (tree, *vars, var);
+  vars->quick_push (var);
   if (written)
-    VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0));
-
-  FOR_EACH_VEC_ELT (tree, *vars, i, var)
-    VEC_replace (tree, *vars, i, make_ssa_name (var, NULL));
-
-  var = VEC_index (tree, *vars, 0);
+    vars->quick_push ((*vars)[0]);
+
+  FOR_EACH_VEC_ELT (*vars, i, var)
+    (*vars)[i] = make_ssa_name (var);
+
+  var = (*vars)[0];
 
   init = force_gimple_operand (init, &stmts, written, NULL_TREE);
   if (stmts)
@@ -1578,16 +1880,14 @@
 
   if (written)
     {
-      next = VEC_index (tree, *vars, 1);
+      next = (*vars)[1];
       phi = create_phi_node (var, loop->header);
-      SSA_NAME_DEF_STMT (var) = phi;
       add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
       add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
     }
   else
     {
-      gimple init_stmt = gimple_build_assign (var, init);
-      mark_virtual_ops_for_renaming (init_stmt);
+      gassign *init_stmt = gimple_build_assign (var, init);
       gsi_insert_on_edge_immediate (entry, init_stmt);
     }
 }
@@ -1599,60 +1899,57 @@
 static void
 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
 {
-  VEC (tree, heap) *vars;
+  auto_vec<tree> vars;
   dref a;
   unsigned n_writes = 0, ridx, i;
   tree var;
 
   gcc_assert (chain->type == CT_INVARIANT);
   gcc_assert (!chain->combined);
-  FOR_EACH_VEC_ELT (dref, chain->refs, i, a)
+  FOR_EACH_VEC_ELT (chain->refs, i, a)
     if (DR_IS_WRITE (a->ref))
       n_writes++;
 
   /* If there are no reads in the loop, there is nothing to do.  */
-  if (n_writes == VEC_length (dref, chain->refs))
+  if (n_writes == chain->refs.length ())
     return;
 
   initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
 			   &vars, chain->inits, tmp_vars);
 
   ridx = 0;
-  FOR_EACH_VEC_ELT (dref, chain->refs, i, a)
+  FOR_EACH_VEC_ELT (chain->refs, i, a)
     {
       bool is_read = DR_IS_READ (a->ref);
-      mark_virtual_ops_for_renaming (a->stmt);
 
       if (DR_IS_WRITE (a->ref))
 	{
 	  n_writes--;
 	  if (n_writes)
 	    {
-	      var = VEC_index (tree, vars, 0);
-	      var = make_ssa_name (SSA_NAME_VAR (var), NULL);
-	      VEC_replace (tree, vars, 0, var);
+	      var = vars[0];
+	      var = make_ssa_name (SSA_NAME_VAR (var));
+	      vars[0] = var;
 	    }
 	  else
 	    ridx = 1;
 	}
 
-      replace_ref_with (a->stmt, VEC_index (tree, vars, ridx),
+      replace_ref_with (a->stmt, vars[ridx],
 			!is_read, !is_read);
     }
-
-  VEC_free (tree, heap, vars);
 }
 
 /* Returns the single statement in that NAME is used, excepting
    the looparound phi nodes contained in one of the chains.  If there is no
    such statement, or more statements, NULL is returned.  */
 
-static gimple
+static gimple *
 single_nonlooparound_use (tree name)
 {
   use_operand_p use;
   imm_use_iterator it;
-  gimple stmt, ret = NULL;
+  gimple *stmt, *ret = NULL;
 
   FOR_EACH_IMM_USE_FAST (use, it, name)
     {
@@ -1683,16 +1980,17 @@
    used.  */
 
 static void
-remove_stmt (gimple stmt)
+remove_stmt (gimple *stmt)
 {
   tree name;
-  gimple next;
+  gimple *next;
   gimple_stmt_iterator psi;
 
   if (gimple_code (stmt) == GIMPLE_PHI)
     {
       name = PHI_RESULT (stmt);
       next = single_nonlooparound_use (name);
+      reset_debug_uses (stmt);
       psi = gsi_for_stmt (stmt);
       remove_phi_node (&psi, true);
 
@@ -1711,11 +2009,19 @@
       bsi = gsi_for_stmt (stmt);
 
       name = gimple_assign_lhs (stmt);
-      gcc_assert (TREE_CODE (name) == SSA_NAME);
-
-      next = single_nonlooparound_use (name);
-
-      mark_virtual_ops_for_renaming (stmt);
+      if (TREE_CODE (name) == SSA_NAME)
+	{
+	  next = single_nonlooparound_use (name);
+	  reset_debug_uses (stmt);
+	}
+      else
+	{
+	  /* This is a store to be eliminated.  */
+	  gcc_assert (gimple_vdef (stmt) != NULL);
+	  next = NULL;
+	}
+
+      unlink_stmt_vdef (stmt);
       gsi_remove (&bsi, true);
       release_defs (stmt);
 
@@ -1733,32 +2039,63 @@
 
 static void
 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
-			     bitmap tmp_vars)
+			      bitmap tmp_vars)
 {
-  unsigned i;
-  dref a, root;
+  unsigned i, n;
+  dref a;
   tree var;
+  bool in_lhs;
 
   if (chain->combined)
     {
       /* For combined chains, just remove the statements that are used to
-	 compute the values of the expression (except for the root one).  */
-      for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
-	remove_stmt (a->stmt);
+	 compute the values of the expression (except for the root one).
+	 We delay this until after all chains are processed.  */
+    }
+  else if (chain->type == CT_STORE_STORE)
+    {
+      if (chain->length > 0)
+	{
+	  if (chain->inv_store_elimination)
+	    {
+	      /* If dead stores in this chain only store loop invariant
+		 values, we can simply record the invariant value and use
+		 it directly after loop.  */
+	      initialize_root_vars_store_elim_1 (chain);
+	    }
+	  else
+	    {
+	      /* If dead stores in this chain store loop variant values,
+		 we need to set up the variables by loading from memory
+		 before loop and propagating it with PHI nodes.  */
+	      initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
+	    }
+
+	  /* For inter-iteration store elimination chain, stores at each
+	     distance in loop's last (chain->length - 1) iterations can't
+	     be eliminated, because there is no following killing store.
+	     We need to generate these stores after loop.  */
+	  finalize_eliminated_stores (loop, chain);
+	}
+
+      /* Eliminate the stores killed by following store.  */
+      n = chain->refs.length ();
+      for (i = 0; i < n - 1; i++)
+	remove_stmt (chain->refs[i]->stmt);
     }
   else
     {
-      /* For non-combined chains, set up the variables that hold its value,
-	 and replace the uses of the original references by these
-	 variables.  */
-      root = get_chain_root (chain);
-      mark_virtual_ops_for_renaming (root->stmt);
-
-      initialize_root (loop, chain, tmp_vars);
-      for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
+      /* For non-combined chains, set up the variables that hold its value.  */
+      initialize_root_vars (loop, chain, tmp_vars);
+      a = get_chain_root (chain);
+      in_lhs = (chain->type == CT_STORE_LOAD
+		|| chain->type == CT_COMBINATION);
+      replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
+
+      /* Replace the uses of the original references by these variables.  */
+      for (i = 1; chain->refs.iterate (i, &a); i++)
 	{
-	  mark_virtual_ops_for_renaming (a->stmt);
-	  var = VEC_index (tree, chain->vars, chain->length - a->distance);
+	  var = chain->vars[chain->length - a->distance];
 	  replace_ref_with (a->stmt, var, false, false);
 	}
     }
@@ -1769,16 +2106,31 @@
    optimized.  */
 
 static unsigned
-determine_unroll_factor (VEC (chain_p, heap) *chains)
+determine_unroll_factor (vec<chain_p> chains)
 {
   chain_p chain;
   unsigned factor = 1, af, nfactor, i;
   unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 
-  FOR_EACH_VEC_ELT (chain_p, chains, i, chain)
+  FOR_EACH_VEC_ELT (chains, i, chain)
     {
-      if (chain->type == CT_INVARIANT || chain->combined)
+      if (chain->type == CT_INVARIANT)
 	continue;
+      /* For now we can't handle unrolling when eliminating stores.  */
+      else if (chain->type == CT_STORE_STORE)
+	return 1;
+
+      if (chain->combined)
+	{
+	  /* For combined chains, we can't handle unrolling if we replace
+	     looparound PHIs.  */
+	  dref a;
+	  unsigned j;
+	  for (j = 1; chain->refs.iterate (j, &a); j++)
+	    if (gimple_code (a->stmt) == GIMPLE_PHI)
+	      return 1;
+	  continue;
+	}
 
       /* The best unroll factor for this chain is equal to the number of
 	 temporary variables that we create for it.  */
@@ -1798,13 +2150,13 @@
    Uids of the newly created temporary variables are marked in TMP_VARS.  */
 
 static void
-execute_pred_commoning (struct loop *loop, VEC (chain_p, heap) *chains,
+execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
 			bitmap tmp_vars)
 {
   chain_p chain;
   unsigned i;
 
-  FOR_EACH_VEC_ELT (chain_p, chains, i, chain)
+  FOR_EACH_VEC_ELT (chains, i, chain)
     {
       if (chain->type == CT_INVARIANT)
 	execute_load_motion (loop, chain, tmp_vars);
@@ -1812,6 +2164,21 @@
 	execute_pred_commoning_chain (loop, chain, tmp_vars);
     }
 
+  FOR_EACH_VEC_ELT (chains, i, chain)
+    {
+      if (chain->type == CT_INVARIANT)
+	;
+      else if (chain->combined)
+	{
+	  /* For combined chains, just remove the statements that are used to
+	     compute the values of the expression (except for the root one).  */
+	  dref a;
+	  unsigned j;
+	  for (j = 1; chain->refs.iterate (j, &a); j++)
+	    remove_stmt (a->stmt);
+	}
+    }
+
   update_ssa (TODO_update_ssa_only_virtuals);
 }
 
@@ -1819,14 +2186,14 @@
    phi node, record the ssa name that is defined by it.  */
 
 static void
-replace_phis_by_defined_names (VEC (chain_p, heap) *chains)
+replace_phis_by_defined_names (vec<chain_p> chains)
 {
   chain_p chain;
   dref a;
   unsigned i, j;
 
-  FOR_EACH_VEC_ELT (chain_p, chains, i, chain)
-    FOR_EACH_VEC_ELT (dref, chain->refs, j, a)
+  FOR_EACH_VEC_ELT (chains, i, chain)
+    FOR_EACH_VEC_ELT (chain->refs, j, a)
       {
 	if (gimple_code (a->stmt) == GIMPLE_PHI)
 	  {
@@ -1840,14 +2207,14 @@
    NULL, use it to set the stmt field.  */
 
 static void
-replace_names_by_phis (VEC (chain_p, heap) *chains)
+replace_names_by_phis (vec<chain_p> chains)
 {
   chain_p chain;
   dref a;
   unsigned i, j;
 
-  FOR_EACH_VEC_ELT (chain_p, chains, i, chain)
-    FOR_EACH_VEC_ELT (dref, chain->refs, j, a)
+  FOR_EACH_VEC_ELT (chains, i, chain)
+    FOR_EACH_VEC_ELT (chain->refs, j, a)
       if (a->stmt == NULL)
 	{
 	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
@@ -1861,7 +2228,7 @@
 
 struct epcc_data
 {
-  VEC (chain_p, heap) *chains;
+  vec<chain_p> chains;
   bitmap tmp_vars;
 };
 
@@ -1884,10 +2251,10 @@
 static void
 base_names_in_chain_on (struct loop *loop, tree name, tree var)
 {
-  gimple stmt, phi;
+  gimple *stmt, *phi;
   imm_use_iterator iter;
 
-  SSA_NAME_VAR (name) = var;
+  replace_ssa_name_symbol (name, var);
 
   while (1)
     {
@@ -1905,7 +2272,7 @@
 	return;
 
       name = PHI_RESULT (phi);
-      SSA_NAME_VAR (name) = var;
+      replace_ssa_name_symbol (name, var);
     }
 }
 
@@ -1918,17 +2285,18 @@
 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
 {
   edge e;
-  gimple phi, stmt;
+  gphi *phi;
+  gimple *stmt;
   tree name, use, var;
-  gimple_stmt_iterator psi;
+  gphi_iterator psi;
 
   e = loop_latch_edge (loop);
   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
     {
-      phi = gsi_stmt (psi);
+      phi = psi.phi ();
       name = PHI_RESULT (phi);
       var = SSA_NAME_VAR (name);
-      if (!bitmap_bit_p (tmp_vars, DECL_UID (var)))
+      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
 	continue;
       use = PHI_ARG_DEF_FROM_EDGE (phi, e);
       gcc_assert (TREE_CODE (use) == SSA_NAME);
@@ -1965,10 +2333,10 @@
    statements, NAME is replaced with the actual name used in the returned
    statement.  */
 
-static gimple
+static gimple *
 find_use_stmt (tree *name)
 {
-  gimple stmt;
+  gimple *stmt;
   tree rhs, lhs;
 
   /* Skip over assignments.  */
@@ -2018,11 +2386,11 @@
    tree of the same operations and returns its root.  Distance to the root
    is stored in DISTANCE.  */
 
-static gimple
-find_associative_operation_root (gimple stmt, unsigned *distance)
+static gimple *
+find_associative_operation_root (gimple *stmt, unsigned *distance)
 {
   tree lhs;
-  gimple next;
+  gimple *next;
   enum tree_code code = gimple_assign_rhs_code (stmt);
   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
   unsigned dist = 0;
@@ -2055,10 +2423,10 @@
    tree formed by this operation instead of the statement that uses NAME1 or
    NAME2.  */
 
-static gimple
+static gimple *
 find_common_use_stmt (tree *name1, tree *name2)
 {
-  gimple stmt1, stmt2;
+  gimple *stmt1, *stmt2;
 
   stmt1 = find_use_stmt (name1);
   if (!stmt1)
@@ -2093,7 +2461,7 @@
   bool aswap;
   tree atype;
   tree name1, name2;
-  gimple stmt;
+  gimple *stmt;
 
   name1 = name_for_ref (r1);
   name2 = name_for_ref (r2);
@@ -2101,7 +2469,11 @@
 
   stmt = find_common_use_stmt (&name1, &name2);
 
-  if (!stmt)
+  if (!stmt
+      /* A simple post-dominance check - make sure the combination
+         is executed under the same condition as the references.  */
+      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
+	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
     return false;
 
   acode = gimple_assign_rhs_code (stmt);
@@ -2126,7 +2498,7 @@
    an assignment of the remaining operand.  */
 
 static void
-remove_name_from_operation (gimple stmt, tree op)
+remove_name_from_operation (gimple *stmt, tree op)
 {
   tree other_op;
   gimple_stmt_iterator si;
@@ -2148,13 +2520,14 @@
 }
 
 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
-   are combined in a single statement, and returns this statement.  */
-
-static gimple
-reassociate_to_the_same_stmt (tree name1, tree name2)
+   are combined in a single statement, and returns this statement.  Note the
+   statement is inserted before INSERT_BEFORE if it's not NULL.  */
+
+static gimple *
+reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
 {
-  gimple stmt1, stmt2, root1, root2, s1, s2;
-  gimple new_stmt, tmp_stmt;
+  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
+  gassign *new_stmt, *tmp_stmt;
   tree new_name, tmp_name, var, r1, r2;
   unsigned dist1, dist2;
   enum tree_code code;
@@ -2206,28 +2579,30 @@
   /* Insert the new statement combining NAME1 and NAME2 before S1, and
      combine it with the rhs of S1.  */
   var = create_tmp_reg (type, "predreastmp");
-  add_referenced_var (var);
-  new_name = make_ssa_name (var, NULL);
-  new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);
+  new_name = make_ssa_name (var);
+  new_stmt = gimple_build_assign (new_name, code, name1, name2);
+  if (insert_before && stmt_dominates_stmt_p (insert_before, s1))
+    bsi = gsi_for_stmt (insert_before);
+  else
+    bsi = gsi_for_stmt (s1);
+
+  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
 
   var = create_tmp_reg (type, "predreastmp");
-  add_referenced_var (var);
-  tmp_name = make_ssa_name (var, NULL);
+  tmp_name = make_ssa_name (var);
 
   /* Rhs of S1 may now be either a binary expression with operation
      CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
      so that name1 or name2 was removed from it).  */
-  tmp_stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (s1),
-					   tmp_name,
-					   gimple_assign_rhs1 (s1),
-					   gimple_assign_rhs2 (s1));
+  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
+				  gimple_assign_rhs1 (s1),
+				  gimple_assign_rhs2 (s1));
 
   bsi = gsi_for_stmt (s1);
   gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
   s1 = gsi_stmt (bsi);
   update_stmt (s1);
 
-  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
   gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
 
   return new_stmt;
@@ -2236,12 +2611,13 @@
 /* Returns the statement that combines references R1 and R2.  In case R1
    and R2 are not used in the same statement, but they are used with an
    associative and commutative operation in the same expression, reassociate
-   the expression so that they are used in the same statement.  */
-
-static gimple
-stmt_combining_refs (dref r1, dref r2)
+   the expression so that they are used in the same statement.  The combined
+   statement is inserted before INSERT_BEFORE if it's not NULL.  */
+
+static gimple *
+stmt_combining_refs (dref r1, dref r2, gimple *insert_before)
 {
-  gimple stmt1, stmt2;
+  gimple *stmt1, *stmt2;
   tree name1 = name_for_ref (r1);
   tree name2 = name_for_ref (r2);
 
@@ -2250,7 +2626,7 @@
   if (stmt1 == stmt2)
     return stmt1;
 
-  return reassociate_to_the_same_stmt (name1, name2);
+  return reassociate_to_the_same_stmt (name1, name2, insert_before);
 }
 
 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
@@ -2263,8 +2639,8 @@
   enum tree_code op = ERROR_MARK;
   bool swap = false;
   chain_p new_chain;
-  unsigned i;
-  gimple root_stmt;
+  int i, j, num;
+  gimple *root_stmt;
   tree rslt_type = NULL_TREE;
 
   if (ch1 == ch2)
@@ -2272,11 +2648,11 @@
   if (ch1->length != ch2->length)
     return NULL;
 
-  if (VEC_length (dref, ch1->refs) != VEC_length (dref, ch2->refs))
+  if (ch1->refs.length () != ch2->refs.length ())
     return NULL;
 
-  for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
-	       && VEC_iterate (dref, ch2->refs, i, r2)); i++)
+  for (i = 0; (ch1->refs.iterate (i, &r1)
+	       && ch2->refs.iterate (i, &r2)); i++)
     {
       if (r1->distance != r2->distance)
 	return NULL;
@@ -2285,12 +2661,11 @@
 	return NULL;
     }
 
+  ch1->combined = true;
+  ch2->combined = true;
+
   if (swap)
-    {
-      chain_p tmp = ch1;
-      ch1 = ch2;
-      ch2 = tmp;
-    }
+    std::swap (ch1, ch2);
 
   new_chain = XCNEW (struct chain);
   new_chain->type = CT_COMBINATION;
@@ -2300,19 +2675,49 @@
   new_chain->rslt_type = rslt_type;
   new_chain->length = ch1->length;
 
-  for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
-	       && VEC_iterate (dref, ch2->refs, i, r2)); i++)
+  gimple *insert = NULL;
+  num = ch1->refs.length ();
+  i = (new_chain->length == 0) ? num - 1 : 0;
+  j = (new_chain->length == 0) ? -1 : 1;
+  /* For ZERO length chain, process refs in reverse order so that dominant
+     position is ready when it comes to the root ref.
+     For non-ZERO length chain, process refs in order.  See PR79663.  */
+  for (; num > 0; num--, i += j)
     {
+      r1 = ch1->refs[i];
+      r2 = ch2->refs[i];
       nw = XCNEW (struct dref_d);
-      nw->stmt = stmt_combining_refs (r1, r2);
       nw->distance = r1->distance;
 
-      VEC_safe_push (dref, heap, new_chain->refs, nw);
+      /* For ZERO length chain, insert combined stmt of root ref at dominant
+	 position.  */
+      nw->stmt = stmt_combining_refs (r1, r2, i == 0 ? insert : NULL);
+      /* For ZERO length chain, record dominant position where combined stmt
+	 of root ref should be inserted.  In this case, though all root refs
+	 dominate following ones, it's possible that combined stmt doesn't.
+	 See PR70754.  */
+      if (new_chain->length == 0
+	  && (insert == NULL || stmt_dominates_stmt_p (nw->stmt, insert)))
+	insert = nw->stmt;
+
+      new_chain->refs.safe_push (nw);
+    }
+  if (new_chain->length == 0)
+    {
+      /* Restore the order for ZERO length chain's refs.  */
+      num = new_chain->refs.length () >> 1;
+      for (i = 0, j = new_chain->refs.length () - 1; i < num; i++, j--)
+	std::swap (new_chain->refs[i], new_chain->refs[j]);
+
+      /* For ZERO length chain, has_max_use_after must be true since root
+	 combined stmt must dominates others.  */
+      new_chain->has_max_use_after = true;
+      return new_chain;
     }
 
   new_chain->has_max_use_after = false;
   root_stmt = get_chain_root (new_chain)->stmt;
-  for (i = 1; VEC_iterate (dref, new_chain->refs, i, nw); i++)
+  for (i = 1; new_chain->refs.iterate (i, &nw); i++)
     {
       if (nw->distance == new_chain->length
 	  && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
@@ -2322,31 +2727,29 @@
 	}
     }
 
-  ch1->combined = true;
-  ch2->combined = true;
   return new_chain;
 }
 
 /* Try to combine the CHAINS.  */
 
 static void
-try_combine_chains (VEC (chain_p, heap) **chains)
+try_combine_chains (vec<chain_p> *chains)
 {
   unsigned i, j;
   chain_p ch1, ch2, cch;
-  VEC (chain_p, heap) *worklist = NULL;
-
-  FOR_EACH_VEC_ELT (chain_p, *chains, i, ch1)
+  auto_vec<chain_p> worklist;
+
+  FOR_EACH_VEC_ELT (*chains, i, ch1)
     if (chain_can_be_combined_p (ch1))
-      VEC_safe_push (chain_p, heap, worklist, ch1);
-
-  while (!VEC_empty (chain_p, worklist))
+      worklist.safe_push (ch1);
+
+  while (!worklist.is_empty ())
     {
-      ch1 = VEC_pop (chain_p, worklist);
+      ch1 = worklist.pop ();
       if (!chain_can_be_combined_p (ch1))
 	continue;
 
-      FOR_EACH_VEC_ELT (chain_p, *chains, j, ch2)
+      FOR_EACH_VEC_ELT (*chains, j, ch2)
 	{
 	  if (!chain_can_be_combined_p (ch2))
 	    continue;
@@ -2354,14 +2757,82 @@
 	  cch = combine_chains (ch1, ch2);
 	  if (cch)
 	    {
-	      VEC_safe_push (chain_p, heap, worklist, cch);
-	      VEC_safe_push (chain_p, heap, *chains, cch);
+	      worklist.safe_push (cch);
+	      chains->safe_push (cch);
 	      break;
 	    }
 	}
     }
 }
 
+/* Prepare initializers for store elimination CHAIN in LOOP.  Returns false
+   if this is impossible because one of these initializers may trap, true
+   otherwise.  */
+
+static bool
+prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain)
+{
+  unsigned i, n = chain->length;
+
+  /* For now we can't eliminate stores if some of them are conditional
+     executed.  */
+  if (!chain->all_always_accessed)
+    return false;
+
+  /* Nothing to intialize for intra-iteration store elimination.  */
+  if (n == 0 && chain->type == CT_STORE_STORE)
+    return true;
+
+  /* For store elimination chain, there is nothing to initialize if stores
+     to be eliminated only store loop invariant values into memory.  */
+  if (chain->type == CT_STORE_STORE
+      && is_inv_store_elimination_chain (loop, chain))
+    {
+      chain->inv_store_elimination = true;
+      return true;
+    }
+
+  chain->inits.create (n);
+  chain->inits.safe_grow_cleared (n);
+
+  /* For store eliminatin chain like below:
+
+     for (i = 0; i < len; i++)
+       {
+	 a[i] = 1;
+	 // a[i + 1] = ...
+	 a[i + 2] = 3;
+       }
+
+     store to a[i + 1] is missed in loop body, it acts like bubbles.  The
+     content of a[i + 1] remain the same if the loop iterates fewer times
+     than chain->length.  We need to set up root variables for such stores
+     by loading from memory before loop.  Note we only need to load bubble
+     elements because loop body is guaranteed to be executed at least once
+     after loop's preheader edge.  */
+  auto_vec<bool> bubbles;
+  bubbles.safe_grow_cleared (n + 1);
+  for (i = 0; i < chain->refs.length (); i++)
+    bubbles[chain->refs[i]->distance] = true;
+
+  struct data_reference *dr = get_chain_root (chain)->ref;
+  for (i = 0; i < n; i++)
+    {
+      if (bubbles[i])
+	continue;
+
+      gimple_seq stmts = NULL;
+
+      tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
+      if (stmts)
+	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
+
+      chain->inits[i] = init;
+    }
+
+  return true;
+}
+
 /* Prepare initializers for CHAIN in LOOP.  Returns false if this is
    impossible because one of these initializers may trap, true otherwise.  */
 
@@ -2371,45 +2842,48 @@
   unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
   struct data_reference *dr = get_chain_root (chain)->ref;
   tree init;
-  gimple_seq stmts;
   dref laref;
   edge entry = loop_preheader_edge (loop);
 
+  if (chain->type == CT_STORE_STORE)
+    return prepare_initializers_chain_store_elim (loop, chain);
+
   /* Find the initializers for the variables, and check that they cannot
      trap.  */
-  chain->inits = VEC_alloc (tree, heap, n);
+  chain->inits.create (n);
   for (i = 0; i < n; i++)
-    VEC_quick_push (tree, chain->inits, NULL_TREE);
+    chain->inits.quick_push (NULL_TREE);
 
   /* If we have replaced some looparound phi nodes, use their initializers
      instead of creating our own.  */
-  FOR_EACH_VEC_ELT (dref, chain->refs, i, laref)
+  FOR_EACH_VEC_ELT (chain->refs, i, laref)
     {
       if (gimple_code (laref->stmt) != GIMPLE_PHI)
 	continue;
 
       gcc_assert (laref->distance > 0);
-      VEC_replace (tree, chain->inits, n - laref->distance,
-		   PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry));
+      chain->inits[n - laref->distance] 
+	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
     }
 
   for (i = 0; i < n; i++)
     {
-      if (VEC_index (tree, chain->inits, i) != NULL_TREE)
+      gimple_seq stmts = NULL;
+
+      if (chain->inits[i] != NULL_TREE)
 	continue;
 
-      init = ref_at_iteration (loop, DR_REF (dr), (int) i - n);
-      if (!init)
-	return false;
-
+      init = ref_at_iteration (dr, (int) i - n, &stmts);
       if (!chain->all_always_accessed && tree_could_trap_p (init))
-	return false;
-
-      init = force_gimple_operand (init, &stmts, false, NULL_TREE);
+	{
+	  gimple_seq_discard (stmts);
+	  return false;
+	}
+
       if (stmts)
-	gsi_insert_seq_on_edge_immediate (entry, stmts);
-
-      VEC_replace (tree, chain->inits, i, init);
+	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
+
+      chain->inits[i] = init;
     }
 
   return true;
@@ -2419,61 +2893,182 @@
    be used because the initializers might trap.  */
 
 static void
-prepare_initializers (struct loop *loop, VEC (chain_p, heap) *chains)
+prepare_initializers (struct loop *loop, vec<chain_p> chains)
 {
   chain_p chain;
   unsigned i;
 
-  for (i = 0; i < VEC_length (chain_p, chains); )
+  for (i = 0; i < chains.length (); )
     {
-      chain = VEC_index (chain_p, chains, i);
+      chain = chains[i];
       if (prepare_initializers_chain (loop, chain))
 	i++;
       else
 	{
 	  release_chain (chain);
-	  VEC_unordered_remove (chain_p, chains, i);
+	  chains.unordered_remove (i);
 	}
     }
 }
 
-/* Performs predictive commoning for LOOP.  Returns true if LOOP was
-   unrolled.  */
+/* Generates finalizer memory references for CHAIN in LOOP.  Returns true
+   if finalizer code for CHAIN can be generated, otherwise false.  */
+
+static bool
+prepare_finalizers_chain (struct loop *loop, chain_p chain)
+{
+  unsigned i, n = chain->length;
+  struct data_reference *dr = get_chain_root (chain)->ref;
+  tree fini, niters = number_of_latch_executions (loop);
+
+  /* For now we can't eliminate stores if some of them are conditional
+     executed.  */
+  if (!chain->all_always_accessed)
+    return false;
+
+  chain->finis.create (n);
+  for (i = 0; i < n; i++)
+    chain->finis.quick_push (NULL_TREE);
+
+  /* We never use looparound phi node for store elimination chains.  */
+
+  /* Find the finalizers for the variables, and check that they cannot
+     trap.  */
+  for (i = 0; i < n; i++)
+    {
+      gimple_seq stmts = NULL;
+      gcc_assert (chain->finis[i] == NULL_TREE);
+
+      if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
+	{
+	  niters = unshare_expr (niters);
+	  niters = force_gimple_operand (niters, &stmts, true, NULL);
+	  if (stmts)
+	    {
+	      gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
+	      stmts = NULL;
+	    }
+	}
+      fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
+      if (stmts)
+	gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
+
+      chain->finis[i] = fini;
+    }
+
+  return true;
+}
+
+/* Generates finalizer memory reference for CHAINS in LOOP.  Returns true
+   if finalizer code generation for CHAINS breaks loop closed ssa form.  */
 
 static bool
+prepare_finalizers (struct loop *loop, vec<chain_p> chains)
+{
+  chain_p chain;
+  unsigned i;
+  bool loop_closed_ssa = false;
+
+  for (i = 0; i < chains.length ();)
+    {
+      chain = chains[i];
+
+      /* Finalizer is only necessary for inter-iteration store elimination
+	 chains.  */
+      if (chain->length == 0 || chain->type != CT_STORE_STORE)
+	{
+	  i++;
+	  continue;
+	}
+
+      if (prepare_finalizers_chain (loop, chain))
+	{
+	  i++;
+	  /* Be conservative, assume loop closed ssa form is corrupted
+	     by store-store chain.  Though it's not always the case if
+	     eliminated stores only store loop invariant values into
+	     memory.  */
+	  loop_closed_ssa = true;
+	}
+      else
+	{
+	  release_chain (chain);
+	  chains.unordered_remove (i);
+	}
+    }
+  return loop_closed_ssa;
+}
+
+/* Insert all initializing gimple stmts into loop's entry edge.  */
+
+static void
+insert_init_seqs (struct loop *loop, vec<chain_p> chains)
+{
+  unsigned i;
+  edge entry = loop_preheader_edge (loop);
+
+  for (i = 0; i < chains.length (); ++i)
+    if (chains[i]->init_seq)
+      {
+	gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
+	chains[i]->init_seq = NULL;
+      }
+}
+
+/* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
+   if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
+   form was corrupted.  */
+
+static unsigned
 tree_predictive_commoning_loop (struct loop *loop)
 {
-  VEC (loop_p, heap) *loop_nest;
-  VEC (data_reference_p, heap) *datarefs;
-  VEC (ddr_p, heap) *dependences;
+  vec<data_reference_p> datarefs;
+  vec<ddr_p> dependences;
   struct component *components;
-  VEC (chain_p, heap) *chains = NULL;
+  vec<chain_p> chains = vNULL;
   unsigned unroll_factor;
   struct tree_niter_desc desc;
-  bool unroll = false;
+  bool unroll = false, loop_closed_ssa = false;
   edge exit;
-  bitmap tmp_vars;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Processing loop %d\n",  loop->num);
 
+  /* Nothing for predicitive commoning if loop only iterates 1 time.  */
+  if (get_max_loop_iterations_int (loop) == 0)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
+
+      return 0;
+    }
+
   /* Find the data references and split them into components according to their
      dependence relations.  */
-  datarefs = VEC_alloc (data_reference_p, heap, 10);
-  dependences = VEC_alloc (ddr_p, heap, 10);
-  loop_nest = VEC_alloc (loop_p, heap, 3);
-  compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
-				     &dependences);
+  auto_vec<loop_p, 3> loop_nest;
+  dependences.create (10);
+  datarefs.create (10);
+  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
+					   &dependences))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Cannot analyze data dependencies\n");
+      free_data_refs (datarefs);
+      free_dependence_relations (dependences);
+      return 0;
+    }
+
   if (dump_file && (dump_flags & TDF_DETAILS))
     dump_data_dependence_relations (dump_file, dependences);
 
   components = split_data_refs_to_components (loop, datarefs, dependences);
-  VEC_free (loop_p, heap, loop_nest);
+  loop_nest.release ();
   free_dependence_relations (dependences);
   if (!components)
     {
       free_data_refs (datarefs);
-      return false;
+      free_affine_expand_cache (&name_expansions);
+      return 0;
     }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2485,12 +3080,12 @@
   /* Find the suitable components and split them into chains.  */
   components = filter_suitable_components (loop, components);
 
-  tmp_vars = BITMAP_ALLOC (NULL);
+  auto_bitmap tmp_vars;
   looparound_phis = BITMAP_ALLOC (NULL);
   determine_roots (loop, components, &chains);
   release_components (components);
 
-  if (!chains)
+  if (!chains.exists ())
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
@@ -2498,10 +3093,13 @@
       goto end;
     }
   prepare_initializers (loop, chains);
+  loop_closed_ssa = prepare_finalizers (loop, chains);
 
   /* Try to combine the chains that are always worked with together.  */
   try_combine_chains (&chains);
 
+  insert_init_seqs (loop, chains);
+
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "Before commoning:\n\n");
@@ -2553,12 +3151,11 @@
 end: ;
   release_chains (chains);
   free_data_refs (datarefs);
-  BITMAP_FREE (tmp_vars);
   BITMAP_FREE (looparound_phis);
 
   free_affine_expand_cache (&name_expansions);
 
-  return unroll;
+  return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
 }
 
 /* Runs predictive commoning.  */
@@ -2566,24 +3163,78 @@
 unsigned
 tree_predictive_commoning (void)
 {
-  bool unrolled = false;
   struct loop *loop;
-  loop_iterator li;
-  unsigned ret = 0;
+  unsigned ret = 0, changed = 0;
 
   initialize_original_copy_tables ();
-  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
+  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
     if (optimize_loop_for_speed_p (loop))
       {
-	unrolled |= tree_predictive_commoning_loop (loop);
+	changed |= tree_predictive_commoning_loop (loop);
       }
-
-  if (unrolled)
+  free_original_copy_tables ();
+
+  if (changed > 0)
     {
       scev_reset ();
+
+      if (changed > 1)
+	rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+
       ret = TODO_cleanup_cfg;
     }
-  free_original_copy_tables ();
 
   return ret;
 }
+
+/* Predictive commoning Pass.  */
+
+static unsigned
+run_tree_predictive_commoning (struct function *fun)
+{
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  return tree_predictive_commoning ();
+}
+
+namespace {
+
+const pass_data pass_data_predcom =
+{
+  GIMPLE_PASS, /* type */
+  "pcom", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_PREDCOM, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
+};
+
+class pass_predcom : public gimple_opt_pass
+{
+public:
+  pass_predcom (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_predcom, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
+  virtual unsigned int execute (function *fun)
+    {
+      return run_tree_predictive_commoning (fun);
+    }
+
+}; // class pass_predcom
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_predcom (gcc::context *ctxt)
+{
+  return new pass_predcom (ctxt);
+}
+
+