diff gcc/tree-ssa-loop-ivcanon.c @ 16:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
--- a/gcc/tree-ssa-loop-ivcanon.c	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/tree-ssa-loop-ivcanon.c	Fri Oct 27 22:46:09 2017 +0900
@@ -1,6 +1,5 @@
-/* Induction variable canonicalization.
-   Copyright (C) 2004, 2005, 2007, 2008, 2010
-   Free Software Foundation, Inc.
+/* Induction variable canonicalization and loop peeling.
+   Copyright (C) 2004-2017 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -29,29 +28,41 @@
    variables.  In that case the created optimization possibilities are likely
    to pay up.
 
-   Additionally in case we detect that it is beneficial to unroll the
-   loop completely, we do it right here to expose the optimization
-   possibilities to the following passes.  */
+   We also perform
+     - complete unrolling (or peeling) when the loop is rolling few enough
+       times
+     - simple peeling (i.e. copying a few initial iterations prior to the loop)
+       when the number of iterations estimate is known (typically by the
+       profile info).  */
 
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
 #include "tree.h"
-#include "tm_p.h"
-#include "basic-block.h"
-#include "tree-pretty-print.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "cgraph.h"
 #include "gimple-pretty-print.h"
-#include "tree-flow.h"
-#include "tree-dump.h"
+#include "fold-const.h"
+#include "profile.h"
+#include "gimple-fold.h"
+#include "tree-eh.h"
+#include "gimple-iterator.h"
+#include "tree-cfg.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "tree-into-ssa.h"
 #include "cfgloop.h"
-#include "tree-pass.h"
 #include "tree-chrec.h"
 #include "tree-scalar-evolution.h"
 #include "params.h"
-#include "flags.h"
 #include "tree-inline.h"
-#include "target.h"
+#include "tree-cfgcleanup.h"
+#include "builtins.h"
 
 /* Specifies types of loops that may be unrolled.  */
 
@@ -72,7 +83,7 @@
 {
   edge in;
   tree type, var;
-  gimple cond;
+  gcond *cond;
   gimple_stmt_iterator incr_at;
   enum tree_code cmp;
 
@@ -83,7 +94,7 @@
       fprintf (dump_file, " iterations.\n");
     }
 
-  cond = last_stmt (exit->src);
+  cond = as_a <gcond *> (last_stmt (exit->src));
   in = EDGE_SUCC (exit->src, 0);
   if (in == exit)
     in = EDGE_SUCC (exit->src, 1);
@@ -110,23 +121,6 @@
   update_stmt (cond);
 }
 
-/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
-
-unsigned
-tree_num_loop_insns (struct loop *loop, eni_weights *weights)
-{
-  basic_block *body = get_loop_body (loop);
-  gimple_stmt_iterator gsi;
-  unsigned size = 0, i;
-
-  for (i = 0; i < loop->num_nodes; i++)
-    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
-      size += estimate_num_insns (gsi_stmt (gsi), weights);
-  free (body);
-
-  return size;
-}
-
 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 struct loop_size
 {
@@ -142,15 +136,27 @@
      instructions after exit are not executed.  */
   int last_iteration;
   int last_iteration_eliminated_by_peeling;
+  
+  /* If some IV computation will become constant.  */
+  bool constant_iv;
+
+  /* Number of call stmts that are not a builtin and are pure or const
+     present on the hot path.  */
+  int num_pure_calls_on_hot_path;
+  /* Number of call stmts that are not a builtin and are not pure nor const
+     present on the hot path.  */
+  int num_non_pure_calls_on_hot_path;
+  /* Number of statements other than calls in the loop.  */
+  int non_call_stmts_on_hot_path;
+  /* Number of branches seen on the hot path.  */
+  int num_branches_on_hot_path;
 };
 
 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 
 static bool
-constant_after_peeling (tree op, gimple stmt, struct loop *loop)
+constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
 {
-  affine_iv iv;
-
   if (is_gimple_min_invariant (op))
     return true;
 
@@ -162,8 +168,8 @@
       /* First make fast look if we see constant array inside.  */
       while (handled_component_p (base))
 	base = TREE_OPERAND (base, 0);
-      if ((DECL_P (base) == VAR_DECL
-	   && const_value_known_p (base))
+      if ((DECL_P (base)
+	   && ctor_for_folding (base) != error_mark_node)
 	  || CONSTANT_CLASS_P (base))
 	{
 	  /* If so, see if we understand all the indices.  */
@@ -180,113 +186,200 @@
       return false;
     }
 
-  /* Induction variables are constants.  */
-  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
+  /* Induction variables are constants when defined in loop.  */
+  if (loop_containing_stmt (stmt) != loop)
     return false;
-  if (!is_gimple_min_invariant (iv.base))
-    return false;
-  if (!is_gimple_min_invariant (iv.step))
+  tree ev = analyze_scalar_evolution (loop, op);
+  if (chrec_contains_undetermined (ev)
+      || chrec_contains_symbols (ev))
     return false;
   return true;
 }
 
-/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
-   Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
+/* Computes an estimated number of insns in LOOP.
+   EXIT (if non-NULL) is an exit edge that will be eliminated in all but the
+   last iteration of the loop.
+   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
+   iteration of the loop.
+   Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
+   Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 
-static void
-tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
+static bool
+tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
+			 struct loop_size *size, int upper_bound)
 {
   basic_block *body = get_loop_body (loop);
   gimple_stmt_iterator gsi;
   unsigned int i;
   bool after_exit;
+  vec<basic_block> path = get_loop_hot_path (loop);
 
   size->overall = 0;
   size->eliminated_by_peeling = 0;
   size->last_iteration = 0;
   size->last_iteration_eliminated_by_peeling = 0;
+  size->num_pure_calls_on_hot_path = 0;
+  size->num_non_pure_calls_on_hot_path = 0;
+  size->non_call_stmts_on_hot_path = 0;
+  size->num_branches_on_hot_path = 0;
+  size->constant_iv = 0;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
   for (i = 0; i < loop->num_nodes; i++)
     {
-      if (exit && body[i] != exit->src
-	  && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
+      if (edge_to_cancel && body[i] != edge_to_cancel->src
+	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 	after_exit = true;
       else
 	after_exit = false;
       if (dump_file && (dump_flags & TDF_DETAILS))
-	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
+	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
+		 after_exit);
 
       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 	{
-	  gimple stmt = gsi_stmt (gsi);
+	  gimple *stmt = gsi_stmt (gsi);
 	  int num = estimate_num_insns (stmt, &eni_size_weights);
 	  bool likely_eliminated = false;
+	  bool likely_eliminated_last = false;
+	  bool likely_eliminated_peeled = false;
 
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    {
 	      fprintf (dump_file, "  size: %3i ", num);
-	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
+	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 	    }
 
 	  /* Look for reasons why we might optimize this stmt away. */
 
-	  /* Exit conditional.  */
-	  if (body[i] == exit->src && stmt == last_stmt (exit->src))
-	    {
-	      if (dump_file && (dump_flags & TDF_DETAILS))
-	        fprintf (dump_file, "   Exit condition will be eliminated.\n");
-	      likely_eliminated = true;
-	    }
-	  /* Sets of IV variables  */
-	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
-	      && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
+	  if (!gimple_has_side_effects (stmt))
 	    {
-	      if (dump_file && (dump_flags & TDF_DETAILS))
-	        fprintf (dump_file, "   Induction variable computation will"
-			 " be folded away.\n");
-	      likely_eliminated = true;
-	    }
-	  /* Assignments of IV variables.  */
-	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
-		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
-		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
-		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
-		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
-		       				  stmt, loop)))
-	    {
-	      if (dump_file && (dump_flags & TDF_DETAILS))
-	        fprintf (dump_file, "   Constant expression will be folded away.\n");
-	      likely_eliminated = true;
-	    }
-	  /* Conditionals.  */
-	  else if (gimple_code (stmt) == GIMPLE_COND
-		   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
-		   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
-	    {
-	      if (dump_file && (dump_flags & TDF_DETAILS))
-	        fprintf (dump_file, "   Constant conditional.\n");
-	      likely_eliminated = true;
+	      /* Exit conditional.  */
+	      if (exit && body[i] == exit->src
+		  && stmt == last_stmt (exit->src))
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    fprintf (dump_file, "   Exit condition will be eliminated "
+			     "in peeled copies.\n");
+		  likely_eliminated_peeled = true;
+		}
+	      if (edge_to_cancel && body[i] == edge_to_cancel->src
+		  && stmt == last_stmt (edge_to_cancel->src))
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    fprintf (dump_file, "   Exit condition will be eliminated "
+			     "in last copy.\n");
+		  likely_eliminated_last = true;
+		}
+	      /* Sets of IV variables  */
+	      if (gimple_code (stmt) == GIMPLE_ASSIGN
+		  && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    fprintf (dump_file, "   Induction variable computation will"
+			     " be folded away.\n");
+		  likely_eliminated = true;
+		}
+	      /* Assignments of IV variables.  */
+	      else if (gimple_code (stmt) == GIMPLE_ASSIGN
+		       && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
+		       && constant_after_peeling (gimple_assign_rhs1 (stmt),
+						  stmt, loop)
+		       && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
+			   || constant_after_peeling (gimple_assign_rhs2 (stmt),
+						      stmt, loop)))
+		{
+		  size->constant_iv = true;
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    fprintf (dump_file,
+			     "   Constant expression will be folded away.\n");
+		  likely_eliminated = true;
+		}
+	      /* Conditionals.  */
+	      else if ((gimple_code (stmt) == GIMPLE_COND
+			&& constant_after_peeling (gimple_cond_lhs (stmt), stmt,
+						   loop)
+			&& constant_after_peeling (gimple_cond_rhs (stmt), stmt,
+						   loop)
+			/* We don't simplify all constant compares so make sure
+			   they are not both constant already.  See PR70288.  */
+			&& (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
+			    || ! is_gimple_min_invariant
+				 (gimple_cond_rhs (stmt))))
+		       || (gimple_code (stmt) == GIMPLE_SWITCH
+			   && constant_after_peeling (gimple_switch_index (
+							as_a <gswitch *>
+							  (stmt)),
+						      stmt, loop)
+			   && ! is_gimple_min_invariant
+				   (gimple_switch_index
+				      (as_a <gswitch *> (stmt)))))
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    fprintf (dump_file, "   Constant conditional.\n");
+		  likely_eliminated = true;
+		}
 	    }
 
 	  size->overall += num;
-	  if (likely_eliminated)
+	  if (likely_eliminated || likely_eliminated_peeled)
 	    size->eliminated_by_peeling += num;
 	  if (!after_exit)
 	    {
 	      size->last_iteration += num;
-	      if (likely_eliminated)
+	      if (likely_eliminated || likely_eliminated_last)
 		size->last_iteration_eliminated_by_peeling += num;
 	    }
+	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
+	      - size->last_iteration_eliminated_by_peeling) > upper_bound)
+	    {
+              free (body);
+	      path.release ();
+	      return true;
+	    }
 	}
     }
+  while (path.length ())
+    {
+      basic_block bb = path.pop ();
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+	  if (gimple_code (stmt) == GIMPLE_CALL
+	      && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
+	    {
+	      int flags = gimple_call_flags (stmt);
+	      if (flags & (ECF_PURE | ECF_CONST))
+		size->num_pure_calls_on_hot_path++;
+	      else
+		size->num_non_pure_calls_on_hot_path++;
+	      size->num_branches_on_hot_path ++;
+	    }
+	  /* Count inexpensive calls as non-calls, because they will likely
+	     expand inline.  */
+	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
+	    size->non_call_stmts_on_hot_path++;
+	  if (((gimple_code (stmt) == GIMPLE_COND
+	        && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
+		    || constant_after_peeling (gimple_cond_rhs (stmt), stmt,
+					       loop)))
+	       || (gimple_code (stmt) == GIMPLE_SWITCH
+		   && !constant_after_peeling (gimple_switch_index (
+						 as_a <gswitch *> (stmt)),
+					       stmt, loop)))
+	      && (!exit || bb != exit->src))
+	    size->num_branches_on_hot_path++;
+	}
+    }
+  path.release ();
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
     	     size->eliminated_by_peeling, size->last_iteration,
 	     size->last_iteration_eliminated_by_peeling);
 
   free (body);
+  return false;
 }
 
 /* Estimate number of insns of completely unrolled loop.
@@ -295,7 +388,7 @@
    is dead and that some instructions will be eliminated after
    peeling.
 
-   Loop body is likely going to simplify futher, this is difficult
+   Loop body is likely going to simplify further, this is difficult
    to guess, we just decrease the result by 1/3.  */
 
 static unsigned HOST_WIDE_INT
@@ -316,37 +409,359 @@
   return unr_insns;
 }
 
+/* Loop LOOP is known to not loop.  See if there is an edge in the loop
+   body that can be removed to make the loop always exit while at
+   the same time not making any code potentially executed
+   during the last iteration dead.
+
+   After complete unrolling we still may get rid of the conditional
+   on the exit in the last copy even if we have no idea what it does.
+   This is a quite common case for loops of the form
+
+     int a[5];
+     for (i=0;i<b;i++)
+       a[i]=0;
+
+   Here we prove the loop to iterate 5 times but we do not know
+   it from the induction variable.
+
+   For now we handle only the simple case where there is an exit condition
+   just before the latch block and the latch block contains no statements
+   with side effects that may otherwise terminate the execution of the loop
+   (such as by EH or by terminating the program or longjmp).
+
+   In the general case we may want to cancel the paths leading to statements
+   loop-niter identified as having undefined effect in the last iteration.
+   The other cases are hopefully rare and will be cleaned up later.  */
+
+static edge
+loop_edge_to_cancel (struct loop *loop)
+{
+  vec<edge> exits;
+  unsigned i;
+  edge edge_to_cancel;
+  gimple_stmt_iterator gsi;
+
+  /* We want only one predecessor of the loop.  */
+  if (EDGE_COUNT (loop->latch->preds) > 1)
+    return NULL;
+
+  exits = get_loop_exit_edges (loop);
+
+  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
+    {
+       /* Find the edge other than the loop exit
+          leaving the conditional.  */
+       if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
+         continue;
+       if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
+         edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
+       else
+         edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
+
+      /* We only can handle conditionals.  */
+      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
+	continue;
+
+      /* We should never have conditionals in the loop latch. */
+      gcc_assert (edge_to_cancel->dest != loop->header);
+
+      /* Check that it leads to loop latch.  */
+      if (edge_to_cancel->dest != loop->latch)
+        continue;
+
+      exits.release ();
+
+      /* Verify that the code in loop latch does nothing that may end program
+         execution without really reaching the exit.  This may include
+	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
+      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
+	if (gimple_has_side_effects (gsi_stmt (gsi)))
+	   return NULL;
+      return edge_to_cancel;
+    }
+  exits.release ();
+  return NULL;
+}
+
+/* Remove all tests for exits that are known to be taken after LOOP was
+   peeled NPEELED times. Put gcc_unreachable before every statement
+   known to not be executed.  */
+
+static bool
+remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
+{
+  struct nb_iter_bound *elt;
+  bool changed = false;
+
+  for (elt = loop->bounds; elt; elt = elt->next)
+    {
+      /* If statement is known to be undefined after peeling, turn it
+	 into unreachable (or trap when debugging experience is supposed
+	 to be good).  */
+      if (!elt->is_exit
+	  && wi::ltu_p (elt->bound, npeeled))
+	{
+	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
+	  gcall *stmt = gimple_build_call
+	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
+	  gimple_set_location (stmt, gimple_location (elt->stmt));
+	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
+	  split_block (gimple_bb (stmt), stmt);
+	  changed = true;
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Forced statement unreachable: ");
+	      print_gimple_stmt (dump_file, elt->stmt, 0);
+	    }
+	}
+      /* If we know the exit will be taken after peeling, update.  */
+      else if (elt->is_exit
+	       && wi::leu_p (elt->bound, npeeled))
+	{
+	  basic_block bb = gimple_bb (elt->stmt);
+	  edge exit_edge = EDGE_SUCC (bb, 0);
+
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Forced exit to be taken: ");
+	      print_gimple_stmt (dump_file, elt->stmt, 0);
+	    }
+	  if (!loop_exit_edge_p (loop, exit_edge))
+	    exit_edge = EDGE_SUCC (bb, 1);
+	  exit_edge->probability = profile_probability::always ();
+	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
+	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
+	  if (exit_edge->flags & EDGE_TRUE_VALUE)
+	    gimple_cond_make_true (cond_stmt);
+	  else
+	    gimple_cond_make_false (cond_stmt);
+	  update_stmt (cond_stmt);
+	  changed = true;
+	}
+    }
+  return changed;
+}
+
+/* Remove all exits that are known to be never taken because of the loop bound
+   discovered.  */
+
+static bool
+remove_redundant_iv_tests (struct loop *loop)
+{
+  struct nb_iter_bound *elt;
+  bool changed = false;
+
+  if (!loop->any_upper_bound)
+    return false;
+  for (elt = loop->bounds; elt; elt = elt->next)
+    {
+      /* Exit is pointless if it won't be taken before loop reaches
+	 upper bound.  */
+      if (elt->is_exit && loop->any_upper_bound
+          && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
+	{
+	  basic_block bb = gimple_bb (elt->stmt);
+	  edge exit_edge = EDGE_SUCC (bb, 0);
+	  struct tree_niter_desc niter;
+
+	  if (!loop_exit_edge_p (loop, exit_edge))
+	    exit_edge = EDGE_SUCC (bb, 1);
+
+	  /* Only when we know the actual number of iterations, not
+	     just a bound, can we remove the exit.  */
+	  if (!number_of_iterations_exit (loop, exit_edge,
+					  &niter, false, false)
+	      || !integer_onep (niter.assumptions)
+	      || !integer_zerop (niter.may_be_zero)
+	      || !niter.niter
+	      || TREE_CODE (niter.niter) != INTEGER_CST
+	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
+			     wi::to_widest (niter.niter)))
+	    continue;
+	  
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Removed pointless exit: ");
+	      print_gimple_stmt (dump_file, elt->stmt, 0);
+	    }
+	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
+	  if (exit_edge->flags & EDGE_TRUE_VALUE)
+	    gimple_cond_make_false (cond_stmt);
+	  else
+	    gimple_cond_make_true (cond_stmt);
+	  update_stmt (cond_stmt);
+	  changed = true;
+	}
+    }
+  return changed;
+}
+
+/* Stores loops that will be unlooped and edges that will be removed
+   after we process the whole loop tree.  */
+static vec<loop_p> loops_to_unloop;
+static vec<int> loops_to_unloop_nunroll;
+static vec<edge> edges_to_remove;
+/* Stores loops that have been peeled.  */
+static bitmap peeled_loops;
+
+/* Cancel all fully unrolled loops by putting __builtin_unreachable
+   on the latch edge.
+   We do it after all unrolling since unlooping moves basic blocks
+   across loop boundaries, trashing loop closed SSA form as well
+   as SCEV info that needs to be intact during unrolling.
+
+   IRRED_INVALIDATED is used to bookkeep whether information about
+   irreducible regions may become invalid as a result
+   of the transformation.
+   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
+   when we need to go into loop closed SSA form.  */
+
+static void
+unloop_loops (bitmap loop_closed_ssa_invalidated,
+	      bool *irred_invalidated)
+{
+  while (loops_to_unloop.length ())
+    {
+      struct loop *loop = loops_to_unloop.pop ();
+      int n_unroll = loops_to_unloop_nunroll.pop ();
+      basic_block latch = loop->latch;
+      edge latch_edge = loop_latch_edge (loop);
+      int flags = latch_edge->flags;
+      location_t locus = latch_edge->goto_locus;
+      gcall *stmt;
+      gimple_stmt_iterator gsi;
+
+      remove_exits_and_undefined_stmts (loop, n_unroll);
+
+      /* Unloop destroys the latch edge.  */
+      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
+
+      /* Create new basic block for the latch edge destination and wire
+	 it in.  */
+      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
+      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
+      latch_edge->probability = profile_probability::never ();
+      latch_edge->flags |= flags;
+      latch_edge->goto_locus = locus;
+
+      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
+      latch_edge->dest->count = profile_count::zero ();
+      latch_edge->dest->frequency = 0;
+      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
+
+      gsi = gsi_start_bb (latch_edge->dest);
+      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
+    }
+  loops_to_unloop.release ();
+  loops_to_unloop_nunroll.release ();
+
+  /* Remove edges in peeled copies.  */
+  unsigned i;
+  edge e;
+  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
+    {
+      bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated);
+      gcc_assert (ok);
+    }
+  edges_to_remove.release ();
+}
+
 /* Tries to unroll LOOP completely, i.e. NITER times.
    UL determines which loops we are allowed to unroll.
-   EXIT is the exit of the loop that should be eliminated.  */
+   EXIT is the exit of the loop that should be eliminated.
+   MAXITER specifies a bound on the number of iterations, -1 if it is
+   not known or too large for HOST_WIDE_INT.  The location
+   LOCUS corresponding to the loop is used when emitting
+   a summary of the unroll to the dump file.  */
 
 static bool
 try_unroll_loop_completely (struct loop *loop,
 			    edge exit, tree niter,
-			    enum unroll_level ul)
+			    enum unroll_level ul,
+			    HOST_WIDE_INT maxiter,
+			    location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
-  gimple cond;
+  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
   struct loop_size size;
+  bool n_unroll_found = false;
+  edge edge_to_cancel = NULL;
+  dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
 
-  if (loop->inner)
+  /* See if we proved the number of iterations to be a low constant.
+
+     EXIT is an edge that will be removed in all but the last iteration of
+     the loop.
+
+     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
+     of the unrolled sequence and is expected to make the final loop not
+     rolling.
+
+     If the number of executions of the loop is determined by a standard
+     induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
+     leaving the iv test.  */
+  if (tree_fits_uhwi_p (niter))
+    {
+      n_unroll = tree_to_uhwi (niter);
+      n_unroll_found = true;
+      edge_to_cancel = EDGE_SUCC (exit->src, 0);
+      if (edge_to_cancel == exit)
+	edge_to_cancel = EDGE_SUCC (exit->src, 1);
+    }
+  /* We do not know the number of iterations and thus we can not eliminate
+     the EXIT edge.  */
+  else
+    exit = NULL;
+
+  /* See if we can improve our estimate by using recorded loop bounds.  */
+  if (maxiter >= 0
+      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
+    {
+      n_unroll = maxiter;
+      n_unroll_found = true;
+      /* Loop terminates before the IV variable test, so we can not
+	 remove it in the last iteration.  */
+      edge_to_cancel = NULL;
+    }
+
+  if (!n_unroll_found)
     return false;
 
-  if (!host_integerp (niter, 1))
-    return false;
-  n_unroll = tree_low_cst (niter, 1);
+  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Not unrolling loop %d "
+		 "(--param max-completely-peel-times limit reached).\n",
+		 loop->num);
+      return false;
+    }
 
-  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
-  if (n_unroll > max_unroll)
-    return false;
+  if (!edge_to_cancel)
+    edge_to_cancel = loop_edge_to_cancel (loop);
 
   if (n_unroll)
     {
+      bool large;
       if (ul == UL_SINGLE_ITER)
 	return false;
 
-      tree_estimate_loop_size (loop, exit, &size);
+      /* EXIT can be removed only if we are sure the loop passes the first
+	 N_UNROLL iterations.  */
+      bool remove_exit = (exit && niter
+			  && TREE_CODE (niter) == INTEGER_CST
+			  && wi::leu_p (n_unroll, wi::to_widest (niter)));
+
+      large = tree_estimate_loop_size
+		 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
+		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
       ninsns = size.overall;
+      if (large)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+		     loop->num);
+	  return false;
+	}
 
       unr_insns = estimated_unrolled_size (&size, n_unroll);
       if (dump_file && (dump_flags & TDF_DETAILS))
@@ -356,79 +771,352 @@
 		   (int) unr_insns);
 	}
 
-      if (unr_insns > ninsns
-	  && (unr_insns
-	      > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
+      /* If the code is going to shrink, we don't need to be extra cautious
+	 on guessing if the unrolling is going to be profitable.  */
+      if (unr_insns
+	  /* If there is IV variable that will become constant, we save
+	     one instruction in the loop prologue we do not account
+	     otherwise.  */
+	  <= ninsns + (size.constant_iv != false))
+	;
+      /* We unroll only inner loops, because we do not consider it profitable
+	 otherwise.  We can still cancel the loopback edge of a loop that does
+	 not roll; this is always a good idea.  */
+      else if (ul == UL_NO_GROWTH)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+		     loop->num);
+	  return false;
+	}
+      /* Outer loops tend to be less interesting candidates for complete
+	 unrolling unless we can do a lot of propagation into the inner loop
+	 body.  For now we disable outer loop unrolling when the code would
+	 grow.  */
+      else if (loop->inner)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: "
+		     "it is not innermost and code would grow.\n",
+		     loop->num);
+	  return false;
+	}
+      /* If there is a call on a hot path through the loop, then
+	 there is most probably not much to optimize.  */
+      else if (size.num_non_pure_calls_on_hot_path)
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d "
+	    fprintf (dump_file, "Not unrolling loop %d: "
+		     "contains call and code would grow.\n",
+		     loop->num);
+	  return false;
+	}
+      /* If there is a pure/const call in the function, then we
+	 can still optimize the unrolled loop body if it contains
+	 some interesting code other than the calls and the code
+	 storing or accumulating the return value.  */
+      else if (size.num_pure_calls_on_hot_path
+	       /* One IV increment, one test, one ivtmp store
+		  and one useful stmt.  That is about minimal loop
+		  doing pure call.  */
+	       && (size.non_call_stmts_on_hot_path
+		   <= 3 + size.num_pure_calls_on_hot_path))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: "
+		     "contains just pure calls and code would grow.\n",
+		     loop->num);
+	  return false;
+	}
+      /* Complete unrolling is a major win when control flow is removed and
+	 one big basic block is created.  If the loop contains control flow
+	 the optimization may still be a win because of eliminating the loop
+	 overhead but it also may blow the branch predictor tables.
+	 Limit number of branches on the hot path through the peeled
+	 sequence.  */
+      else if (size.num_branches_on_hot_path * (int)n_unroll
+	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: "
+		     " number of branches on hot path in the unrolled sequence"
+		     " reach --param max-peel-branches limit.\n",
+		     loop->num);
+	  return false;
+	}
+      else if (unr_insns
+	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: "
 		     "(--param max-completely-peeled-insns limit reached).\n",
 		     loop->num);
 	  return false;
 	}
-
-      if (ul == UL_NO_GROWTH
-	  && unr_insns > ninsns)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
-	  return false;
-	}
-    }
-
-  if (n_unroll)
-    {
-      sbitmap wont_exit;
-      edge e;
-      unsigned i;
-      VEC (edge, heap) *to_remove = NULL;
+      if (!n_unroll)
+        dump_printf_loc (report_flags, locus,
+                         "loop turned into non-loop; it never loops.\n");
 
       initialize_original_copy_tables ();
-      wont_exit = sbitmap_alloc (n_unroll + 1);
-      sbitmap_ones (wont_exit);
-      RESET_BIT (wont_exit, 0);
+      auto_sbitmap wont_exit (n_unroll + 1);
+      if (exit && niter
+	  && TREE_CODE (niter) == INTEGER_CST
+	  && wi::leu_p (n_unroll, wi::to_widest (niter)))
+	{
+	  bitmap_ones (wont_exit);
+	  if (wi::eq_p (wi::to_widest (niter), n_unroll)
+	      || edge_to_cancel)
+	    bitmap_clear_bit (wont_exit, 0);
+	}
+      else
+	{
+	  exit = NULL;
+	  bitmap_clear (wont_exit);
+	}
 
       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 						 n_unroll, wont_exit,
-						 exit, &to_remove,
+						 exit, &edges_to_remove,
 						 DLTHE_FLAG_UPDATE_FREQ
 						 | DLTHE_FLAG_COMPLETTE_PEEL))
 	{
           free_original_copy_tables ();
-	  free (wont_exit);
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Failed to duplicate the loop\n");
 	  return false;
 	}
 
-      FOR_EACH_VEC_ELT (edge, to_remove, i, e)
-	{
-	  bool ok = remove_path (e);
-	  gcc_assert (ok);
-	}
-
-      VEC_free (edge, heap, to_remove);
-      free (wont_exit);
       free_original_copy_tables ();
     }
 
-  cond = last_stmt (exit->src);
-  if (exit->flags & EDGE_TRUE_VALUE)
-    gimple_cond_make_true (cond);
-  else
-    gimple_cond_make_false (cond);
-  update_stmt (cond);
-  update_ssa (TODO_update_ssa);
+  /* Remove the conditional from the last copy of the loop.  */
+  if (edge_to_cancel)
+    {
+      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
+      force_edge_cold (edge_to_cancel, true);
+      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
+	gimple_cond_make_false (cond);
+      else
+	gimple_cond_make_true (cond);
+      update_stmt (cond);
+      /* Do not remove the path.  Doing so may remove the outer loop
+	 and confuse bookkeeping code in tree_unroll_loops_completely.  */
+    }
+
+  /* Store the loop for later unlooping and exit removal.  */
+  loops_to_unloop.safe_push (loop);
+  loops_to_unloop_nunroll.safe_push (n_unroll);
+
+  if (dump_enabled_p ())
+    {
+      if (!n_unroll)
+        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
+                         "loop turned into non-loop; it never loops\n");
+      else
+        {
+          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
+                           "loop with %d iterations completely unrolled",
+			   (int) (n_unroll + 1));
+          if (loop->header->count.initialized_p ())
+            dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
+                         " (header execution count %d)",
+                         (int)loop->header->count.to_gcov_type ());
+          dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
+        }
+    }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
+    {
+      if (exit)
+        fprintf (dump_file, "Exit condition of peeled iterations was "
+		 "eliminated.\n");
+      if (edge_to_cancel)
+        fprintf (dump_file, "Last iteration exit edge was proved true.\n");
+      else
+        fprintf (dump_file, "Latch of last iteration was marked by "
+		 "__builtin_unreachable ().\n");
+    }
 
   return true;
 }
 
+/* Return number of instructions after peeling.  */
+static unsigned HOST_WIDE_INT
+estimated_peeled_sequence_size (struct loop_size *size,
+			        unsigned HOST_WIDE_INT npeel)
+{
+  return MAX (npeel * (HOST_WIDE_INT) (size->overall
+			     	       - size->eliminated_by_peeling), 1);
+}
+
+/* If the loop is expected to iterate N times and is
+   small enough, duplicate the loop body N+1 times before
+   the loop itself.  This way the hot path will never
+   enter the loop.
+   Parameters are the same as for try_unroll_loop_completely.  */
+
+static bool
+try_peel_loop (struct loop *loop,
+	       edge exit, tree niter,
+	       HOST_WIDE_INT maxiter)
+{
+  HOST_WIDE_INT npeel;
+  struct loop_size size;
+  int peeled_size;
+
+  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
+      || !peeled_loops)
+    return false;
+
+  if (bitmap_bit_p (peeled_loops, loop->num))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: loop is already peeled\n");
+      return false;
+    }
+
+  /* Peel only innermost loops.
+     While the code is perfectly capable of peeling non-innermost loops,
+     the heuristics would probably need some improvements. */
+  if (loop->inner)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: outer loop\n");
+      return false;
+    }
+
+  if (!optimize_loop_for_speed_p (loop))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: cold loop\n");
+      return false;
+    }
+
+  /* Check if there is an estimate on the number of iterations.  */
+  npeel = estimated_loop_iterations_int (loop);
+  if (npeel < 0)
+    npeel = likely_max_loop_iterations_int (loop);
+  if (npeel < 0)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: number of iterations is not "
+	         "estimated\n");
+      return false;
+    }
+  if (maxiter >= 0 && maxiter <= npeel)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: upper bound is known so can "
+		 "unroll completely\n");
+      return false;
+    }
+
+  /* We want to peel the estimated number of iterations + 1 (so we never
+     enter the loop on the quick path).  Check against PARAM_MAX_PEEL_TIMES
+     and be sure to avoid overflows.  */
+  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: rolls too much "
+		 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
+      return false;
+    }
+  npeel++;
+
+  /* Check peeled loops size.  */
+  tree_estimate_loop_size (loop, exit, NULL, &size,
+			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
+  if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
+      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+		 "(%i insns > --param max-peel-insns)", peeled_size);
+      return false;
+    }
+
+  /* Duplicate possibly eliminating the exits.  */
+  initialize_original_copy_tables ();
+  auto_sbitmap wont_exit (npeel + 1);
+  if (exit && niter
+      && TREE_CODE (niter) == INTEGER_CST
+      && wi::leu_p (npeel, wi::to_widest (niter)))
+    {
+      bitmap_ones (wont_exit);
+      bitmap_clear_bit (wont_exit, 0);
+    }
+  else
+    {
+      exit = NULL;
+      bitmap_clear (wont_exit);
+    }
+  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
+					     npeel, wont_exit,
+					     exit, &edges_to_remove,
+					     DLTHE_FLAG_UPDATE_FREQ))
+    {
+      free_original_copy_tables ();
+      return false;
+    }
+  free_original_copy_tables ();
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Peeled loop %d, %i times.\n",
+	       loop->num, (int) npeel);
+    }
+  if (loop->any_estimate)
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
+        loop->nb_iterations_estimate -= npeel;
+      else
+	loop->nb_iterations_estimate = 0;
+    }
+  if (loop->any_upper_bound)
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
+        loop->nb_iterations_upper_bound -= npeel;
+      else
+        loop->nb_iterations_upper_bound = 0;
+    }
+  if (loop->any_likely_upper_bound)
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
+	loop->nb_iterations_likely_upper_bound -= npeel;
+      else
+	{
+	  loop->any_estimate = true;
+	  loop->nb_iterations_estimate = 0;
+	  loop->nb_iterations_likely_upper_bound = 0;
+	}
+    }
+  profile_count entry_count = profile_count::zero ();
+  int entry_freq = 0;
+
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    if (e->src != loop->latch)
+      {
+	if (e->src->count.initialized_p ())
+	  entry_count = e->src->count + e->src->count;
+	entry_freq += e->src->frequency;
+	gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
+      }
+  profile_probability p = profile_probability::very_unlikely ();
+  if (loop->header->count > 0)
+    p = entry_count.probability_in (loop->header->count);
+  else if (loop->header->frequency)
+    p = profile_probability::probability_in_gcov_type
+		 (entry_freq, loop->header->frequency);
+  scale_loop_profile (loop, p, 0);
+  bitmap_set_bit (peeled_loops, loop->num);
+  return true;
+}
 /* Adds a canonical induction variable to LOOP if suitable.
    CREATE_IV is true if we may create a new iv.  UL determines
    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
    to determine the number of iterations of a loop by direct evaluation.
-   Returns true if cfg is changed.  */
+   Returns true if cfg is changed.   */
 
 static bool
 canonicalize_loop_induction_variables (struct loop *loop,
@@ -437,19 +1125,19 @@
 {
   edge exit = NULL;
   tree niter;
+  HOST_WIDE_INT maxiter;
+  bool modified = false;
+  location_t locus = UNKNOWN_LOCATION;
 
   niter = number_of_latch_executions (loop);
+  exit = single_exit (loop);
   if (TREE_CODE (niter) == INTEGER_CST)
-    {
-      exit = single_exit (loop);
-      if (!just_once_each_iteration_p (loop, exit->src))
-	return false;
-    }
+    locus = gimple_location (last_stmt (exit->src));
   else
     {
       /* If the loop has more than one exit, try checking all of them
 	 for # of iterations determinable through scev.  */
-      if (!single_exit (loop))
+      if (!exit)
 	niter = find_loop_niter (loop, &exit);
 
       /* Finally if everything else fails, try brute force evaluation.  */
@@ -458,25 +1146,61 @@
 	      || TREE_CODE (niter) != INTEGER_CST))
 	niter = find_loop_niter_by_eval (loop, &exit);
 
-      if (chrec_contains_undetermined (niter)
-	  || TREE_CODE (niter) != INTEGER_CST)
-	return false;
+      if (exit)
+        locus = gimple_location (last_stmt (exit->src));
+
+      if (TREE_CODE (niter) != INTEGER_CST)
+	exit = NULL;
     }
 
-  if (dump_file && (dump_flags & TDF_DETAILS))
+  /* We work exceptionally hard here to estimate the bound
+     by find_loop_niter_by_eval.  Be sure to keep it for future.  */
+  if (niter && TREE_CODE (niter) == INTEGER_CST)
+    {
+      record_niter_bound (loop, wi::to_widest (niter),
+			  exit == single_likely_exit (loop), true);
+    }
+
+  /* Force re-computation of loop bounds so we can remove redundant exits.  */
+  maxiter = max_loop_iterations_int (loop);
+
+  if (dump_file && (dump_flags & TDF_DETAILS)
+      && TREE_CODE (niter) == INTEGER_CST)
     {
       fprintf (dump_file, "Loop %d iterates ", loop->num);
       print_generic_expr (dump_file, niter, TDF_SLIM);
       fprintf (dump_file, " times.\n");
     }
+  if (dump_file && (dump_flags & TDF_DETAILS)
+      && maxiter >= 0)
+    {
+      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
+	       (int)maxiter);
+    }
+  if (dump_file && (dump_flags & TDF_DETAILS)
+      && likely_max_loop_iterations_int (loop) >= 0)
+    {
+      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
+	       loop->num, (int)likely_max_loop_iterations_int (loop));
+    }
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul))
+  /* Remove exits that are known to be never taken based on the loop bound.
+     Needs to be called after computation of max_loop_iterations_int that
+     populates the loop bounds.  */
+  modified |= remove_redundant_iv_tests (loop);
+
+  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
     return true;
 
-  if (create_iv)
+  if (create_iv
+      && niter && !chrec_contains_undetermined (niter)
+      && exit && just_once_each_iteration_p (loop, exit->src))
     create_canonical_iv (loop, exit, niter);
 
-  return false;
+  if (ul == UL_ALL)
+    modified |= try_peel_loop (loop, exit, niter, maxiter);
+
+  return modified;
 }
 
 /* The main entry point of the pass.  Adds canonical induction variables
@@ -485,26 +1209,145 @@
 unsigned int
 canonicalize_induction_variables (void)
 {
-  loop_iterator li;
   struct loop *loop;
   bool changed = false;
+  bool irred_invalidated = false;
+  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
-  FOR_EACH_LOOP (li, loop, 0)
+  estimate_numbers_of_iterations (cfun);
+
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
       changed |= canonicalize_loop_induction_variables (loop,
 							true, UL_SINGLE_ITER,
 							true);
     }
+  gcc_assert (!need_ssa_update_p (cfun));
+
+  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
+  if (irred_invalidated
+      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
+    mark_irreducible_loops ();
 
   /* Clean up the information about numbers of iterations, since brute force
      evaluation could reveal new information.  */
+  free_numbers_of_iterations_estimates (cfun);
   scev_reset ();
 
+  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
+    {
+      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
+      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+    }
+  BITMAP_FREE (loop_closed_ssa_invalidated);
+
   if (changed)
     return TODO_cleanup_cfg;
   return 0;
 }
 
+/* Propagate constant SSA_NAMEs defined in basic block BB.  */
+
+static void
+propagate_constants_for_unrolling (basic_block bb)
+{
+  /* Look for degenerate PHI nodes with constant argument.  */
+  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
+    {
+      gphi *phi = gsi.phi ();
+      tree result = gimple_phi_result (phi);
+      tree arg = gimple_phi_arg_def (phi, 0);
+
+      if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
+	  && gimple_phi_num_args (phi) == 1
+	  && CONSTANT_CLASS_P (arg))
+	{
+	  replace_uses_by (result, arg);
+	  gsi_remove (&gsi, true);
+	  release_ssa_name (result);
+	}
+      else
+	gsi_next (&gsi);
+    }
+
+  /* Look for assignments to SSA names with constant RHS.  */
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      tree lhs;
+
+      if (is_gimple_assign (stmt)
+	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant
+	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
+	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
+	{
+	  replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
+	  gsi_remove (&gsi, true);
+	  release_ssa_name (lhs);
+	}
+      else
+	gsi_next (&gsi);
+    }
+}
+
+/* Process loops from innermost to outer, stopping at the innermost
+   loop we unrolled.  */
+
+static bool
+tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
+				bitmap father_bbs, struct loop *loop)
+{
+  struct loop *loop_father;
+  bool changed = false;
+  struct loop *inner;
+  enum unroll_level ul;
+
+  /* Process inner loops first.  */
+  for (inner = loop->inner; inner != NULL; inner = inner->next)
+    changed |= tree_unroll_loops_completely_1 (may_increase_size,
+					       unroll_outer, father_bbs,
+					       inner);
+ 
+  /* If we changed an inner loop we cannot process outer loops in this
+     iteration because SSA form is not up-to-date.  Continue with
+     siblings of outer loops instead.  */
+  if (changed)
+    return true;
+
+  /* Don't unroll #pragma omp simd loops until the vectorizer
+     attempts to vectorize those.  */
+  if (loop->force_vectorize)
+    return false;
+
+  /* Try to unroll this loop.  */
+  loop_father = loop_outer (loop);
+  if (!loop_father)
+    return false;
+
+  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+      /* Unroll outermost loops only if asked to do so or they do
+	 not cause code growth.  */
+      && (unroll_outer || loop_outer (loop_father)))
+    ul = UL_ALL;
+  else
+    ul = UL_NO_GROWTH;
+
+  if (canonicalize_loop_induction_variables
+        (loop, false, ul, !flag_tree_loop_ivcanon))
+    {
+      /* If we'll continue unrolling, we need to propagate constants
+	 within the new basic blocks to fold away induction variable
+	 computations; otherwise, the size might blow up before the
+	 iteration is complete and the IR eventually cleaned up.  */
+      if (loop_outer (loop_father))
+	bitmap_set_bit (father_bbs, loop_father->header->index);
+
+      return true;
+    }
+
+  return false;
+}
+
 /* Unroll LOOPS completely if they iterate just few times.  Unless
    MAY_INCREASE_SIZE is true, perform the unrolling only if the
    size of the code does not increase.  */
@@ -512,32 +1355,66 @@
 unsigned int
 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 {
-  loop_iterator li;
-  struct loop *loop;
+  bitmap father_bbs = BITMAP_ALLOC (NULL);
   bool changed;
-  enum unroll_level ul;
   int iteration = 0;
+  bool irred_invalidated = false;
+
+  estimate_numbers_of_iterations (cfun);
 
   do
     {
       changed = false;
+      bitmap loop_closed_ssa_invalidated = NULL;
 
-      FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
-	{
-	  if (may_increase_size && optimize_loop_for_speed_p (loop)
-	      /* Unroll outermost loops only if asked to do so or they do
-		 not cause code growth.  */
-	      && (unroll_outer
-		  || loop_outer (loop_outer (loop))))
-	    ul = UL_ALL;
-	  else
-	    ul = UL_NO_GROWTH;
-	  changed |= canonicalize_loop_induction_variables
-		       (loop, false, ul, !flag_tree_loop_ivcanon);
-	}
+      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
+	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
+      free_numbers_of_iterations_estimates (cfun);
+      estimate_numbers_of_iterations (cfun);
+
+      changed = tree_unroll_loops_completely_1 (may_increase_size,
+						unroll_outer, father_bbs,
+						current_loops->tree_root);
       if (changed)
 	{
+	  unsigned i;
+
+          unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
+
+	  /* We can not use TODO_update_ssa_no_phi because VOPS gets confused.  */
+	  if (loop_closed_ssa_invalidated
+	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
+            rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
+					  TODO_update_ssa);
+	  else
+	    update_ssa (TODO_update_ssa);
+
+	  /* father_bbs is a bitmap of loop father header BB indices.
+	     Translate that to what non-root loops these BBs belong to now.  */
+	  bitmap_iterator bi;
+	  bitmap fathers = BITMAP_ALLOC (NULL);
+	  EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
+	    {
+	      basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
+	      if (! unrolled_loop_bb)
+		continue;
+	      if (loop_outer (unrolled_loop_bb->loop_father))
+		bitmap_set_bit (fathers,
+				unrolled_loop_bb->loop_father->num);
+	    }
+	  bitmap_clear (father_bbs);
+	  /* Propagate the constants within the new basic blocks.  */
+	  EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
+	    {
+	      loop_p father = get_loop (cfun, i);
+	      basic_block *body = get_loop_body_in_dom_order (father);
+	      for (unsigned j = 0; j < father->num_nodes; j++)
+		propagate_constants_for_unrolling (body[j]);
+	      free (body);
+	    }
+	  BITMAP_FREE (fathers);
+
 	  /* This will take care of removing completely unrolled loops
 	     from the loop structures so we can continue unrolling now
 	     innermost loops.  */
@@ -547,10 +1424,184 @@
 	  /* Clean up the information about numbers of iterations, since
 	     complete unrolling might have invalidated it.  */
 	  scev_reset ();
+	  if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
+	    verify_loop_closed_ssa (true);
 	}
+      if (loop_closed_ssa_invalidated)
+        BITMAP_FREE (loop_closed_ssa_invalidated);
     }
   while (changed
 	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
 
+  BITMAP_FREE (father_bbs);
+
+  if (irred_invalidated
+      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
+    mark_irreducible_loops ();
+
   return 0;
 }
+
+/* Canonical induction variable creation pass.  */
+
+namespace {
+
+const pass_data pass_data_iv_canon =
+{
+  GIMPLE_PASS, /* type */
+  "ivcanon", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_LOOP_IVCANON, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_iv_canon : public gimple_opt_pass
+{
+public:
+  pass_iv_canon (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_iv_canon, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
+  virtual unsigned int execute (function *fun);
+
+}; // class pass_iv_canon
+
+unsigned int
+pass_iv_canon::execute (function *fun)
+{
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  return canonicalize_induction_variables ();
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_iv_canon (gcc::context *ctxt)
+{
+  return new pass_iv_canon (ctxt);
+}
+
+/* Complete unrolling of loops.  */
+
+namespace {
+
+const pass_data pass_data_complete_unroll =
+{
+  GIMPLE_PASS, /* type */
+  "cunroll", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_COMPLETE_UNROLL, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_complete_unroll : public gimple_opt_pass
+{
+public:
+  pass_complete_unroll (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual unsigned int execute (function *);
+
+}; // class pass_complete_unroll
+
+unsigned int
+pass_complete_unroll::execute (function *fun)
+{
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  /* If we ever decide to run loop peeling more than once, we will need to
+     track loops already peeled in loop structures themselves to avoid
+     re-peeling the same loop multiple times.  */
+  if (flag_peel_loops)
+    peeled_loops = BITMAP_ALLOC (NULL);
+  int val = tree_unroll_loops_completely (flag_unroll_loops
+					  || flag_peel_loops
+					  || optimize >= 3, true);
+  if (peeled_loops)
+    {
+      BITMAP_FREE (peeled_loops);
+      peeled_loops = NULL;
+    }
+  return val;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_complete_unroll (gcc::context *ctxt)
+{
+  return new pass_complete_unroll (ctxt);
+}
+
+/* Complete unrolling of inner loops.  */
+
+namespace {
+
+const pass_data pass_data_complete_unrolli =
+{
+  GIMPLE_PASS, /* type */
+  "cunrolli", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_COMPLETE_UNROLL, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_complete_unrolli : public gimple_opt_pass
+{
+public:
+  pass_complete_unrolli (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return optimize >= 2; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_complete_unrolli
+
+unsigned int
+pass_complete_unrolli::execute (function *fun)
+{
+  unsigned ret = 0;
+
+  loop_optimizer_init (LOOPS_NORMAL
+		       | LOOPS_HAVE_RECORDED_EXITS);
+  if (number_of_loops (fun) > 1)
+    {
+      scev_initialize ();
+      ret = tree_unroll_loops_completely (optimize >= 3, false);
+      scev_finalize ();
+    }
+  loop_optimizer_finalize ();
+
+  return ret;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_complete_unrolli (gcc::context *ctxt)
+{
+  return new pass_complete_unrolli (ctxt);
+}
+
+