diff gcc/cfgloopmanip.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
line wrap: on
line diff
--- a/gcc/cfgloopmanip.c	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/cfgloopmanip.c	Fri Oct 27 22:46:09 2017 +0900
@@ -1,6 +1,5 @@
 /* Loop manipulation code for GNU compiler.
-   Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
-   Free Software Foundation, Inc.
+   Copyright (C) 2002-2017 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -21,16 +20,17 @@
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
 #include "rtl.h"
-#include "hard-reg-set.h"
-#include "obstack.h"
-#include "basic-block.h"
+#include "tree.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfganal.h"
 #include "cfgloop.h"
-#include "cfglayout.h"
-#include "cfghooks.h"
-#include "output.h"
-#include "tree-flow.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "tree-ssa-loop-manip.h"
+#include "dumpfile.h"
 
 static void copy_loops_to (struct loop **, int,
 			   struct loop *);
@@ -40,10 +40,7 @@
 static int find_path (edge, basic_block **);
 static void fix_loop_placements (struct loop *, bool *);
 static bool fix_bb_placement (basic_block);
-static void fix_bb_placements (basic_block, bool *);
-static void unloop (struct loop *, bool *);
-
-#define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
+static void fix_bb_placements (basic_block, bool *, bitmap);
 
 /* Checks whether basic block BB is dominated by DATA.  */
 static bool
@@ -75,9 +72,9 @@
   gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
 
   /* Find bbs in the path.  */
-  *bbs = XCNEWVEC (basic_block, n_basic_blocks);
+  *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
   return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
-			     n_basic_blocks, e->dest);
+			     n_basic_blocks_for_fn (cfun), e->dest);
 }
 
 /* Fix placement of basic block BB inside loop hierarchy --
@@ -96,7 +93,7 @@
 
   FOR_EACH_EDGE (e, ei, bb->succs)
     {
-      if (e->dest == EXIT_BLOCK_PTR)
+      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 	continue;
 
       act = e->dest->loop_father;
@@ -119,18 +116,21 @@
 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
    of LOOP to that leads at least one exit edge of LOOP, and set it
    as the immediate superloop of LOOP.  Return true if the immediate superloop
-   of LOOP changed.  */
+   of LOOP changed.
+
+   IRRED_INVALIDATED is set to true if a change in the loop structures might
+   invalidate the information about irreducible regions.  */
 
 static bool
-fix_loop_placement (struct loop *loop)
+fix_loop_placement (struct loop *loop, bool *irred_invalidated)
 {
   unsigned i;
   edge e;
-  VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+  vec<edge> exits = get_loop_exit_edges (loop);
   struct loop *father = current_loops->tree_root, *act;
   bool ret = false;
 
-  FOR_EACH_VEC_ELT (edge, exits, i, e)
+  FOR_EACH_VEC_ELT (exits, i, e)
     {
       act = find_common_loop (loop, e->dest->loop_father);
       if (flow_loop_nested_p (father, act))
@@ -146,18 +146,23 @@
 
       /* The exit edges of LOOP no longer exits its original immediate
 	 superloops; remove them from the appropriate exit lists.  */
-      FOR_EACH_VEC_ELT (edge, exits, i, e)
-	rescan_loop_exit (e, false, false);
+      FOR_EACH_VEC_ELT (exits, i, e)
+	{
+	  /* We may need to recompute irreducible loops.  */
+	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
+	    *irred_invalidated = true;
+	  rescan_loop_exit (e, false, false);
+	}
 
       ret = true;
     }
 
-  VEC_free (edge, heap, exits);
+  exits.release ();
   return ret;
 }
 
 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
-   enforce condition condition stated in description of fix_bb_placement. We
+   enforce condition stated in description of fix_bb_placement. We
    start from basic block FROM that had some of its successors removed, so that
    his placement no longer has to be correct, and iteratively fix placement of
    its predecessors that may change if placement of FROM changed.  Also fix
@@ -166,13 +171,16 @@
    successors we consider edges coming out of the loops.
 
    If the changes may invalidate the information about irreducible regions,
-   IRRED_INVALIDATED is set to true.  */
+   IRRED_INVALIDATED is set to true.  
+
+   If LOOP_CLOSED_SSA_INVALIDATED is non-NULL then all basic blocks with
+   changed loop_father are collected there.  */
 
 static void
 fix_bb_placements (basic_block from,
-		   bool *irred_invalidated)
+		   bool *irred_invalidated,
+		   bitmap loop_closed_ssa_invalidated)
 {
-  sbitmap in_queue;
   basic_block *queue, *qtop, *qbeg, *qend;
   struct loop *base_loop, *target_loop;
   edge e;
@@ -192,11 +200,11 @@
       || from == base_loop->header)
     return;
 
-  in_queue = sbitmap_alloc (last_basic_block);
-  sbitmap_zero (in_queue);
-  SET_BIT (in_queue, from->index);
+  auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
+  bitmap_clear (in_queue);
+  bitmap_set_bit (in_queue, from->index);
   /* Prevent us from going out of the base_loop.  */
-  SET_BIT (in_queue, base_loop->header->index);
+  bitmap_set_bit (in_queue, base_loop->header->index);
 
   queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
   qtop = queue + base_loop->num_nodes + 1;
@@ -211,14 +219,21 @@
       qbeg++;
       if (qbeg == qtop)
 	qbeg = queue;
-      RESET_BIT (in_queue, from->index);
+      bitmap_clear_bit (in_queue, from->index);
 
       if (from->loop_father->header == from)
 	{
 	  /* Subloop header, maybe move the loop upward.  */
-	  if (!fix_loop_placement (from->loop_father))
+	  if (!fix_loop_placement (from->loop_father, irred_invalidated))
 	    continue;
 	  target_loop = loop_outer (from->loop_father);
+	  if (loop_closed_ssa_invalidated)
+	    {
+	      basic_block *bbs = get_loop_body (from->loop_father);
+	      for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
+		bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
+	      free (bbs);
+	    }
 	}
       else
 	{
@@ -226,6 +241,8 @@
 	  if (!fix_bb_placement (from))
 	    continue;
 	  target_loop = from->loop_father;
+	  if (loop_closed_ssa_invalidated)
+	    bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
 	}
 
       FOR_EACH_EDGE (e, ei, from->succs)
@@ -243,7 +260,7 @@
 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
 	    *irred_invalidated = true;
 
-	  if (TEST_BIT (in_queue, pred->index))
+	  if (bitmap_bit_p (in_queue, pred->index))
 	    continue;
 
 	  /* If it is subloop, then it either was not moved, or
@@ -263,7 +280,7 @@
 	      continue;
 	    }
 
-	  if (TEST_BIT (in_queue, pred->index))
+	  if (bitmap_bit_p (in_queue, pred->index))
 	    continue;
 
 	  /* Schedule the basic block.  */
@@ -271,10 +288,9 @@
 	  qend++;
 	  if (qend == qtop)
 	    qend = queue;
-	  SET_BIT (in_queue, pred->index);
+	  bitmap_set_bit (in_queue, pred->index);
 	}
     }
-  free (in_queue);
   free (queue);
 }
 
@@ -282,14 +298,19 @@
    and update loop structures and dominators.  Return true if we were able
    to remove the path, false otherwise (and nothing is affected then).  */
 bool
-remove_path (edge e)
+remove_path (edge e, bool *irred_invalidated,
+	     bitmap loop_closed_ssa_invalidated)
 {
   edge ae;
   basic_block *rem_bbs, *bord_bbs, from, bb;
-  VEC (basic_block, heap) *dom_bbs;
+  vec<basic_block> dom_bbs;
   int i, nrem, n_bord_bbs;
-  sbitmap seen;
-  bool irred_invalidated = false;
+  bool local_irred_invalidated = false;
+  edge_iterator ei;
+  struct loop *l, *f;
+
+  if (! irred_invalidated)
+    irred_invalidated = &local_irred_invalidated;
 
   if (!can_remove_branch_p (e))
     return false;
@@ -300,7 +321,7 @@
      that is inside an irreducible region is changed, or if such a loop is
      removed.  */
   if (e->flags & EDGE_IRREDUCIBLE_LOOP)
-    irred_invalidated = true;
+    *irred_invalidated = true;
 
   /* We need to check whether basic blocks are dominated by the edge
      e, but we only have basic block dominators.  This is easy to
@@ -313,41 +334,53 @@
      we belong to.  In this case first unloop the loops, then proceed
      normally.   We may assume that e->dest is not a header of any loop,
      as it now has exactly one predecessor.  */
-  while (loop_outer (e->src->loop_father)
-	 && dominated_by_p (CDI_DOMINATORS,
-			    e->src->loop_father->latch, e->dest))
-    unloop (e->src->loop_father, &irred_invalidated);
+  for (l = e->src->loop_father; loop_outer (l); l = f)
+    {
+      f = loop_outer (l);
+      if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
+        unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
+    }
 
   /* Identify the path.  */
   nrem = find_path (e, &rem_bbs);
 
   n_bord_bbs = 0;
-  bord_bbs = XCNEWVEC (basic_block, n_basic_blocks);
-  seen = sbitmap_alloc (last_basic_block);
-  sbitmap_zero (seen);
+  bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+  auto_sbitmap seen (last_basic_block_for_fn (cfun));
+  bitmap_clear (seen);
 
   /* Find "border" hexes -- i.e. those with predecessor in removed path.  */
   for (i = 0; i < nrem; i++)
-    SET_BIT (seen, rem_bbs[i]->index);
+    bitmap_set_bit (seen, rem_bbs[i]->index);
+  if (!*irred_invalidated)
+    FOR_EACH_EDGE (ae, ei, e->src->succs)
+      if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
+	  && !bitmap_bit_p (seen, ae->dest->index)
+	  && ae->flags & EDGE_IRREDUCIBLE_LOOP)
+	{
+	  *irred_invalidated = true;
+	  break;
+	}
+
   for (i = 0; i < nrem; i++)
     {
-      edge_iterator ei;
       bb = rem_bbs[i];
       FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
-	if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index))
+	if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
+	    && !bitmap_bit_p (seen, ae->dest->index))
 	  {
-	    SET_BIT (seen, ae->dest->index);
+	    bitmap_set_bit (seen, ae->dest->index);
 	    bord_bbs[n_bord_bbs++] = ae->dest;
 
 	    if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
-	      irred_invalidated = true;
+	      *irred_invalidated = true;
 	  }
     }
 
   /* Remove the path.  */
   from = e->src;
   remove_branch (e);
-  dom_bbs = NULL;
+  dom_bbs.create (0);
 
   /* Cancel loops contained in the path.  */
   for (i = 0; i < nrem; i++)
@@ -358,49 +391,47 @@
   free (rem_bbs);
 
   /* Find blocks whose dominators may be affected.  */
-  sbitmap_zero (seen);
+  bitmap_clear (seen);
   for (i = 0; i < n_bord_bbs; i++)
     {
       basic_block ldom;
 
       bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
-      if (TEST_BIT (seen, bb->index))
+      if (bitmap_bit_p (seen, bb->index))
 	continue;
-      SET_BIT (seen, bb->index);
+      bitmap_set_bit (seen, bb->index);
 
       for (ldom = first_dom_son (CDI_DOMINATORS, bb);
 	   ldom;
 	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
 	if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
-	  VEC_safe_push (basic_block, heap, dom_bbs, ldom);
+	  dom_bbs.safe_push (ldom);
     }
 
-  free (seen);
-
   /* Recount dominators.  */
   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
-  VEC_free (basic_block, heap, dom_bbs);
+  dom_bbs.release ();
   free (bord_bbs);
 
   /* Fix placements of basic blocks inside loops and the placement of
      loops in the loop tree.  */
-  fix_bb_placements (from, &irred_invalidated);
-  fix_loop_placements (from->loop_father, &irred_invalidated);
+  fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
+  fix_loop_placements (from->loop_father, irred_invalidated);
 
-  if (irred_invalidated
+  if (local_irred_invalidated
       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
     mark_irreducible_loops ();
 
   return true;
 }
 
-/* Creates place for a new LOOP in loops structure.  */
+/* Creates place for a new LOOP in loops structure of FN.  */
 
-static void
-place_new_loop (struct loop *loop)
+void
+place_new_loop (struct function *fn, struct loop *loop)
 {
-  loop->num = number_of_loops ();
-  VEC_safe_push (loop_p, gc, current_loops->larray, loop);
+  loop->num = number_of_loops (fn);
+  vec_safe_push (loops_for_fn (fn)->larray, loop);
 }
 
 /* Given LOOP structure with filled header and latch, find the body of the
@@ -417,12 +448,12 @@
   edge_iterator ei;
 
   /* Add it to loop structure.  */
-  place_new_loop (loop);
+  place_new_loop (cfun, loop);
   flow_loop_tree_node_add (outer, loop);
 
   /* Find its nodes.  */
-  bbs = XNEWVEC (basic_block, n_basic_blocks);
-  n = get_loop_body_with_size (loop, bbs, n_basic_blocks);
+  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+  n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
 
   for (i = 0; i < n; i++)
     {
@@ -457,33 +488,147 @@
   free (bbs);
 }
 
-/* Multiply all frequencies in LOOP by NUM/DEN.  */
+/* Scale profile of loop by P.  */
+
 void
-scale_loop_frequencies (struct loop *loop, int num, int den)
+scale_loop_frequencies (struct loop *loop, profile_probability p)
 {
   basic_block *bbs;
 
   bbs = get_loop_body (loop);
-  scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
+  scale_bbs_frequencies (bbs, loop->num_nodes, p);
   free (bbs);
 }
 
+/* Scale profile in LOOP by P.
+   If ITERATION_BOUND is non-zero, scale even further if the scaled loop is
+   still predicted to iterate more than ITERATION_BOUND times.  */
+
+void
+scale_loop_profile (struct loop *loop, profile_probability p,
+		    gcov_type iteration_bound)
+{
+  gcov_type iterations = expected_loop_iterations_unbounded (loop);
+  edge e;
+  edge_iterator ei;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, ";; Scaling loop %i with scale ",
+	       loop->num);
+      p.dump (dump_file);
+      fprintf (dump_file, " bounding iterations to %i from guessed %i\n",
+	       (int)iteration_bound, (int)iterations);
+    }
+
+  /* See if loop is predicted to iterate too many times.  */
+  if (iteration_bound && iterations > 0
+      && p.apply (iterations) > iteration_bound)
+    {
+      /* Fixing loop profile for different trip count is not trivial; the exit
+	 probabilities have to be updated to match and frequencies propagated down
+	 to the loop body.
+
+	 We fully update only the simple case of loop with single exit that is
+	 either from the latch or BB just before latch and leads from BB with
+	 simple conditional jump.   This is OK for use in vectorizer.  */
+      e = single_exit (loop);
+      if (e)
+	{
+	  edge other_e;
+	  int freq_delta;
+	  profile_count count_delta;
+
+          FOR_EACH_EDGE (other_e, ei, e->src->succs)
+	    if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
+		&& e != other_e)
+	      break;
+
+	  /* Probability of exit must be 1/ITERATION_BOUND.  */
+	  freq_delta = EDGE_FREQUENCY (e);
+	  count_delta = e->count ();
+	  e->probability = profile_probability::always ()
+				.apply_scale (1, iteration_bound);
+	  other_e->probability = e->probability.invert ();
+	  freq_delta -= EDGE_FREQUENCY (e);
+	  count_delta -= e->count ();
+
+	  /* If latch exists, change its frequency and count, since we changed
+	     probability of exit.  Theoretically we should update everything from
+	     source of exit edge to latch, but for vectorizer this is enough.  */
+	  if (loop->latch
+	      && loop->latch != e->src)
+	    {
+	      loop->latch->frequency += freq_delta;
+	      if (loop->latch->frequency < 0)
+		loop->latch->frequency = 0;
+	      loop->latch->count += count_delta;
+	    }
+	}
+
+      /* Roughly speaking we want to reduce the loop body profile by the
+	 difference of loop iterations.  We however can do better if
+	 we look at the actual profile, if it is available.  */
+      p = p.apply_scale (iteration_bound, iterations);
+
+      bool determined = false;
+      if (loop->header->count.initialized_p ())
+	{
+	  profile_count count_in = profile_count::zero ();
+
+	  FOR_EACH_EDGE (e, ei, loop->header->preds)
+	    if (e->src != loop->latch)
+	      count_in += e->count ();
+
+	  if (count_in > profile_count::zero () )
+	    {
+	      p = count_in.probability_in (loop->header->count.apply_scale
+						 (iteration_bound, 1));
+	      determined = true;
+	    }
+	}
+      if (!determined && loop->header->frequency)
+	{
+	  int freq_in = 0;
+
+	  FOR_EACH_EDGE (e, ei, loop->header->preds)
+	    if (e->src != loop->latch)
+	      freq_in += EDGE_FREQUENCY (e);
+
+	  if (freq_in != 0)
+	    p = profile_probability::probability_in_gcov_type
+			 (freq_in * iteration_bound, loop->header->frequency);
+	}
+      if (!(p > profile_probability::never ()))
+	p = profile_probability::very_unlikely ();
+    }
+
+  if (p >= profile_probability::always ()
+      || !p.initialized_p ())
+    return;
+
+  /* Scale the actual probabilities.  */
+  scale_loop_frequencies (loop, p);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; guessed iterations are now %i\n",
+	     (int)expected_loop_iterations_unbounded (loop));
+}
+
 /* Recompute dominance information for basic blocks outside LOOP.  */
 
 static void
 update_dominators_in_loop (struct loop *loop)
 {
-  VEC (basic_block, heap) *dom_bbs = NULL;
-  sbitmap seen;
+  vec<basic_block> dom_bbs = vNULL;
   basic_block *body;
   unsigned i;
 
-  seen = sbitmap_alloc (last_basic_block);
-  sbitmap_zero (seen);
+  auto_sbitmap seen (last_basic_block_for_fn (cfun));
+  bitmap_clear (seen);
   body = get_loop_body (loop);
 
   for (i = 0; i < loop->num_nodes; i++)
-    SET_BIT (seen, body[i]->index);
+    bitmap_set_bit (seen, body[i]->index);
 
   for (i = 0; i < loop->num_nodes; i++)
     {
@@ -492,17 +637,16 @@
       for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
 	   ldom;
 	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
-	if (!TEST_BIT (seen, ldom->index))
+	if (!bitmap_bit_p (seen, ldom->index))
 	  {
-	    SET_BIT (seen, ldom->index);
-	    VEC_safe_push (basic_block, heap, dom_bbs, ldom);
+	    bitmap_set_bit (seen, ldom->index);
+	    dom_bbs.safe_push (ldom);
 	  }
     }
 
   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
   free (body);
-  free (seen);
-  VEC_free (basic_block, heap, dom_bbs);
+  dom_bbs.release ();
 }
 
 /* Creates an if region as shown above. CONDITION is used to create
@@ -545,7 +689,7 @@
 
   basic_block cond_bb, true_bb, false_bb, join_bb;
   edge e_true, e_false, exit_edge;
-  gimple cond_stmt;
+  gcond *cond_stmt;
   tree simple_cond;
   gimple_stmt_iterator gsi;
 
@@ -631,10 +775,9 @@
   struct loop *loop;
   gimple_stmt_iterator gsi;
   gimple_seq stmts;
-  gimple cond_expr;
+  gcond *cond_expr;
   tree exit_test;
   edge exit_e;
-  int prob;
 
   gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
 
@@ -658,9 +801,7 @@
   add_loop (loop, outer);
 
   /* TODO: Fix frequencies and counts.  */
-  prob = REG_BR_PROB_BASE / 2;
-
-  scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
+  scale_loop_frequencies (loop, profile_probability::even ());
 
   /* Update dominators.  */
   update_dominators_in_loop (loop);
@@ -718,22 +859,21 @@
 struct loop *
 loopify (edge latch_edge, edge header_edge,
 	 basic_block switch_bb, edge true_edge, edge false_edge,
-	 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
+	 bool redirect_all_edges, profile_probability true_scale,
+	 profile_probability false_scale)
 {
   basic_block succ_bb = latch_edge->dest;
   basic_block pred_bb = header_edge->src;
   struct loop *loop = alloc_loop ();
   struct loop *outer = loop_outer (succ_bb->loop_father);
   int freq;
-  gcov_type cnt;
-  edge e;
-  edge_iterator ei;
+  profile_count cnt;
 
   loop->header = header_edge->dest;
   loop->latch = latch_edge->src;
 
   freq = EDGE_FREQUENCY (header_edge);
-  cnt = header_edge->count;
+  cnt = header_edge->count ();
 
   /* Redirect edges.  */
   loop_redirect_edge (latch_edge, loop->header);
@@ -766,13 +906,9 @@
     {
       switch_bb->frequency = freq;
       switch_bb->count = cnt;
-      FOR_EACH_EDGE (e, ei, switch_bb->succs)
-	{
-	  e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
-	}
     }
-  scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
-  scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
+  scale_loop_frequencies (loop, false_scale);
+  scale_loop_frequencies (succ_bb->loop_father, true_scale);
   update_dominators_in_loop (loop);
 
   return loop;
@@ -783,10 +919,14 @@
    have no successor, which caller is expected to fix somehow.
 
    If this may cause the information about irreducible regions to become
-   invalid, IRRED_INVALIDATED is set to true.  */
+   invalid, IRRED_INVALIDATED is set to true.  
 
-static void
-unloop (struct loop *loop, bool *irred_invalidated)
+   LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
+   basic blocks that had non-trivial update on their loop_father.  */
+
+void
+unloop (struct loop *loop, bool *irred_invalidated,
+	bitmap loop_closed_ssa_invalidated)
 {
   basic_block *body;
   struct loop *ploop;
@@ -811,7 +951,7 @@
 	remove_bb_from_loops (body[i]);
 	add_bb_to_loop (body[i], loop_outer (loop));
       }
-  free(body);
+  free (body);
 
   while (loop->inner)
     {
@@ -828,7 +968,7 @@
   /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
      there is an irreducible region inside the cancelled loop, the flags will
      be still correct.  */
-  fix_bb_placements (latch, &dummy);
+  fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
 }
 
 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
@@ -847,7 +987,7 @@
   while (loop_outer (loop))
     {
       outer = loop_outer (loop);
-      if (!fix_loop_placement (loop))
+      if (!fix_loop_placement (loop, irred_invalidated))
 	break;
 
       /* Changing the placement of a loop in the loop tree may alter the
@@ -856,11 +996,32 @@
 	 to the loop.  So call fix_bb_placements to fix up the placement
 	 of the preheader and (possibly) of its predecessors.  */
       fix_bb_placements (loop_preheader_edge (loop)->src,
-			 irred_invalidated);
+			 irred_invalidated, NULL);
       loop = outer;
     }
 }
 
+/* Copy the iteration bounds, estimates and other information recorded for
+   LOOP into its duplicate TARGET.  */
+
+void
+copy_loop_info (struct loop *loop, struct loop *target)
+{
+  gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
+  target->any_upper_bound = loop->any_upper_bound;
+  target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
+  target->any_likely_upper_bound = loop->any_likely_upper_bound;
+  target->nb_iterations_likely_upper_bound
+    = loop->nb_iterations_likely_upper_bound;
+  target->any_estimate = loop->any_estimate;
+  target->nb_iterations_estimate = loop->nb_iterations_estimate;
+  target->estimate_state = loop->estimate_state;
+  target->constraints = loop->constraints;
+  target->warned_aggressive_loop_optimizations
+    |= loop->warned_aggressive_loop_optimizations;
+  target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
+}
+
 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
    created loop into loops structure.  */
 struct loop *
@@ -868,7 +1029,9 @@
 {
   struct loop *cloop;
   cloop = alloc_loop ();
-  place_new_loop (cloop);
+  place_new_loop (cfun, cloop);
+ 
+  copy_loop_info (loop, cloop);
 
   /* Mark the new loop as copy of LOOP.  */
   set_loop_copy (loop, cloop);
@@ -931,41 +1094,6 @@
   return ret;
 }
 
-/* Sets probability and count of edge E to zero.  The probability and count
-   is redistributed evenly to the remaining edges coming from E->src.  */
-
-static void
-set_zero_probability (edge e)
-{
-  basic_block bb = e->src;
-  edge_iterator ei;
-  edge ae, last = NULL;
-  unsigned n = EDGE_COUNT (bb->succs);
-  gcov_type cnt = e->count, cnt1;
-  unsigned prob = e->probability, prob1;
-
-  gcc_assert (n > 1);
-  cnt1 = cnt / (n - 1);
-  prob1 = prob / (n - 1);
-
-  FOR_EACH_EDGE (ae, ei, bb->succs)
-    {
-      if (ae == e)
-	continue;
-
-      ae->probability += prob1;
-      ae->count += cnt1;
-      last = ae;
-    }
-
-  /* Move the rest to one of the edges.  */
-  last->probability += prob % (n - 1);
-  last->count += cnt % (n - 1);
-
-  e->probability = 0;
-  e->count = 0;
-}
-
 /* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
    loop structure and dominators.  E's destination must be LOOP header for
    this to work, i.e. it must be entry or latch edge of this loop; these are
@@ -980,7 +1108,7 @@
 bool
 duplicate_loop_to_header_edge (struct loop *loop, edge e,
 			       unsigned int ndupl, sbitmap wont_exit,
-			       edge orig, VEC (edge, heap) **to_remove,
+			       edge orig, vec<edge> *to_remove,
 			       int flags)
 {
   struct loop *target, *aloop;
@@ -1052,13 +1180,18 @@
       prob_pass_wont_exit =
 	      RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
 
-      if (orig
-	  && REG_BR_PROB_BASE - orig->probability != 0)
+      if (orig && orig->probability.initialized_p ()
+	  && !(orig->probability == profile_probability::always ()))
 	{
 	  /* The blocks that are dominated by a removed exit edge ORIG have
 	     frequencies scaled by this.  */
-	  scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE,
-				   REG_BR_PROB_BASE - orig->probability);
+	  if (orig->probability.initialized_p ())
+	    scale_after_exit
+                = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE,
+                                      REG_BR_PROB_BASE
+				      - orig->probability.to_reg_br_prob_base ());
+	  else
+	    scale_after_exit = REG_BR_PROB_BASE;
 	  bbs_to_scale = BITMAP_ALLOC (NULL);
 	  for (i = 0; i < n; i++)
 	    {
@@ -1071,7 +1204,7 @@
       scale_step = XNEWVEC (int, ndupl);
 
       for (i = 1; i <= ndupl; i++)
-	scale_step[i - 1] = TEST_BIT (wont_exit, i)
+	scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
 				? prob_pass_wont_exit
 				: prob_pass_thru;
 
@@ -1089,16 +1222,16 @@
 	     frequency should be reduced by prob_pass_wont_exit.  Caller
 	     should've managed the flags so all except for original loop
 	     has won't exist set.  */
-	  scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
+	  scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
 	  /* Now simulate the duplication adjustments and compute header
 	     frequency of the last copy.  */
 	  for (i = 0; i < ndupl; i++)
-	    wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE);
-	  scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
+	    wanted_freq = combine_probabilities (wanted_freq, scale_step[i]);
+	  scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
 	}
       else if (is_latch)
 	{
-	  prob_pass_main = TEST_BIT (wont_exit, 0)
+	  prob_pass_main = bitmap_bit_p (wont_exit, 0)
 				? prob_pass_wont_exit
 				: prob_pass_thru;
 	  p = prob_pass_main;
@@ -1106,17 +1239,20 @@
 	  for (i = 0; i < ndupl; i++)
 	    {
 	      scale_main += p;
-	      p = RDIV (p * scale_step[i], REG_BR_PROB_BASE);
+	      p = combine_probabilities (p, scale_step[i]);
 	    }
-	  scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main);
-	  scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE);
+	  scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main);
+	  scale_act = combine_probabilities (scale_main, prob_pass_main);
 	}
       else
 	{
+	  int preheader_freq = EDGE_FREQUENCY (e);
 	  scale_main = REG_BR_PROB_BASE;
 	  for (i = 0; i < ndupl; i++)
-	    scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE);
-	  scale_act = REG_BR_PROB_BASE - prob_pass_thru;
+	    scale_main = combine_probabilities (scale_main, scale_step[i]);
+	  if (preheader_freq > freq_in)
+	    preheader_freq = freq_in;
+	  scale_act = GCOV_COMPUTE_SCALE (preheader_freq, freq_in);
 	}
       for (i = 0; i < ndupl; i++)
 	gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
@@ -1131,7 +1267,7 @@
   n_orig_loops = 0;
   for (aloop = loop->inner; aloop; aloop = aloop->next)
     n_orig_loops++;
-  orig_loops = XCNEWVEC (struct loop *, n_orig_loops);
+  orig_loops = XNEWVEC (struct loop *, n_orig_loops);
   for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
     orig_loops[i] = aloop;
 
@@ -1155,7 +1291,7 @@
 
       /* Copy bbs.  */
       copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
-		place_after);
+		place_after, true);
       place_after = new_spec_edges[SE_LATCH]->src;
 
       if (flags & DLTHE_RECORD_COPY_NUMBER)
@@ -1207,11 +1343,11 @@
 	}
 
       /* Record exit edge in this copy.  */
-      if (orig && TEST_BIT (wont_exit, j + 1))
+      if (orig && bitmap_bit_p (wont_exit, j + 1))
 	{
 	  if (to_remove)
-	    VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]);
-	  set_zero_probability (new_spec_edges[SE_ORIG]);
+	    to_remove->safe_push (new_spec_edges[SE_ORIG]);
+	  force_edge_cold (new_spec_edges[SE_ORIG], true);
 
 	  /* Scale the frequencies of the blocks dominated by the exit.  */
 	  if (bbs_to_scale)
@@ -1236,18 +1372,18 @@
       if (flags & DLTHE_FLAG_UPDATE_FREQ)
 	{
 	  scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
-	  scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE);
+	  scale_act = combine_probabilities (scale_act, scale_step[j]);
 	}
     }
   free (new_bbs);
   free (orig_loops);
 
   /* Record the exit edge in the original loop body, and update the frequencies.  */
-  if (orig && TEST_BIT (wont_exit, 0))
+  if (orig && bitmap_bit_p (wont_exit, 0))
     {
       if (to_remove)
-	VEC_safe_push (edge, heap, *to_remove, orig);
-      set_zero_probability (orig);
+	to_remove->safe_push (orig);
+      force_edge_cold (orig, true);
 
       /* Scale the frequencies of the blocks dominated by the exit.  */
       if (bbs_to_scale)
@@ -1273,14 +1409,14 @@
   for (i = 0; i < n; i++)
     {
       basic_block dominated, dom_bb;
-      VEC (basic_block, heap) *dom_bbs;
+      vec<basic_block> dom_bbs;
       unsigned j;
 
       bb = bbs[i];
       bb->aux = 0;
 
       dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
-      FOR_EACH_VEC_ELT (basic_block, dom_bbs, j, dominated)
+      FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
 	{
 	  if (flow_bb_inside_loop_p (loop, dominated))
 	    continue;
@@ -1288,7 +1424,7 @@
 			CDI_DOMINATORS, first_active[i], first_active_latch);
 	  set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
 	}
-      VEC_free (basic_block, heap, dom_bbs);
+      dom_bbs.release ();
     }
   free (first_active);
 
@@ -1334,7 +1470,7 @@
 basic_block
 create_preheader (struct loop *loop, int flags)
 {
-  edge e, fallthru;
+  edge e;
   basic_block dummy;
   int nentry = 0;
   bool irred = false;
@@ -1359,7 +1495,7 @@
 
       /* We do not allow entry block to be the loop preheader, since we
 	     cannot emit code there.  */
-      if (single_entry->src == ENTRY_BLOCK_PTR)
+      if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
         need_forwarder_block = true;
       else
         {
@@ -1381,9 +1517,14 @@
 
   mfb_kj_edge = loop_latch_edge (loop);
   latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
-  fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
-  dummy = fallthru->src;
-  loop->header = fallthru->dest;
+  if (nentry == 1)
+    dummy = split_edge (single_entry);
+  else
+    {
+      edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
+      dummy = fallthru->src;
+      loop->header = fallthru->dest;
+    }
 
   /* Try to be clever in placing the newly created preheader.  The idea is to
      avoid breaking any "fallthruness" relationship between blocks.
@@ -1425,13 +1566,12 @@
 void
 create_preheaders (int flags)
 {
-  loop_iterator li;
   struct loop *loop;
 
   if (!current_loops)
     return;
 
-  FOR_EACH_LOOP (li, loop, 0)
+  FOR_EACH_LOOP (loop, 0)
     create_preheader (loop, flags);
   loops_state_set (LOOPS_HAVE_PREHEADERS);
 }
@@ -1441,16 +1581,16 @@
 void
 force_single_succ_latches (void)
 {
-  loop_iterator li;
   struct loop *loop;
   edge e;
 
-  FOR_EACH_LOOP (li, loop, 0)
+  FOR_EACH_LOOP (loop, 0)
     {
       if (loop->latch != loop->header && single_succ_p (loop->latch))
 	continue;
 
       e = find_edge (loop->latch, loop->header);
+      gcc_checking_assert (e != NULL);
 
       split_edge (e);
     }
@@ -1471,11 +1611,16 @@
 			|
 			+---------> [second_head]
 
-  THEN_PROB is the probability of then branch of the condition.  */
+  THEN_PROB is the probability of the then branch of the condition.
+  ELSE_PROB is the probability of the else branch.  Note that both may be
+  REG_BR_PROB_BASE when the condition is IFN_LOOP_VECTORIZED or
+  IFN_LOOP_DIST_ALIAS.  */
 
 static basic_block
 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
-			   edge e, void *cond_expr, unsigned then_prob)
+			   edge e, void *cond_expr,
+			   profile_probability then_prob,
+			   profile_probability else_prob)
 {
   basic_block new_head = NULL;
   edge e1;
@@ -1494,9 +1639,7 @@
   e1 = make_edge (new_head, first_head,
 		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
   e1->probability = then_prob;
-  e->probability = REG_BR_PROB_BASE - then_prob;
-  e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE);
-  e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE);
+  e->probability = else_prob;
 
   set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
   set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
@@ -1512,10 +1655,13 @@
    This transformation given a condition and a loop, creates
    -if (condition) { loop_copy1 } else { loop_copy2 },
    where loop_copy1 is the loop transformed in one way, and loop_copy2
-   is the loop transformed in another way (or unchanged). 'condition'
+   is the loop transformed in another way (or unchanged). COND_EXPR
    may be a run time test for things that were not resolved by static
    analysis (overlapping ranges (anti-aliasing), alignment, etc.).
 
+   If CONDITION_BB is non-NULL, *CONDITION_BB is set to the basic block
+   containing the condition.
+
    THEN_PROB is the probability of the then edge of the if.  THEN_SCALE
    is the ratio by that the frequencies in the original loop should
    be scaled.  ELSE_SCALE is the ratio by that the frequencies in the
@@ -1527,7 +1673,8 @@
 struct loop *
 loop_version (struct loop *loop,
 	      void *cond_expr, basic_block *condition_bb,
-	      unsigned then_prob, unsigned then_scale, unsigned else_scale,
+	      profile_probability then_prob, profile_probability else_prob,
+	      profile_probability then_scale, profile_probability else_scale,
 	      bool place_after)
 {
   basic_block first_head, second_head;
@@ -1558,7 +1705,7 @@
 
   /* Split loop entry edge and insert new block with cond expr.  */
   cond_bb =  lv_adjust_loop_entry_edge (first_head, second_head,
-					entry, cond_expr, then_prob);
+					entry, cond_expr, then_prob, else_prob);
   if (condition_bb)
     *condition_bb = cond_bb;
 
@@ -1577,6 +1724,8 @@
 		   false /* Do not redirect all edges.  */,
 		   then_scale, else_scale);
 
+  copy_loop_info (loop, nloop);
+
   /* loopify redirected latch_edge. Update its PENDING_STMTS.  */
   lv_flush_pending_stmts (latch_edge);
 
@@ -1615,112 +1764,3 @@
 
   return nloop;
 }
-
-/* The structure of loops might have changed.  Some loops might get removed
-   (and their headers and latches were set to NULL), loop exists might get
-   removed (thus the loop nesting may be wrong), and some blocks and edges
-   were changed (so the information about bb --> loop mapping does not have
-   to be correct).  But still for the remaining loops the header dominates
-   the latch, and loops did not get new subloops (new loops might possibly
-   get created, but we are not interested in them).  Fix up the mess.
-
-   If CHANGED_BBS is not NULL, basic blocks whose loop has changed are
-   marked in it.  */
-
-void
-fix_loop_structure (bitmap changed_bbs)
-{
-  basic_block bb;
-  struct loop *loop, *ploop;
-  loop_iterator li;
-  bool record_exits = false;
-  struct loop **superloop = XNEWVEC (struct loop *, number_of_loops ());
-
-  /* Remove the old bb -> loop mapping.  Remember the depth of the blocks in
-     the loop hierarchy, so that we can recognize blocks whose loop nesting
-     relationship has changed.  */
-  FOR_EACH_BB (bb)
-    {
-      if (changed_bbs)
-	bb->aux = (void *) (size_t) loop_depth (bb->loop_father);
-      bb->loop_father = current_loops->tree_root;
-    }
-
-  if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
-    {
-      release_recorded_exits ();
-      record_exits = true;
-    }
-
-  /* Remove the dead loops from structures.  We start from the innermost
-     loops, so that when we remove the loops, we know that the loops inside
-     are preserved, and do not waste time relinking loops that will be
-     removed later.  */
-  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
-    {
-      if (loop->header)
-	continue;
-
-      while (loop->inner)
-	{
-	  ploop = loop->inner;
-	  flow_loop_tree_node_remove (ploop);
-	  flow_loop_tree_node_add (loop_outer (loop), ploop);
-	}
-
-      /* Remove the loop and free its data.  */
-      delete_loop (loop);
-    }
-
-  /* Rescan the bodies of loops, starting from the outermost ones.  We assume
-     that no optimization interchanges the order of the loops, i.e., it cannot
-     happen that L1 was superloop of L2 before and it is subloop of L2 now
-     (without explicitly updating loop information).  At the same time, we also
-     determine the new loop structure.  */
-  current_loops->tree_root->num_nodes = n_basic_blocks;
-  FOR_EACH_LOOP (li, loop, 0)
-    {
-      superloop[loop->num] = loop->header->loop_father;
-      loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
-    }
-
-  /* Now fix the loop nesting.  */
-  FOR_EACH_LOOP (li, loop, 0)
-    {
-      ploop = superloop[loop->num];
-      if (ploop != loop_outer (loop))
-	{
-	  flow_loop_tree_node_remove (loop);
-	  flow_loop_tree_node_add (ploop, loop);
-	}
-    }
-  free (superloop);
-
-  /* Mark the blocks whose loop has changed.  */
-  if (changed_bbs)
-    {
-      FOR_EACH_BB (bb)
-	{
-	  if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux)
-	    bitmap_set_bit (changed_bbs, bb->index);
-
-    	  bb->aux = NULL;
-	}
-    }
-
-  if (loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
-    create_preheaders (CP_SIMPLE_PREHEADERS);
-
-  if (loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
-    force_single_succ_latches ();
-
-  if (loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
-    mark_irreducible_loops ();
-
-  if (record_exits)
-    record_loop_exits ();
-
-#ifdef ENABLE_CHECKING
-  verify_loop_structure ();
-#endif
-}