CbC/CbC_gcc: gcc/tree-ssa-loop-ivcanon.c comparison

comparison gcc/tree-ssa-loop-ivcanon.c @ 132:d34655255c78

update gcc-8.2

author	mir3636
date	Thu, 25 Oct 2018 10:21:07 +0900
parents	84e7813d76e9
children	1830386684a0

comparison

equal deleted inserted replaced

-:e108057fa461
+:d34655255c78
 /* Induction variable canonicalization and loop peeling.
-Copyright (C) 2004-2017 Free Software Foundation, Inc.
+Copyright (C) 2004-2018 Free Software Foundation, Inc.
 This file is part of GCC.
 GCC is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
 #include "tree-scalar-evolution.h"
 #include "params.h"
 #include "tree-inline.h"
 #include "tree-cfgcleanup.h"
 #include "builtins.h"
+#include "tree-ssa-sccvn.h"
 /* Specifies types of loops that may be unrolled.  */
 enum unroll_level
 {
 			   of code size.  */
 UL_ALL		/* All suitable loops.  */
 };
 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
-is the exit edge whose condition is replaced.  */
+is the exit edge whose condition is replaced.  The ssa versions of the new
+IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
-static void
+if they are not NULL.  */
-create_canonical_iv (struct loop *loop, edge exit, tree niter)
+void
+create_canonical_iv (struct loop *loop, edge exit, tree niter,
+		     tree *var_before = NULL, tree *var_after = NULL)
 {
 edge in;
 tree type, var;
 gcond *cond;
 gimple_stmt_iterator incr_at;
 		       build_int_cst (type, 1));
 incr_at = gsi_last_bb (in->src);
 create_iv (niter,
 	     build_int_cst (type, -1),
 	     NULL_TREE, loop,
-	     &incr_at, false, NULL, &var);
+	     &incr_at, false, var_before, &var);
+if (var_after)
+*var_after = var;
 cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 gimple_cond_set_code (cond, cmp);
 gimple_cond_set_lhs (cond, var);
 gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 	     expand inline.  */
 	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
 	    size->non_call_stmts_on_hot_path++;
 	  if (((gimple_code (stmt) == GIMPLE_COND
 	        && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
-		    || constant_after_peeling (gimple_cond_rhs (stmt), stmt,
+		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
-					       loop)))
+						loop)))
 	       || (gimple_code (stmt) == GIMPLE_SWITCH
 		   && !constant_after_peeling (gimple_switch_index (
 						 as_a <gswitch *> (stmt)),
 					       stmt, loop)))
 	      && (!exit || bb != exit->src))
 latch_edge->flags |= flags;
 latch_edge->goto_locus = locus;
 add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 latch_edge->dest->count = profile_count::zero ();
-latch_edge->dest->frequency = 0;
 set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 gsi = gsi_start_bb (latch_edge->dest);
 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 }
 loops_to_unloop.release ();
 loops_to_unloop_nunroll.release ();
-/* Remove edges in peeled copies.  */
+/* Remove edges in peeled copies.  Given remove_path removes dominated
+regions we need to cope with removal of already removed paths.  */
 unsigned i;
 edge e;
+auto_vec<int, 20> src_bbs;
+src_bbs.reserve_exact (edges_to_remove.length ());
 FOR_EACH_VEC_ELT (edges_to_remove, i, e)
-{
+src_bbs.quick_push (e->src->index);
-bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated);
+FOR_EACH_VEC_ELT (edges_to_remove, i, e)
-gcc_assert (ok);
+if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
-}
+{
+	bool ok = remove_path (e, irred_invalidated,
+			       loop_closed_ssa_invalidated);
+	gcc_assert (ok);
+}
 edges_to_remove.release ();
 }
 /* Tries to unroll LOOP completely, i.e. NITER times.
 UL determines which loops we are allowed to unroll.
 LOCUS corresponding to the loop is used when emitting
 a summary of the unroll to the dump file.  */
 static bool
 try_unroll_loop_completely (struct loop *loop,
-			    edge exit, tree niter,
+			    edge exit, tree niter, bool may_be_zero,
 			    enum unroll_level ul,
 			    HOST_WIDE_INT maxiter,
-			    location_t locus)
+			    dump_user_location_t locus, bool allow_peel)
 {
-unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
+unsigned HOST_WIDE_INT n_unroll = 0;
-struct loop_size size;
 bool n_unroll_found = false;
 edge edge_to_cancel = NULL;
-dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
 /* See if we proved number of iterations to be low constant.
 EXIT is an edge that will be removed in all but last iteration of
 the loop.
 the EXIT edge.  */
 else
 exit = NULL;
 /* See if we can improve our estimate by using recorded loop bounds.  */
-if (maxiter >= 0
+if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
+&& maxiter >= 0
 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 {
 n_unroll = maxiter;
 n_unroll_found = true;
 /* Loop terminates before the IV variable test, so we can not
 }
 if (!n_unroll_found)
 return false;
-if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+if (!loop->unroll
+&& n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 {
 if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Not unrolling loop %d "
 		 "(--param max-completely-peel-times limit reached).\n",
 		 loop->num);
 if (!edge_to_cancel)
 edge_to_cancel = loop_edge_to_cancel (loop);
 if (n_unroll)
 {
-bool large;
 if (ul == UL_SINGLE_ITER)
 	return false;
-/* EXIT can be removed only if we are sure it passes first N_UNROLL
+if (loop->unroll)
-	 iterations.  */
-bool remove_exit = (exit && niter
-			  && TREE_CODE (niter) == INTEGER_CST
-			  && wi::leu_p (n_unroll, wi::to_widest (niter)));
-large = tree_estimate_loop_size
-		 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
-		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-ninsns = size.overall;
-if (large)
 	{
+	  /* If the unrolling factor is too large, bail out.  */
+	  if (n_unroll > (unsigned)loop->unroll)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file,
+			 "Not unrolling loop %d: "
+			 "user didn't want it unrolled completely.\n",
+			 loop->num);
+	      return false;
+	    }
+	}
+else
+	{
+	  struct loop_size size;
+	  /* EXIT can be removed only if we are sure it passes first N_UNROLL
+	     iterations.  */
+	  bool remove_exit = (exit && niter
+			      && TREE_CODE (niter) == INTEGER_CST
+			      && wi::leu_p (n_unroll, wi::to_widest (niter)));
+	  bool large
+	    = tree_estimate_loop_size
+		(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
+		 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+	  if (large)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+			 loop->num);
+	      return false;
+	    }
+	  unsigned HOST_WIDE_INT ninsns = size.overall;
+	  unsigned HOST_WIDE_INT unr_insns
+	    = estimated_unrolled_size (&size, n_unroll);
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+	    {
-		     loop->num);
+	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  return false;
+	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+		       (int) unr_insns);
+	    }
+	  /* If the code is going to shrink, we don't need to be extra
+	     cautious on guessing if the unrolling is going to be
+	     profitable.  */
+	  if (unr_insns
+	      /* If there is IV variable that will become constant, we
+		 save one instruction in the loop prologue we do not
+		 account otherwise.  */
+	      <= ninsns + (size.constant_iv != false))
+	    ;
+	  /* We unroll only inner loops, because we do not consider it
+	     profitable otheriwse.  We still can cancel loopback edge
+	     of not rolling loop; this is always a good idea.  */
+	  else if (ul == UL_NO_GROWTH)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Outer loops tend to be less interesting candidates for
+	     complete unrolling unless we can do a lot of propagation
+	     into the inner loop body.  For now we disable outer loop
+	     unrolling when the code would grow.  */
+	  else if (loop->inner)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "it is not innermost and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is call on a hot path through the loop, then
+	     there is most probably not much to optimize.  */
+	  else if (size.num_non_pure_calls_on_hot_path)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains call and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is pure/const call in the function, then we can
+	     still optimize the unrolled loop body if it contains some
+	     other interesting code than the calls and code storing or
+	     cumulating the return value.  */
+	  else if (size.num_pure_calls_on_hot_path
+		   /* One IV increment, one test, one ivtmp store and
+		      one useful stmt.  That is about minimal loop
+		      doing pure call.  */
+		   && (size.non_call_stmts_on_hot_path
+		       <= 3 + size.num_pure_calls_on_hot_path))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains just pure calls and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Complete unrolling is major win when control flow is
+	     removed and one big basic block is created.  If the loop
+	     contains control flow the optimization may still be a win
+	     because of eliminating the loop overhead but it also may
+	     blow the branch predictor tables.  Limit number of
+	     branches on the hot path through the peeled sequence.  */
+	  else if (size.num_branches_on_hot_path * (int)n_unroll
+		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "number of branches on hot path in the unrolled "
+			 "sequence reaches --param max-peel-branches limit.\n",
+			 loop->num);
+	      return false;
+	    }
+	  else if (unr_insns
+		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "number of insns in the unrolled sequence reaches "
+			 "--param max-completely-peeled-insns limit.\n",
+			 loop->num);
+	      return false;
+	    }
 	}
-unr_insns = estimated_unrolled_size (&size, n_unroll);
-if (dump_file && (dump_flags & TDF_DETAILS))
-	{
-	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-		   (int) unr_insns);
-	}
-/* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
-	;
-/* We unroll only inner loops, because we do not consider it profitable
-	 otheriwse.  We still can cancel loopback edge of not rolling loop;
-	 this is always a good idea.  */
-else if (ul == UL_NO_GROWTH)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-		     loop->num);
-	  return false;
-	}
-/* Outer loops tend to be less interesting candidates for complete
-	 unrolling unless we can do a lot of propagation into the inner loop
-	 body.  For now we disable outer loop unrolling when the code would
-	 grow.  */
-else if (loop->inner)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "it is not innermost and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-/* If there is call on a hot path through the loop, then
-	 there is most probably not much to optimize.  */
-else if (size.num_non_pure_calls_on_hot_path)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains call and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-/* If there is pure/const call in the function, then we
-	 can still optimize the unrolled loop body if it contains
-	 some other interesting code than the calls and code
-	 storing or cumulating the return value.  */
-else if (size.num_pure_calls_on_hot_path
-	       /* One IV increment, one test, one ivtmp store
-		  and one useful stmt.  That is about minimal loop
-		  doing pure call.  */
-	       && (size.non_call_stmts_on_hot_path
-		   <= 3 + size.num_pure_calls_on_hot_path))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains just pure calls and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-/* Complete unrolling is a major win when control flow is removed and
-	 one big basic block is created.  If the loop contains control flow
-	 the optimization may still be a win because of eliminating the loop
-	 overhead but it also may blow the branch predictor tables.
-	 Limit number of branches on the hot path through the peeled
-	 sequence.  */
-else if (size.num_branches_on_hot_path * (int)n_unroll
-	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     " number of branches on hot path in the unrolled sequence"
-		     " reach --param max-peel-branches limit.\n",
-		     loop->num);
-	  return false;
-	}
-else if (unr_insns
-	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
-	  return false;
-	}
-if (!n_unroll)
-dump_printf_loc (report_flags, locus,
-"loop turned into non-loop; it never loops.\n");
 initialize_original_copy_tables ();
 auto_sbitmap wont_exit (n_unroll + 1);
 if (exit && niter
 	  && TREE_CODE (niter) == INTEGER_CST
 else
 	{
 	  exit = NULL;
 	  bitmap_clear (wont_exit);
 	}
+if (may_be_zero)
+	bitmap_clear_bit (wont_exit, 1);
 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 						 n_unroll, wont_exit,
 						 exit, &edges_to_remove,
 						 DLTHE_FLAG_UPDATE_FREQ
 if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 	gimple_cond_make_false (cond);
 else
 	gimple_cond_make_true (cond);
 update_stmt (cond);
-/* Do not remove the path. Doing so may remove outer loop
+/* Do not remove the path, as doing so may remove outer loop and
-	 and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+	 confuse bookkeeping code in tree_unroll_loops_completely.  */
 }
 /* Store the loop for later unlooping and exit removal.  */
 loops_to_unloop.safe_push (loop);
 loops_to_unloop_nunroll.safe_push (n_unroll);
 "loop turned into non-loop; it never loops\n");
 else
 {
 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 "loop with %d iterations completely unrolled",
-			   (int) (n_unroll + 1));
+			   (int) n_unroll);
 if (loop->header->count.initialized_p ())
 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 " (header execution count %d)",
 (int)loop->header->count.to_gcov_type ());
 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 enter the loop.
 Parameters are the same as for try_unroll_loops_completely */
 static bool
 try_peel_loop (struct loop *loop,
-	       edge exit, tree niter,
+	       edge exit, tree niter, bool may_be_zero,
 	       HOST_WIDE_INT maxiter)
 {
 HOST_WIDE_INT npeel;
 struct loop_size size;
 int peeled_size;
-if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
+if (!flag_peel_loops
+|| PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
 || !peeled_loops)
 return false;
 if (bitmap_bit_p (peeled_loops, loop->num))
 {
 if (dump_file)
 fprintf (dump_file, "Not peeling: loop is already peeled\n");
+return false;
+}
+/* We don't peel loops that will be unrolled as this can duplicate a
+loop more times than the user requested.  */
+if (loop->unroll)
+{
+if (dump_file)
+fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
 return false;
 }
 /* Peel only innermost loops.
 While the code is perfectly capable of peeling non-innermost loops,
 the heuristics would probably need some improvements. */
 if (loop->inner)
 {
 if (dump_file)
-fprintf (dump_file, "Not peeling: outer loop\n");
+	fprintf (dump_file, "Not peeling: outer loop\n");
 return false;
 }
 if (!optimize_loop_for_speed_p (loop))
 {
 if (dump_file)
-fprintf (dump_file, "Not peeling: cold loop\n");
+	fprintf (dump_file, "Not peeling: cold loop\n");
 return false;
 }
 /* Check if there is an estimate on the number of iterations.  */
 npeel = estimated_loop_iterations_int (loop);
 return false;
 }
 if (maxiter >= 0 && maxiter <= npeel)
 {
 if (dump_file)
-fprintf (dump_file, "Not peeling: upper bound is known so can "
+	fprintf (dump_file, "Not peeling: upper bound is known so can "
 		 "unroll completely\n");
 return false;
 }
 /* We want to peel estimated number of iterations + 1 (so we never
 enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
 and be sure to avoid overflows.  */
 if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
 {
 if (dump_file)
-fprintf (dump_file, "Not peeling: rolls too much "
+	fprintf (dump_file, "Not peeling: rolls too much "
 		 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
 return false;
 }
 npeel++;
 			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
 if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
 > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
 {
 if (dump_file)
-fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
 		 "(%i insns > --param max-peel-insns)", peeled_size);
 return false;
 }
 /* Duplicate possibly eliminating the exits.  */
 else
 {
 exit = NULL;
 bitmap_clear (wont_exit);
 }
+if (may_be_zero)
+bitmap_clear_bit (wont_exit, 1);
 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 					     npeel, wont_exit,
 					     exit, &edges_to_remove,
 					     DLTHE_FLAG_UPDATE_FREQ))
 {
 	  loop->nb_iterations_estimate = 0;
 	  loop->nb_iterations_likely_upper_bound = 0;
 	}
 }
 profile_count entry_count = profile_count::zero ();
-int entry_freq = 0;
 edge e;
 edge_iterator ei;
 FOR_EACH_EDGE (e, ei, loop->header->preds)
 if (e->src != loop->latch)
 {
 	if (e->src->count.initialized_p ())
 	  entry_count = e->src->count + e->src->count;
-	entry_freq += e->src->frequency;
 	gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
 }
 profile_probability p = profile_probability::very_unlikely ();
-if (loop->header->count > 0)
+p = entry_count.probability_in (loop->header->count);
-p = entry_count.probability_in (loop->header->count);
-else if (loop->header->frequency)
-p = profile_probability::probability_in_gcov_type
-		 (entry_freq, loop->header->frequency);
 scale_loop_profile (loop, p, 0);
 bitmap_set_bit (peeled_loops, loop->num);
 return true;
 }
 /* Adds a canonical induction variable to LOOP if suitable.
 Returns true if cfg is changed.   */
 static bool
 canonicalize_loop_induction_variables (struct loop *loop,
 				       bool create_iv, enum unroll_level ul,
-				       bool try_eval)
+				       bool try_eval, bool allow_peel)
 {
 edge exit = NULL;
 tree niter;
 HOST_WIDE_INT maxiter;
 bool modified = false;
-location_t locus = UNKNOWN_LOCATION;
+dump_user_location_t locus;
+struct tree_niter_desc niter_desc;
-niter = number_of_latch_executions (loop);
+bool may_be_zero = false;
+/* For unrolling allow conditional constant or zero iterations, thus
+perform loop-header copying on-the-fly.  */
 exit = single_exit (loop);
+niter = chrec_dont_know;
+if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
+{
+niter = niter_desc.niter;
+may_be_zero
+	= niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
+}
 if (TREE_CODE (niter) == INTEGER_CST)
-locus = gimple_location (last_stmt (exit->src));
+locus = last_stmt (exit->src);
 else
 {
+/* For non-constant niter fold may_be_zero into niter again.  */
+if (may_be_zero)
+	{
+	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+	    niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
+				 niter_desc.may_be_zero,
+				 build_int_cst (TREE_TYPE (niter), 0), niter);
+	  else
+	    niter = chrec_dont_know;
+	  may_be_zero = false;
+	}
 /* If the loop has more than one exit, try checking all of them
 	 for # of iterations determinable through scev.  */
 if (!exit)
 	niter = find_loop_niter (loop, &exit);
 	  && (chrec_contains_undetermined (niter)
 	      || TREE_CODE (niter) != INTEGER_CST))
 	niter = find_loop_niter_by_eval (loop, &exit);
 if (exit)
-locus = gimple_location (last_stmt (exit->src));
+locus = last_stmt (exit->src);
 if (TREE_CODE (niter) != INTEGER_CST)
 	exit = NULL;
 }
 /* Remove exits that are known to be never taken based on loop bound.
 Needs to be called after compilation of max_loop_iterations_int that
 populates the loop bounds.  */
 modified |= remove_redundant_iv_tests (loop);
-if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
+if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
+				  maxiter, locus, allow_peel))
 return true;
 if (create_iv
 && niter && !chrec_contains_undetermined (niter)
 && exit && just_once_each_iteration_p (loop, exit->src))
-create_canonical_iv (loop, exit, niter);
+{
+tree iv_niter = niter;
+if (may_be_zero)
+	{
+	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+	    iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
+				    niter_desc.may_be_zero,
+				    build_int_cst (TREE_TYPE (iv_niter), 0),
+				    iv_niter);
+	  else
+	    iv_niter = NULL_TREE;
+	}
+if (iv_niter)
+	create_canonical_iv (loop, exit, iv_niter);
+}
 if (ul == UL_ALL)
-modified |= try_peel_loop (loop, exit, niter, maxiter);
+modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
 return modified;
 }
 /* The main entry point of the pass.  Adds canonical induction variables
 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 {
 changed |= canonicalize_loop_induction_variables (loop,
 							true, UL_SINGLE_ITER,
-							true);
+							true, false);
 }
 gcc_assert (!need_ssa_update_p (cfun));
 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
 if (irred_invalidated
 if (changed)
 return TODO_cleanup_cfg;
 return 0;
 }
-/* Propagate constant SSA_NAMEs defined in basic block BB.  */
-static void
-propagate_constants_for_unrolling (basic_block bb)
-{
-/* Look for degenerate PHI nodes with constant argument.  */
-for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
-{
-gphi *phi = gsi.phi ();
-tree result = gimple_phi_result (phi);
-tree arg = gimple_phi_arg_def (phi, 0);
-if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
-	  && gimple_phi_num_args (phi) == 1
-	  && CONSTANT_CLASS_P (arg))
-	{
-	  replace_uses_by (result, arg);
-	  gsi_remove (&gsi, true);
-	  release_ssa_name (result);
-	}
-else
-	gsi_next (&gsi);
-}
-/* Look for assignments to SSA names with constant RHS.  */
-for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
-{
-gimple *stmt = gsi_stmt (gsi);
-tree lhs;
-if (is_gimple_assign (stmt)
-	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant
-	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
-	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
-	{
-	  replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
-	  gsi_remove (&gsi, true);
-	  release_ssa_name (lhs);
-	}
-else
-	gsi_next (&gsi);
-}
-}
 /* Process loops from innermost to outer, stopping at the innermost
 loop we unrolled.  */
 static bool
 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
 {
 struct loop *loop_father;
 bool changed = false;
 struct loop *inner;
 enum unroll_level ul;
+unsigned num = number_of_loops (cfun);
-/* Process inner loops first.  */
+/* Process inner loops first.  Don't walk loops added by the recursive
+calls because SSA form is not up-to-date.  They can be handled in the
+next iteration.  */
+bitmap child_father_bbs = NULL;
 for (inner = loop->inner; inner != NULL; inner = inner->next)
-changed |= tree_unroll_loops_completely_1 (may_increase_size,
+if ((unsigned) inner->num < num)
-					       unroll_outer, father_bbs,
+{
-					       inner);
+	if (!child_father_bbs)
+	  child_father_bbs = BITMAP_ALLOC (NULL);
+	if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
+					    child_father_bbs, inner))
+	  {
+	    bitmap_ior_into (father_bbs, child_father_bbs);
+	    bitmap_clear (child_father_bbs);
+	    changed = true;
+	  }
+}
+if (child_father_bbs)
+BITMAP_FREE (child_father_bbs);
 /* If we changed an inner loop we cannot process outer loops in this
 iteration because SSA form is not up-to-date.  Continue with
 siblings of outer loops instead.  */
 if (changed)
-return true;
+{
+/* If we are recorded as father clear all other fathers that
+are necessarily covered already to avoid redundant work.  */
+if (bitmap_bit_p (father_bbs, loop->header->index))
+	{
+	  bitmap_clear (father_bbs);
+	  bitmap_set_bit (father_bbs, loop->header->index);
+	}
+return true;
+}
 /* Don't unroll #pragma omp simd loops until the vectorizer
 attempts to vectorize those.  */
 if (loop->force_vectorize)
 return false;
 /* Try to unroll this loop.  */
 loop_father = loop_outer (loop);
 if (!loop_father)
 return false;
-if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+if (loop->unroll > 1)
+ul = UL_ALL;
+else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
 /* Unroll outermost loops only if asked to do so or they do
 	 not cause code growth.  */
 && (unroll_outer || loop_outer (loop_father)))
 ul = UL_ALL;
 else
 ul = UL_NO_GROWTH;
 if (canonicalize_loop_induction_variables
-(loop, false, ul, !flag_tree_loop_ivcanon))
+(loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
 {
 /* If we'll continue unrolling, we need to propagate constants
 	 within the new basic blocks to fold away induction variable
 	 computations; otherwise, the size might blow up before the
 	 iteration is complete and the IR eventually cleaned up.  */
 if (loop_outer (loop_father))
-	bitmap_set_bit (father_bbs, loop_father->header->index);
+	{
+	  /* Once we process our father we will have processed
+	     the fathers of our children as well, so avoid doing
+	     redundant work and clear fathers we've gathered sofar.  */
+	  bitmap_clear (father_bbs);
+	  bitmap_set_bit (father_bbs, loop_father->header->index);
+	}
 return true;
 }
 return false;
 /* Unroll LOOPS completely if they iterate just few times.  Unless
 MAY_INCREASE_SIZE is true, perform the unrolling only if the
 size of the code does not increase.  */
-unsigned int
+static unsigned int
 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 {
 bitmap father_bbs = BITMAP_ALLOC (NULL);
 bool changed;
 int iteration = 0;
 	  bitmap_clear (father_bbs);
 	  /* Propagate the constants within the new basic blocks.  */
 	  EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
 	    {
 	      loop_p father = get_loop (cfun, i);
-	      basic_block *body = get_loop_body_in_dom_order (father);
+	      bitmap exit_bbs = BITMAP_ALLOC (NULL);
-	      for (unsigned j = 0; j < father->num_nodes; j++)
+	      loop_exit *exit = father->exits->next;
-		propagate_constants_for_unrolling (body[j]);
+	      while (exit->e)
-	      free (body);
+		{
+		  bitmap_set_bit (exit_bbs, exit->e->dest->index);
+		  exit = exit->next;
+		}
+	      do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
 	    }
 	  BITMAP_FREE (fathers);
 	  /* This will take care of removing completely unrolled loops
 	     from the loop structures so we can continue unrolling now
 /* If we ever decide to run loop peeling more than once, we will need to
 track loops already peeled in loop structures themselves to avoid
 re-peeling the same loop multiple times.  */
 if (flag_peel_loops)
 peeled_loops = BITMAP_ALLOC (NULL);
-int val = tree_unroll_loops_completely (flag_unroll_loops
+unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
-					  || flag_peel_loops
+						   || flag_peel_loops
-					  || optimize >= 3, true);
+						   || optimize >= 3, true);
 if (peeled_loops)
 {
 BITMAP_FREE (peeled_loops);
 peeled_loops = NULL;
 }
 unsigned int
 pass_complete_unrolli::execute (function *fun)
 {
 unsigned ret = 0;
-loop_optimizer_init (LOOPS_NORMAL
+loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
-		       | LOOPS_HAVE_RECORDED_EXITS);
 if (number_of_loops (fun) > 1)
 {
 scev_initialize ();
 ret = tree_unroll_loops_completely (optimize >= 3, false);
 scev_finalize ();

Mercurial > hg > CbC > CbC_gcc

comparison gcc/tree-ssa-loop-ivcanon.c @ 132:d34655255c78