diff gcc/gimple-ssa-split-paths.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children 84e7813d76e9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gcc/gimple-ssa-split-paths.c	Fri Oct 27 22:46:09 2017 +0900
@@ -0,0 +1,501 @@
+/* Support routines for Splitting Paths to loop backedges
+   Copyright (C) 2015-2017 Free Software Foundation, Inc.
+   Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "tree-cfg.h"
+#include "cfganal.h"
+#include "cfgloop.h"
+#include "gimple-iterator.h"
+#include "tracer.h"
+#include "predict.h"
+#include "params.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+
+/* Given LATCH, the latch block in a loop, see if the shape of the
+   path reaching LATCH is suitable for being split by duplication.
+   If so, return the block that will be duplicated into its predecessor
+   paths.  Else return NULL.  */
+
+static basic_block
+find_block_to_duplicate_for_splitting_paths (basic_block latch)
+{
+  /* We should have simple latches at this point.  So the latch should
+     have a single successor.  This implies the predecessor of the latch
+     likely has the loop exit.  And it's that predecessor we're most
+     interested in. To keep things simple, we're going to require that
+     the latch have a single predecessor too.  */
+  if (single_succ_p (latch) && single_pred_p (latch))
+    {
+      basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
+      gcc_assert (single_pred_edge (latch)->src == bb);
+
+      /* If BB has been marked as not to be duplicated, then honor that
+	 request.  */
+      if (ignore_bb_p (bb))
+	return NULL;
+
+      gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb));
+      /* The immediate dominator of the latch must end in a conditional.  */
+      if (!last || gimple_code (last) != GIMPLE_COND)
+	return NULL;
+
+      /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
+	 region.  Verify that it is.
+
+	 First, verify that BB has two predecessors (each arm of the
+	 IF-THEN-ELSE) and two successors (the latch and exit).  */
+      if (EDGE_COUNT (bb->preds) == 2 && EDGE_COUNT (bb->succs) == 2)
+	{
+	  /* Now verify that BB's immediate dominator ends in a
+	     conditional as well.  */
+	  basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb);
+	  gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb_idom));
+	  if (!last || gimple_code (last) != GIMPLE_COND)
+	    return NULL;
+
+	  /* And that BB's immediate dominator's successors are the
+	     predecessors of BB or BB itself.  */
+	  if (!(EDGE_PRED (bb, 0)->src == bb_idom
+		|| find_edge (bb_idom, EDGE_PRED (bb, 0)->src))
+	      || !(EDGE_PRED (bb, 1)->src == bb_idom
+		   || find_edge (bb_idom, EDGE_PRED (bb, 1)->src)))
+	    return NULL;
+
+	  /* And that the predecessors of BB each have a single successor
+	     or are BB's immediate domiator itself.  */
+	  if (!(EDGE_PRED (bb, 0)->src == bb_idom
+		|| single_succ_p (EDGE_PRED (bb, 0)->src))
+	      || !(EDGE_PRED (bb, 1)->src == bb_idom
+		   || single_succ_p (EDGE_PRED (bb, 1)->src)))
+	    return NULL;
+
+	  /* So at this point we have a simple diamond for an IF-THEN-ELSE
+	     construct starting at BB_IDOM, with a join point at BB.  BB
+	     pass control outside the loop or to the loop latch.
+
+	     We're going to want to create two duplicates of BB, one for
+	     each successor of BB_IDOM.  */
+	  return bb;
+	}
+    }
+  return NULL;
+}
+
+/* Return the number of non-debug statements in a block.  */
+static unsigned int
+count_stmts_in_block (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  unsigned int num_stmts = 0;
+
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (!is_gimple_debug (stmt))
+	num_stmts++;
+    }
+  return num_stmts;
+}
+
+/* Return TRUE if CODE represents a tree code that is not likely to
+   be easily if-convertable because it likely expands into multiple
+   insns, FALSE otherwise.  */
+static bool
+poor_ifcvt_candidate_code (enum tree_code code)
+{
+  return (code == MIN_EXPR
+	  || code == MAX_EXPR
+	  || code == ABS_EXPR
+	  || code == COND_EXPR
+	  || code == CALL_EXPR);
+}
+
+/* Return TRUE if BB is a reasonable block to duplicate by examining
+   its size, false otherwise.  BB will always be a loop latch block.
+
+   Things to consider:
+
+     We do not want to spoil if-conversion if at all possible.
+
+     Most of the benefit seems to be from eliminating the unconditional
+     jump rather than CSE/DCE opportunities.  So favor duplicating
+     small latches.  A latch with just a conditional branch is ideal.
+
+     CSE/DCE opportunties crop up when statements from the predecessors
+     feed statements in the latch and allow statements in the latch to
+     simplify.  */
+
+static bool
+is_feasible_trace (basic_block bb)
+{
+  basic_block pred1 = EDGE_PRED (bb, 0)->src;
+  basic_block pred2 = EDGE_PRED (bb, 1)->src;
+  int num_stmts_in_join = count_stmts_in_block (bb);
+  int num_stmts_in_pred1
+    = EDGE_COUNT (pred1->succs) == 1 ? count_stmts_in_block (pred1) : 0;
+  int num_stmts_in_pred2
+    = EDGE_COUNT (pred2->succs) == 1 ? count_stmts_in_block (pred2) : 0;
+
+  /* This is meant to catch cases that are likely opportunities for
+     if-conversion.  Essentially we look for the case where
+     BB's predecessors are both single statement blocks where
+     the output of that statement feed the same PHI in BB.  */
+  if (num_stmts_in_pred1 == 1 && num_stmts_in_pred2 == 1)
+    {
+      gimple *stmt1 = last_and_only_stmt (pred1);
+      gimple *stmt2 = last_and_only_stmt (pred2);
+
+      if (stmt1 && stmt2
+	  && gimple_code (stmt1) == GIMPLE_ASSIGN
+	  && gimple_code (stmt2) == GIMPLE_ASSIGN)
+	{
+	  enum tree_code code1 = gimple_assign_rhs_code (stmt1);
+	  enum tree_code code2 = gimple_assign_rhs_code (stmt2);
+
+	  if (!poor_ifcvt_candidate_code (code1)
+	      && !poor_ifcvt_candidate_code (code2))
+	    {
+	      tree lhs1 = gimple_assign_lhs (stmt1);
+	      tree lhs2 = gimple_assign_lhs (stmt2);
+	      gimple_stmt_iterator gsi;
+	      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+		{
+		  gimple *phi = gsi_stmt (gsi);
+		  if ((gimple_phi_arg_def (phi, 0) == lhs1
+		       && gimple_phi_arg_def (phi, 1) == lhs2)
+		      || (gimple_phi_arg_def (phi, 1) == lhs1
+			  && gimple_phi_arg_def (phi, 0) == lhs2))
+		    {
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			fprintf (dump_file,
+				 "Block %d appears to be a join point for "
+				 "if-convertable diamond.\n",
+				 bb->index);
+		      return false;
+		    }
+		}
+	    }
+	}
+    }
+
+  /* If the joiner has no PHIs with useful uses there is zero chance
+     of CSE/DCE/jump-threading possibilities exposed by duplicating it.  */
+  bool found_useful_phi = false;
+  for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
+       gsi_next (&si))
+    {
+      gphi *phi = si.phi ();
+      use_operand_p use_p;
+      imm_use_iterator iter;
+      FOR_EACH_IMM_USE_FAST (use_p, iter, gimple_phi_result (phi))
+	{
+	  gimple *stmt = USE_STMT (use_p);
+	  if (is_gimple_debug (stmt))
+	    continue;
+	  /* If there's a use in the joiner this might be a CSE/DCE
+	     opportunity.  */
+	  if (gimple_bb (stmt) == bb)
+	    {
+	      found_useful_phi = true;
+	      break;
+	    }
+	  /* If the use is on a loop header PHI and on one path the
+	     value is unchanged this might expose a jump threading
+	     opportunity.  */
+	  if (gimple_code (stmt) == GIMPLE_PHI
+	      && gimple_bb (stmt) == bb->loop_father->header
+	      /* But for memory the PHI alone isn't good enough.  */
+	      && ! virtual_operand_p (gimple_phi_result (stmt)))
+	    {
+	      bool found_unchanged_path = false;
+	      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+		if (gimple_phi_arg_def (phi, i) == gimple_phi_result (stmt))
+		  {
+		    found_unchanged_path = true;
+		    break;
+		  }
+	      /* If we found an unchanged path this can only be a threading
+	         opportunity if we have uses of the loop header PHI result
+		 in a stmt dominating the merge block.  Otherwise the
+		 splitting may prevent if-conversion.  */
+	      if (found_unchanged_path)
+		{
+		  use_operand_p use2_p;
+		  imm_use_iterator iter2;
+		  FOR_EACH_IMM_USE_FAST (use2_p, iter2, gimple_phi_result (stmt))
+		    {
+		      gimple *use_stmt = USE_STMT (use2_p);
+		      if (is_gimple_debug (use_stmt))
+			continue;
+		      basic_block use_bb = gimple_bb (use_stmt);
+		      if (use_bb != bb
+			  && dominated_by_p (CDI_DOMINATORS, bb, use_bb))
+			{
+			  if (gcond *cond = dyn_cast <gcond *> (use_stmt))
+			    if (gimple_cond_code (cond) == EQ_EXPR
+				|| gimple_cond_code (cond) == NE_EXPR)
+			      found_useful_phi = true;
+			  break;
+			}
+		    }
+		}
+	      if (found_useful_phi)
+		break;
+	    }
+	}
+      if (found_useful_phi)
+	break;
+    }
+  /* There is one exception namely a controlling condition we can propagate
+     an equivalence from to the joiner.  */
+  bool found_cprop_opportunity = false;
+  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
+  gcond *cond = as_a <gcond *> (last_stmt (dom));
+  if (gimple_cond_code (cond) == EQ_EXPR
+      || gimple_cond_code (cond) == NE_EXPR)
+    for (unsigned i = 0; i < 2; ++i)
+      {
+	tree op = gimple_op (cond, i);
+	if (TREE_CODE (op) == SSA_NAME)
+	  {
+	    use_operand_p use_p;
+	    imm_use_iterator iter;
+	    FOR_EACH_IMM_USE_FAST (use_p, iter, op)
+	      {
+		if (is_gimple_debug (USE_STMT (use_p)))
+		  continue;
+		if (gimple_bb (USE_STMT (use_p)) == bb)
+		  {
+		    found_cprop_opportunity = true;
+		    break;
+		  }
+	      }
+	  }
+	if (found_cprop_opportunity)
+	  break;
+      }
+
+  if (! found_useful_phi && ! found_cprop_opportunity)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file,
+		 "Block %d is a join that does not expose CSE/DCE/jump-thread "
+		 "opportunities when duplicated.\n",
+		 bb->index);
+      return false;
+    }
+
+  /* We may want something here which looks at dataflow and tries
+     to guess if duplication of BB is likely to result in simplification
+     of instructions in BB in either the original or the duplicate.  */
+
+  /* Upper Hard limit on the number statements to copy.  */
+  if (num_stmts_in_join
+      >= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS))
+    return false;
+
+  return true;
+}
+
+/* If the immediate dominator of the latch of the loop is
+   block with conditional branch, then the loop latch  is
+   duplicated to its predecessors path preserving the SSA
+   semantics.
+
+   CFG before transformation.
+
+              2
+              |
+              |
+        +---->3
+        |    / \
+        |   /   \
+        |  4     5
+        |   \   /
+        |    \ /
+        |     6
+        |    / \
+        |   /   \
+        |  8     7
+        |  |     |
+        ---+     E
+
+
+
+    Block 8 is the latch.  We're going to make copies of block 6 (9 & 10)
+    and wire things up so they look like this:
+
+              2
+              |
+              |
+        +---->3
+        |    / \
+        |   /   \
+        |  4     5
+        |  |     |
+        |  |     |
+        |  9    10
+        |  |\   /|
+        |  | \ / |
+        |  |  7  |
+        |  |  |  |
+        |  |  E  |
+        |  |     |
+        |   \   /
+        |    \ /
+        +-----8
+
+
+    Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
+    enables CSE, DCE and other optimizations to occur on a larger block
+    of code.   */
+
+static bool
+split_paths ()
+{
+  bool changed = false;
+  loop_p loop;
+
+  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
+  initialize_original_copy_tables ();
+  calculate_dominance_info (CDI_DOMINATORS);
+
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+    {
+      /* Only split paths if we are optimizing this loop for speed.  */
+      if (!optimize_loop_for_speed_p (loop))
+	continue;
+
+      /* See if there is a block that we can duplicate to split the
+	 path to the loop latch.  */
+      basic_block bb
+	= find_block_to_duplicate_for_splitting_paths (loop->latch);
+
+      /* BB is the merge point for an IF-THEN-ELSE we want to transform.
+
+	 Essentially we want to create a duplicate of bb and redirect the
+	 first predecessor of BB to the duplicate (leaving the second
+	 predecessor as is.  This will split the path leading to the latch
+	 re-using BB to avoid useless copying.  */
+      if (bb && is_feasible_trace (bb))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file,
+		     "Duplicating join block %d into predecessor paths\n",
+		     bb->index);
+	  basic_block pred0 = EDGE_PRED (bb, 0)->src;
+	  if (EDGE_COUNT (pred0->succs) != 1)
+	    pred0 = EDGE_PRED (bb, 1)->src;
+	  transform_duplicate (pred0, bb);
+	  changed = true;
+
+	  /* If BB has an outgoing edge marked as IRREDUCIBLE, then
+	     duplicating BB may result in an irreducible region turning
+	     into a natural loop.
+
+	     Long term we might want to hook this into the block
+	     duplication code, but as we've seen with similar changes
+	     for edge removal, that can be somewhat risky.  */
+	  if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
+	      || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		  fprintf (dump_file,
+			   "Join block %d has EDGE_IRREDUCIBLE_LOOP set.  "
+			   "Scheduling loop fixups.\n",
+			   bb->index);
+	      loops_state_set (LOOPS_NEED_FIXUP);
+	    }
+	}
+    }
+
+  loop_optimizer_finalize ();
+  free_original_copy_tables ();
+  return changed;
+}
+
+/* Main entry point for splitting paths.  Returns TODO_cleanup_cfg if any
+   paths where split, otherwise return zero.  */
+
+static unsigned int
+execute_split_paths ()
+{
+  /* If we don't have at least 2 real blocks and backedges in the
+     CFG, then there's no point in trying to perform path splitting.  */
+  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
+      || !mark_dfs_back_edges ())
+    return 0;
+
+  bool changed = split_paths();
+  if (changed)
+    free_dominance_info (CDI_DOMINATORS);
+
+  return changed ? TODO_cleanup_cfg : 0;
+}
+
+static bool
+gate_split_paths ()
+{
+  return flag_split_paths;
+}
+
+namespace {
+
+const pass_data pass_data_split_paths =
+{
+  GIMPLE_PASS, /* type */
+  "split-paths", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_SPLIT_PATHS, /* tv_id */
+  PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_split_paths : public gimple_opt_pass
+{
+   public:
+    pass_split_paths (gcc::context *ctxt)
+      : gimple_opt_pass (pass_data_split_paths, ctxt)
+    {}
+   /* opt_pass methods: */
+   opt_pass * clone () { return new pass_split_paths (m_ctxt); }
+   virtual bool gate (function *) { return gate_split_paths (); }
+   virtual unsigned int execute (function *) { return execute_split_paths (); }
+
+}; // class pass_split_paths
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_split_paths (gcc::context *ctxt)
+{
+  return new pass_split_paths (ctxt);
+}