111
|
1 /* Support routines for Splitting Paths to loop backedges
|
|
2 Copyright (C) 2015-2017 Free Software Foundation, Inc.
|
|
3 Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify
|
|
8 it under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 3, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful,
|
|
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 GNU General Public License for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 #include "config.h"
|
|
22 #include "system.h"
|
|
23 #include "coretypes.h"
|
|
24 #include "backend.h"
|
|
25 #include "tree.h"
|
|
26 #include "gimple.h"
|
|
27 #include "tree-pass.h"
|
|
28 #include "tree-cfg.h"
|
|
29 #include "cfganal.h"
|
|
30 #include "cfgloop.h"
|
|
31 #include "gimple-iterator.h"
|
|
32 #include "tracer.h"
|
|
33 #include "predict.h"
|
|
34 #include "params.h"
|
|
35 #include "gimple-ssa.h"
|
|
36 #include "tree-phinodes.h"
|
|
37 #include "ssa-iterators.h"
|
|
38
|
|
39 /* Given LATCH, the latch block in a loop, see if the shape of the
|
|
40 path reaching LATCH is suitable for being split by duplication.
|
|
41 If so, return the block that will be duplicated into its predecessor
|
|
42 paths. Else return NULL. */
|
|
43
|
|
44 static basic_block
|
|
45 find_block_to_duplicate_for_splitting_paths (basic_block latch)
|
|
46 {
|
|
47 /* We should have simple latches at this point. So the latch should
|
|
48 have a single successor. This implies the predecessor of the latch
|
|
49 likely has the loop exit. And it's that predecessor we're most
|
|
50 interested in. To keep things simple, we're going to require that
|
|
51 the latch have a single predecessor too. */
|
|
52 if (single_succ_p (latch) && single_pred_p (latch))
|
|
53 {
|
|
54 basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
|
|
55 gcc_assert (single_pred_edge (latch)->src == bb);
|
|
56
|
|
57 /* If BB has been marked as not to be duplicated, then honor that
|
|
58 request. */
|
|
59 if (ignore_bb_p (bb))
|
|
60 return NULL;
|
|
61
|
|
62 gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb));
|
|
63 /* The immediate dominator of the latch must end in a conditional. */
|
|
64 if (!last || gimple_code (last) != GIMPLE_COND)
|
|
65 return NULL;
|
|
66
|
|
67 /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
|
|
68 region. Verify that it is.
|
|
69
|
|
70 First, verify that BB has two predecessors (each arm of the
|
|
71 IF-THEN-ELSE) and two successors (the latch and exit). */
|
|
72 if (EDGE_COUNT (bb->preds) == 2 && EDGE_COUNT (bb->succs) == 2)
|
|
73 {
|
|
74 /* Now verify that BB's immediate dominator ends in a
|
|
75 conditional as well. */
|
|
76 basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb);
|
|
77 gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb_idom));
|
|
78 if (!last || gimple_code (last) != GIMPLE_COND)
|
|
79 return NULL;
|
|
80
|
|
81 /* And that BB's immediate dominator's successors are the
|
|
82 predecessors of BB or BB itself. */
|
|
83 if (!(EDGE_PRED (bb, 0)->src == bb_idom
|
|
84 || find_edge (bb_idom, EDGE_PRED (bb, 0)->src))
|
|
85 || !(EDGE_PRED (bb, 1)->src == bb_idom
|
|
86 || find_edge (bb_idom, EDGE_PRED (bb, 1)->src)))
|
|
87 return NULL;
|
|
88
|
|
89 /* And that the predecessors of BB each have a single successor
|
|
90 or are BB's immediate domiator itself. */
|
|
91 if (!(EDGE_PRED (bb, 0)->src == bb_idom
|
|
92 || single_succ_p (EDGE_PRED (bb, 0)->src))
|
|
93 || !(EDGE_PRED (bb, 1)->src == bb_idom
|
|
94 || single_succ_p (EDGE_PRED (bb, 1)->src)))
|
|
95 return NULL;
|
|
96
|
|
97 /* So at this point we have a simple diamond for an IF-THEN-ELSE
|
|
98 construct starting at BB_IDOM, with a join point at BB. BB
|
|
99 pass control outside the loop or to the loop latch.
|
|
100
|
|
101 We're going to want to create two duplicates of BB, one for
|
|
102 each successor of BB_IDOM. */
|
|
103 return bb;
|
|
104 }
|
|
105 }
|
|
106 return NULL;
|
|
107 }
|
|
108
|
|
109 /* Return the number of non-debug statements in a block. */
|
|
110 static unsigned int
|
|
111 count_stmts_in_block (basic_block bb)
|
|
112 {
|
|
113 gimple_stmt_iterator gsi;
|
|
114 unsigned int num_stmts = 0;
|
|
115
|
|
116 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
117 {
|
|
118 gimple *stmt = gsi_stmt (gsi);
|
|
119 if (!is_gimple_debug (stmt))
|
|
120 num_stmts++;
|
|
121 }
|
|
122 return num_stmts;
|
|
123 }
|
|
124
|
|
125 /* Return TRUE if CODE represents a tree code that is not likely to
|
|
126 be easily if-convertable because it likely expands into multiple
|
|
127 insns, FALSE otherwise. */
|
|
128 static bool
|
|
129 poor_ifcvt_candidate_code (enum tree_code code)
|
|
130 {
|
|
131 return (code == MIN_EXPR
|
|
132 || code == MAX_EXPR
|
|
133 || code == ABS_EXPR
|
|
134 || code == COND_EXPR
|
|
135 || code == CALL_EXPR);
|
|
136 }
|
|
137
|
|
138 /* Return TRUE if BB is a reasonable block to duplicate by examining
|
|
139 its size, false otherwise. BB will always be a loop latch block.
|
|
140
|
|
141 Things to consider:
|
|
142
|
|
143 We do not want to spoil if-conversion if at all possible.
|
|
144
|
|
145 Most of the benefit seems to be from eliminating the unconditional
|
|
146 jump rather than CSE/DCE opportunities. So favor duplicating
|
|
147 small latches. A latch with just a conditional branch is ideal.
|
|
148
|
|
149 CSE/DCE opportunties crop up when statements from the predecessors
|
|
150 feed statements in the latch and allow statements in the latch to
|
|
151 simplify. */
|
|
152
|
|
153 static bool
|
|
154 is_feasible_trace (basic_block bb)
|
|
155 {
|
|
156 basic_block pred1 = EDGE_PRED (bb, 0)->src;
|
|
157 basic_block pred2 = EDGE_PRED (bb, 1)->src;
|
|
158 int num_stmts_in_join = count_stmts_in_block (bb);
|
|
159 int num_stmts_in_pred1
|
|
160 = EDGE_COUNT (pred1->succs) == 1 ? count_stmts_in_block (pred1) : 0;
|
|
161 int num_stmts_in_pred2
|
|
162 = EDGE_COUNT (pred2->succs) == 1 ? count_stmts_in_block (pred2) : 0;
|
|
163
|
|
164 /* This is meant to catch cases that are likely opportunities for
|
|
165 if-conversion. Essentially we look for the case where
|
|
166 BB's predecessors are both single statement blocks where
|
|
167 the output of that statement feed the same PHI in BB. */
|
|
168 if (num_stmts_in_pred1 == 1 && num_stmts_in_pred2 == 1)
|
|
169 {
|
|
170 gimple *stmt1 = last_and_only_stmt (pred1);
|
|
171 gimple *stmt2 = last_and_only_stmt (pred2);
|
|
172
|
|
173 if (stmt1 && stmt2
|
|
174 && gimple_code (stmt1) == GIMPLE_ASSIGN
|
|
175 && gimple_code (stmt2) == GIMPLE_ASSIGN)
|
|
176 {
|
|
177 enum tree_code code1 = gimple_assign_rhs_code (stmt1);
|
|
178 enum tree_code code2 = gimple_assign_rhs_code (stmt2);
|
|
179
|
|
180 if (!poor_ifcvt_candidate_code (code1)
|
|
181 && !poor_ifcvt_candidate_code (code2))
|
|
182 {
|
|
183 tree lhs1 = gimple_assign_lhs (stmt1);
|
|
184 tree lhs2 = gimple_assign_lhs (stmt2);
|
|
185 gimple_stmt_iterator gsi;
|
|
186 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
187 {
|
|
188 gimple *phi = gsi_stmt (gsi);
|
|
189 if ((gimple_phi_arg_def (phi, 0) == lhs1
|
|
190 && gimple_phi_arg_def (phi, 1) == lhs2)
|
|
191 || (gimple_phi_arg_def (phi, 1) == lhs1
|
|
192 && gimple_phi_arg_def (phi, 0) == lhs2))
|
|
193 {
|
|
194 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
195 fprintf (dump_file,
|
|
196 "Block %d appears to be a join point for "
|
|
197 "if-convertable diamond.\n",
|
|
198 bb->index);
|
|
199 return false;
|
|
200 }
|
|
201 }
|
|
202 }
|
|
203 }
|
|
204 }
|
|
205
|
|
206 /* If the joiner has no PHIs with useful uses there is zero chance
|
|
207 of CSE/DCE/jump-threading possibilities exposed by duplicating it. */
|
|
208 bool found_useful_phi = false;
|
|
209 for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
|
|
210 gsi_next (&si))
|
|
211 {
|
|
212 gphi *phi = si.phi ();
|
|
213 use_operand_p use_p;
|
|
214 imm_use_iterator iter;
|
|
215 FOR_EACH_IMM_USE_FAST (use_p, iter, gimple_phi_result (phi))
|
|
216 {
|
|
217 gimple *stmt = USE_STMT (use_p);
|
|
218 if (is_gimple_debug (stmt))
|
|
219 continue;
|
|
220 /* If there's a use in the joiner this might be a CSE/DCE
|
|
221 opportunity. */
|
|
222 if (gimple_bb (stmt) == bb)
|
|
223 {
|
|
224 found_useful_phi = true;
|
|
225 break;
|
|
226 }
|
|
227 /* If the use is on a loop header PHI and on one path the
|
|
228 value is unchanged this might expose a jump threading
|
|
229 opportunity. */
|
|
230 if (gimple_code (stmt) == GIMPLE_PHI
|
|
231 && gimple_bb (stmt) == bb->loop_father->header
|
|
232 /* But for memory the PHI alone isn't good enough. */
|
|
233 && ! virtual_operand_p (gimple_phi_result (stmt)))
|
|
234 {
|
|
235 bool found_unchanged_path = false;
|
|
236 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
|
|
237 if (gimple_phi_arg_def (phi, i) == gimple_phi_result (stmt))
|
|
238 {
|
|
239 found_unchanged_path = true;
|
|
240 break;
|
|
241 }
|
|
242 /* If we found an unchanged path this can only be a threading
|
|
243 opportunity if we have uses of the loop header PHI result
|
|
244 in a stmt dominating the merge block. Otherwise the
|
|
245 splitting may prevent if-conversion. */
|
|
246 if (found_unchanged_path)
|
|
247 {
|
|
248 use_operand_p use2_p;
|
|
249 imm_use_iterator iter2;
|
|
250 FOR_EACH_IMM_USE_FAST (use2_p, iter2, gimple_phi_result (stmt))
|
|
251 {
|
|
252 gimple *use_stmt = USE_STMT (use2_p);
|
|
253 if (is_gimple_debug (use_stmt))
|
|
254 continue;
|
|
255 basic_block use_bb = gimple_bb (use_stmt);
|
|
256 if (use_bb != bb
|
|
257 && dominated_by_p (CDI_DOMINATORS, bb, use_bb))
|
|
258 {
|
|
259 if (gcond *cond = dyn_cast <gcond *> (use_stmt))
|
|
260 if (gimple_cond_code (cond) == EQ_EXPR
|
|
261 || gimple_cond_code (cond) == NE_EXPR)
|
|
262 found_useful_phi = true;
|
|
263 break;
|
|
264 }
|
|
265 }
|
|
266 }
|
|
267 if (found_useful_phi)
|
|
268 break;
|
|
269 }
|
|
270 }
|
|
271 if (found_useful_phi)
|
|
272 break;
|
|
273 }
|
|
274 /* There is one exception namely a controlling condition we can propagate
|
|
275 an equivalence from to the joiner. */
|
|
276 bool found_cprop_opportunity = false;
|
|
277 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
|
|
278 gcond *cond = as_a <gcond *> (last_stmt (dom));
|
|
279 if (gimple_cond_code (cond) == EQ_EXPR
|
|
280 || gimple_cond_code (cond) == NE_EXPR)
|
|
281 for (unsigned i = 0; i < 2; ++i)
|
|
282 {
|
|
283 tree op = gimple_op (cond, i);
|
|
284 if (TREE_CODE (op) == SSA_NAME)
|
|
285 {
|
|
286 use_operand_p use_p;
|
|
287 imm_use_iterator iter;
|
|
288 FOR_EACH_IMM_USE_FAST (use_p, iter, op)
|
|
289 {
|
|
290 if (is_gimple_debug (USE_STMT (use_p)))
|
|
291 continue;
|
|
292 if (gimple_bb (USE_STMT (use_p)) == bb)
|
|
293 {
|
|
294 found_cprop_opportunity = true;
|
|
295 break;
|
|
296 }
|
|
297 }
|
|
298 }
|
|
299 if (found_cprop_opportunity)
|
|
300 break;
|
|
301 }
|
|
302
|
|
303 if (! found_useful_phi && ! found_cprop_opportunity)
|
|
304 {
|
|
305 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
306 fprintf (dump_file,
|
|
307 "Block %d is a join that does not expose CSE/DCE/jump-thread "
|
|
308 "opportunities when duplicated.\n",
|
|
309 bb->index);
|
|
310 return false;
|
|
311 }
|
|
312
|
|
313 /* We may want something here which looks at dataflow and tries
|
|
314 to guess if duplication of BB is likely to result in simplification
|
|
315 of instructions in BB in either the original or the duplicate. */
|
|
316
|
|
317 /* Upper Hard limit on the number statements to copy. */
|
|
318 if (num_stmts_in_join
|
|
319 >= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS))
|
|
320 return false;
|
|
321
|
|
322 return true;
|
|
323 }
|
|
324
|
|
325 /* If the immediate dominator of the latch of the loop is
|
|
326 block with conditional branch, then the loop latch is
|
|
327 duplicated to its predecessors path preserving the SSA
|
|
328 semantics.
|
|
329
|
|
330 CFG before transformation.
|
|
331
|
|
332 2
|
|
333 |
|
|
334 |
|
|
335 +---->3
|
|
336 | / \
|
|
337 | / \
|
|
338 | 4 5
|
|
339 | \ /
|
|
340 | \ /
|
|
341 | 6
|
|
342 | / \
|
|
343 | / \
|
|
344 | 8 7
|
|
345 | | |
|
|
346 ---+ E
|
|
347
|
|
348
|
|
349
|
|
350 Block 8 is the latch. We're going to make copies of block 6 (9 & 10)
|
|
351 and wire things up so they look like this:
|
|
352
|
|
353 2
|
|
354 |
|
|
355 |
|
|
356 +---->3
|
|
357 | / \
|
|
358 | / \
|
|
359 | 4 5
|
|
360 | | |
|
|
361 | | |
|
|
362 | 9 10
|
|
363 | |\ /|
|
|
364 | | \ / |
|
|
365 | | 7 |
|
|
366 | | | |
|
|
367 | | E |
|
|
368 | | |
|
|
369 | \ /
|
|
370 | \ /
|
|
371 +-----8
|
|
372
|
|
373
|
|
374 Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
|
|
375 enables CSE, DCE and other optimizations to occur on a larger block
|
|
376 of code. */
|
|
377
|
|
378 static bool
|
|
379 split_paths ()
|
|
380 {
|
|
381 bool changed = false;
|
|
382 loop_p loop;
|
|
383
|
|
384 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
|
|
385 initialize_original_copy_tables ();
|
|
386 calculate_dominance_info (CDI_DOMINATORS);
|
|
387
|
|
388 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
|
|
389 {
|
|
390 /* Only split paths if we are optimizing this loop for speed. */
|
|
391 if (!optimize_loop_for_speed_p (loop))
|
|
392 continue;
|
|
393
|
|
394 /* See if there is a block that we can duplicate to split the
|
|
395 path to the loop latch. */
|
|
396 basic_block bb
|
|
397 = find_block_to_duplicate_for_splitting_paths (loop->latch);
|
|
398
|
|
399 /* BB is the merge point for an IF-THEN-ELSE we want to transform.
|
|
400
|
|
401 Essentially we want to create a duplicate of bb and redirect the
|
|
402 first predecessor of BB to the duplicate (leaving the second
|
|
403 predecessor as is. This will split the path leading to the latch
|
|
404 re-using BB to avoid useless copying. */
|
|
405 if (bb && is_feasible_trace (bb))
|
|
406 {
|
|
407 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
408 fprintf (dump_file,
|
|
409 "Duplicating join block %d into predecessor paths\n",
|
|
410 bb->index);
|
|
411 basic_block pred0 = EDGE_PRED (bb, 0)->src;
|
|
412 if (EDGE_COUNT (pred0->succs) != 1)
|
|
413 pred0 = EDGE_PRED (bb, 1)->src;
|
|
414 transform_duplicate (pred0, bb);
|
|
415 changed = true;
|
|
416
|
|
417 /* If BB has an outgoing edge marked as IRREDUCIBLE, then
|
|
418 duplicating BB may result in an irreducible region turning
|
|
419 into a natural loop.
|
|
420
|
|
421 Long term we might want to hook this into the block
|
|
422 duplication code, but as we've seen with similar changes
|
|
423 for edge removal, that can be somewhat risky. */
|
|
424 if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
|
|
425 || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
|
|
426 {
|
|
427 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
428 fprintf (dump_file,
|
|
429 "Join block %d has EDGE_IRREDUCIBLE_LOOP set. "
|
|
430 "Scheduling loop fixups.\n",
|
|
431 bb->index);
|
|
432 loops_state_set (LOOPS_NEED_FIXUP);
|
|
433 }
|
|
434 }
|
|
435 }
|
|
436
|
|
437 loop_optimizer_finalize ();
|
|
438 free_original_copy_tables ();
|
|
439 return changed;
|
|
440 }
|
|
441
|
|
442 /* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any
|
|
443 paths where split, otherwise return zero. */
|
|
444
|
|
445 static unsigned int
|
|
446 execute_split_paths ()
|
|
447 {
|
|
448 /* If we don't have at least 2 real blocks and backedges in the
|
|
449 CFG, then there's no point in trying to perform path splitting. */
|
|
450 if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
|
|
451 || !mark_dfs_back_edges ())
|
|
452 return 0;
|
|
453
|
|
454 bool changed = split_paths();
|
|
455 if (changed)
|
|
456 free_dominance_info (CDI_DOMINATORS);
|
|
457
|
|
458 return changed ? TODO_cleanup_cfg : 0;
|
|
459 }
|
|
460
|
|
461 static bool
|
|
462 gate_split_paths ()
|
|
463 {
|
|
464 return flag_split_paths;
|
|
465 }
|
|
466
|
|
467 namespace {
|
|
468
|
|
469 const pass_data pass_data_split_paths =
|
|
470 {
|
|
471 GIMPLE_PASS, /* type */
|
|
472 "split-paths", /* name */
|
|
473 OPTGROUP_NONE, /* optinfo_flags */
|
|
474 TV_SPLIT_PATHS, /* tv_id */
|
|
475 PROP_ssa, /* properties_required */
|
|
476 0, /* properties_provided */
|
|
477 0, /* properties_destroyed */
|
|
478 0, /* todo_flags_start */
|
|
479 TODO_update_ssa, /* todo_flags_finish */
|
|
480 };
|
|
481
|
|
482 class pass_split_paths : public gimple_opt_pass
|
|
483 {
|
|
484 public:
|
|
485 pass_split_paths (gcc::context *ctxt)
|
|
486 : gimple_opt_pass (pass_data_split_paths, ctxt)
|
|
487 {}
|
|
488 /* opt_pass methods: */
|
|
489 opt_pass * clone () { return new pass_split_paths (m_ctxt); }
|
|
490 virtual bool gate (function *) { return gate_split_paths (); }
|
|
491 virtual unsigned int execute (function *) { return execute_split_paths (); }
|
|
492
|
|
493 }; // class pass_split_paths
|
|
494
|
|
495 } // anon namespace
|
|
496
|
|
497 gimple_opt_pass *
|
|
498 make_pass_split_paths (gcc::context *ctxt)
|
|
499 {
|
|
500 return new pass_split_paths (ctxt);
|
|
501 }
|