111
|
1 /* SSA Jump Threading
|
|
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify
|
|
7 it under the terms of the GNU General Public License as published by
|
|
8 the Free Software Foundation; either version 3, or (at your option)
|
|
9 any later version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful,
|
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 GNU General Public License for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 #include "config.h"
|
|
21 #include "system.h"
|
|
22 #include "coretypes.h"
|
|
23 #include "backend.h"
|
|
24 #include "predict.h"
|
|
25 #include "tree.h"
|
|
26 #include "gimple.h"
|
|
27 #include "fold-const.h"
|
|
28 #include "cfgloop.h"
|
|
29 #include "gimple-iterator.h"
|
|
30 #include "tree-cfg.h"
|
|
31 #include "tree-ssa-threadupdate.h"
|
|
32 #include "params.h"
|
|
33 #include "tree-ssa-loop.h"
|
|
34 #include "cfganal.h"
|
|
35 #include "tree-pass.h"
|
|
36 #include "gimple-ssa.h"
|
|
37 #include "tree-phinodes.h"
|
|
38 #include "tree-inline.h"
|
|
39 #include "tree-vectorizer.h"
|
|
40
|
|
41 static int max_threaded_paths;
|
|
42
|
|
43 /* Simple helper to get the last statement from BB, which is assumed
|
|
44 to be a control statement. Return NULL if the last statement is
|
|
45 not a control statement. */
|
|
46
|
|
47 static gimple *
|
|
48 get_gimple_control_stmt (basic_block bb)
|
|
49 {
|
|
50 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
|
|
51
|
|
52 if (gsi_end_p (gsi))
|
|
53 return NULL;
|
|
54
|
|
55 gimple *stmt = gsi_stmt (gsi);
|
|
56 enum gimple_code code = gimple_code (stmt);
|
|
57 if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO)
|
|
58 return stmt;
|
|
59 return NULL;
|
|
60 }
|
|
61
|
|
62 /* Return true if the CFG contains at least one path from START_BB to
|
|
63 END_BB. When a path is found, record in PATH the blocks from
|
|
64 END_BB to START_BB. VISITED_BBS is used to make sure we don't fall
|
|
65 into an infinite loop. Bound the recursion to basic blocks
|
|
66 belonging to LOOP. */
|
|
67
|
|
68 static bool
|
|
69 fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
|
|
70 vec<basic_block> &path,
|
|
71 hash_set<basic_block> &visited_bbs, loop_p loop)
|
|
72 {
|
|
73 if (loop != start_bb->loop_father)
|
|
74 return false;
|
|
75
|
|
76 if (start_bb == end_bb)
|
|
77 {
|
|
78 path.safe_push (start_bb);
|
|
79 return true;
|
|
80 }
|
|
81
|
|
82 if (!visited_bbs.add (start_bb))
|
|
83 {
|
|
84 edge e;
|
|
85 edge_iterator ei;
|
|
86 FOR_EACH_EDGE (e, ei, start_bb->succs)
|
|
87 if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop))
|
|
88 {
|
|
89 path.safe_push (start_bb);
|
|
90 return true;
|
|
91 }
|
|
92 }
|
|
93
|
|
94 return false;
|
|
95 }
|
|
96
|
|
97 /* Examine jump threading path PATH to which we want to add BBI.
|
|
98
|
|
99 If the resulting path is profitable to thread, then return the
|
|
100 final taken edge from the path, NULL otherwise.
|
|
101
|
|
102 NAME is the SSA_NAME of the variable we found to have a constant
|
|
103 value on PATH. ARG is the constant value of NAME on that path.
|
|
104
|
|
105 BBI will be appended to PATH when we have a profitable jump threading
|
|
106 path. Callers are responsible for removing BBI from PATH in that case.
|
|
107
|
|
108 SPEED_P indicate that we could increase code size to improve the
|
|
109 code path. */
|
|
110
|
|
111 static edge
|
|
112 profitable_jump_thread_path (vec<basic_block> &path,
|
|
113 basic_block bbi, tree name, tree arg,
|
|
114 bool speed_p, bool *creates_irreducible_loop)
|
|
115 {
|
|
116 /* Note BBI is not in the path yet, hence the +1 in the test below
|
|
117 to make sure BBI is accounted for in the path length test. */
|
|
118
|
|
119 /* We can get a length of 0 here when the statement that
|
|
120 makes a conditional generate a compile-time constant
|
|
121 result is in the same block as the conditional.
|
|
122
|
|
123 That's not really a jump threading opportunity, but instead is
|
|
124 simple cprop & simplification. We could handle it here if we
|
|
125 wanted by wiring up all the incoming edges. If we run this
|
|
126 early in IPA, that might be worth doing. For now we just
|
|
127 reject that case. */
|
|
128 if (path.is_empty ())
|
|
129 return NULL;
|
|
130
|
|
131 if (path.length () + 1 > (unsigned) PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
|
|
132 {
|
|
133 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
134 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
135 "the number of basic blocks on the path "
|
|
136 "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
|
|
137 return NULL;
|
|
138 }
|
|
139
|
|
140 if (max_threaded_paths <= 0)
|
|
141 {
|
|
142 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
143 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
144 "the number of previously recorded FSM paths to "
|
|
145 "thread exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
|
|
146 return NULL;
|
|
147 }
|
|
148
|
|
149 /* Add BBI to the path.
|
|
150 From this point onward, if we decide we the path is not profitable
|
|
151 to thread, we must remove BBI from the path. */
|
|
152 path.safe_push (bbi);
|
|
153
|
|
154 int n_insns = 0;
|
|
155 gimple_stmt_iterator gsi;
|
|
156 loop_p loop = path[0]->loop_father;
|
|
157 bool path_crosses_loops = false;
|
|
158 bool threaded_through_latch = false;
|
|
159 bool multiway_branch_in_path = false;
|
|
160 bool threaded_multiway_branch = false;
|
|
161 bool contains_hot_bb = false;
|
|
162
|
|
163 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
164 fprintf (dump_file, "Checking profitability of path (backwards): ");
|
|
165
|
|
166 /* Count the number of instructions on the path: as these instructions
|
|
167 will have to be duplicated, we will not record the path if there
|
|
168 are too many instructions on the path. Also check that all the
|
|
169 blocks in the path belong to a single loop. */
|
|
170 for (unsigned j = 0; j < path.length (); j++)
|
|
171 {
|
|
172 basic_block bb = path[j];
|
|
173
|
|
174 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
175 fprintf (dump_file, " bb:%i", bb->index);
|
|
176 /* Remember, blocks in the path are stored in opposite order
|
|
177 in the PATH array. The last entry in the array represents
|
|
178 the block with an outgoing edge that we will redirect to the
|
|
179 jump threading path. Thus we don't care about that block's
|
|
180 loop father, nor how many statements are in that block because
|
|
181 it will not be copied or whether or not it ends in a multiway
|
|
182 branch. */
|
|
183 if (j < path.length () - 1)
|
|
184 {
|
|
185 int orig_n_insns = n_insns;
|
|
186 if (bb->loop_father != loop)
|
|
187 {
|
|
188 path_crosses_loops = true;
|
|
189 break;
|
|
190 }
|
|
191
|
|
192 /* PHIs in the path will create degenerate PHIS in the
|
|
193 copied path which will then get propagated away, so
|
|
194 looking at just the duplicate path the PHIs would
|
|
195 seem unimportant.
|
|
196
|
|
197 But those PHIs, because they're assignments to objects
|
|
198 typically with lives that exist outside the thread path,
|
|
199 will tend to generate PHIs (or at least new PHI arguments)
|
|
200 at points where we leave the thread path and rejoin
|
|
201 the original blocks. So we do want to account for them.
|
|
202
|
|
203 We ignore virtual PHIs. We also ignore cases where BB
|
|
204 has a single incoming edge. That's the most common
|
|
205 degenerate PHI we'll see here. Finally we ignore PHIs
|
|
206 that are associated with the value we're tracking as
|
|
207 that object likely dies. */
|
|
208 if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
|
|
209 {
|
|
210 for (gphi_iterator gsip = gsi_start_phis (bb);
|
|
211 !gsi_end_p (gsip);
|
|
212 gsi_next (&gsip))
|
|
213 {
|
|
214 gphi *phi = gsip.phi ();
|
|
215 tree dst = gimple_phi_result (phi);
|
|
216
|
|
217 /* Note that if both NAME and DST are anonymous
|
|
218 SSA_NAMEs, then we do not have enough information
|
|
219 to consider them associated. */
|
|
220 if (dst != name
|
|
221 && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
|
|
222 || !SSA_NAME_VAR (dst))
|
|
223 && !virtual_operand_p (dst))
|
|
224 ++n_insns;
|
|
225 }
|
|
226 }
|
|
227
|
|
228 if (!contains_hot_bb && speed_p)
|
|
229 contains_hot_bb |= optimize_bb_for_speed_p (bb);
|
|
230 for (gsi = gsi_after_labels (bb);
|
|
231 !gsi_end_p (gsi);
|
|
232 gsi_next_nondebug (&gsi))
|
|
233 {
|
|
234 gimple *stmt = gsi_stmt (gsi);
|
|
235 /* Do not count empty statements and labels. */
|
|
236 if (gimple_code (stmt) != GIMPLE_NOP
|
|
237 && !(gimple_code (stmt) == GIMPLE_ASSIGN
|
|
238 && gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
|
|
239 && !is_gimple_debug (stmt))
|
|
240 n_insns += estimate_num_insns (stmt, &eni_size_weights);
|
|
241 }
|
|
242 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
243 fprintf (dump_file, " (%i insns)", n_insns-orig_n_insns);
|
|
244
|
|
245 /* We do not look at the block with the threaded branch
|
|
246 in this loop. So if any block with a last statement that
|
|
247 is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
|
|
248 multiway branch on our path.
|
|
249
|
|
250 The block in PATH[0] is special, it's the block were we're
|
|
251 going to be able to eliminate its branch. */
|
|
252 gimple *last = last_stmt (bb);
|
|
253 if (last && (gimple_code (last) == GIMPLE_SWITCH
|
|
254 || gimple_code (last) == GIMPLE_GOTO))
|
|
255 {
|
|
256 if (j == 0)
|
|
257 threaded_multiway_branch = true;
|
|
258 else
|
|
259 multiway_branch_in_path = true;
|
|
260 }
|
|
261 }
|
|
262
|
|
263 /* Note if we thread through the latch, we will want to include
|
|
264 the last entry in the array when determining if we thread
|
|
265 through the loop latch. */
|
|
266 if (loop->latch == bb)
|
|
267 threaded_through_latch = true;
|
|
268 }
|
|
269
|
|
270 gimple *stmt = get_gimple_control_stmt (path[0]);
|
|
271 gcc_assert (stmt);
|
|
272
|
|
273 /* We are going to remove the control statement at the end of the
|
|
274 last block in the threading path. So don't count it against our
|
|
275 statement count. */
|
|
276
|
|
277 int stmt_insns = estimate_num_insns (stmt, &eni_size_weights);
|
|
278 n_insns-= stmt_insns;
|
|
279
|
|
280 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
281 fprintf (dump_file, "\n Control statement insns: %i\n"
|
|
282 " Overall: %i insns\n",
|
|
283 stmt_insns, n_insns);
|
|
284
|
|
285 /* We have found a constant value for ARG. For GIMPLE_SWITCH
|
|
286 and GIMPLE_GOTO, we use it as-is. However, for a GIMPLE_COND
|
|
287 we need to substitute, fold and simplify so we can determine
|
|
288 the edge taken out of the last block. */
|
|
289 if (gimple_code (stmt) == GIMPLE_COND)
|
|
290 {
|
|
291 enum tree_code cond_code = gimple_cond_code (stmt);
|
|
292
|
|
293 /* We know the underyling format of the condition. */
|
|
294 arg = fold_binary (cond_code, boolean_type_node,
|
|
295 arg, gimple_cond_rhs (stmt));
|
|
296 }
|
|
297
|
|
298 /* If this path threaded through the loop latch back into the
|
|
299 same loop and the destination does not dominate the loop
|
|
300 latch, then this thread would create an irreducible loop.
|
|
301
|
|
302 We have to know the outgoing edge to figure this out. */
|
|
303 edge taken_edge = find_taken_edge (path[0], arg);
|
|
304
|
|
305 /* There are cases where we may not be able to extract the
|
|
306 taken edge. For example, a computed goto to an absolute
|
|
307 address. Handle those cases gracefully. */
|
|
308 if (taken_edge == NULL)
|
|
309 {
|
|
310 path.pop ();
|
|
311 return NULL;
|
|
312 }
|
|
313
|
|
314 *creates_irreducible_loop = false;
|
|
315 if (threaded_through_latch
|
|
316 && loop == taken_edge->dest->loop_father
|
|
317 && (determine_bb_domination_status (loop, taken_edge->dest)
|
|
318 == DOMST_NONDOMINATING))
|
|
319 *creates_irreducible_loop = true;
|
|
320
|
|
321 if (path_crosses_loops)
|
|
322 {
|
|
323 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
324 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
325 "the path crosses loops.\n");
|
|
326 path.pop ();
|
|
327 return NULL;
|
|
328 }
|
|
329
|
|
330 /* Threading is profitable if the path duplicated is hot but also
|
|
331 in a case we separate cold path from hot path and permit optimization
|
|
332 of the hot path later. Be on the agressive side here. In some testcases,
|
|
333 as in PR 78407 this leads to noticeable improvements. */
|
|
334 if (speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb))
|
|
335 {
|
|
336 if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
|
|
337 {
|
|
338 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
339 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
340 "the number of instructions on the path "
|
|
341 "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
|
|
342 path.pop ();
|
|
343 return NULL;
|
|
344 }
|
|
345 }
|
|
346 else if (n_insns > 1)
|
|
347 {
|
|
348 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
349 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
350 "duplication of %i insns is needed and optimizing for size.\n",
|
|
351 n_insns);
|
|
352 path.pop ();
|
|
353 return NULL;
|
|
354 }
|
|
355
|
|
356 /* We avoid creating irreducible inner loops unless we thread through
|
|
357 a multiway branch, in which case we have deemed it worth losing
|
|
358 other loop optimizations later.
|
|
359
|
|
360 We also consider it worth creating an irreducible inner loop if
|
|
361 the number of copied statement is low relative to the length of
|
|
362 the path -- in that case there's little the traditional loop
|
|
363 optimizer would have done anyway, so an irreducible loop is not
|
|
364 so bad. */
|
|
365 if (!threaded_multiway_branch && *creates_irreducible_loop
|
|
366 && (n_insns * (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
|
|
367 > (path.length () *
|
|
368 (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_BLOCKS))))
|
|
369
|
|
370 {
|
|
371 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
372 fprintf (dump_file,
|
|
373 "FSM would create irreducible loop without threading "
|
|
374 "multiway branch.\n");
|
|
375 path.pop ();
|
|
376 return NULL;
|
|
377 }
|
|
378
|
|
379
|
|
380 /* If this path does not thread through the loop latch, then we are
|
|
381 using the FSM threader to find old style jump threads. This
|
|
382 is good, except the FSM threader does not re-use an existing
|
|
383 threading path to reduce code duplication.
|
|
384
|
|
385 So for that case, drastically reduce the number of statements
|
|
386 we are allowed to copy. */
|
|
387 if (!(threaded_through_latch && threaded_multiway_branch)
|
|
388 && (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
|
|
389 >= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS)))
|
|
390 {
|
|
391 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
392 fprintf (dump_file,
|
|
393 "FSM did not thread around loop and would copy too "
|
|
394 "many statements.\n");
|
|
395 path.pop ();
|
|
396 return NULL;
|
|
397 }
|
|
398
|
|
399 /* When there is a multi-way branch on the path, then threading can
|
|
400 explode the CFG due to duplicating the edges for that multi-way
|
|
401 branch. So like above, only allow a multi-way branch on the path
|
|
402 if we actually thread a multi-way branch. */
|
|
403 if (!threaded_multiway_branch && multiway_branch_in_path)
|
|
404 {
|
|
405 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
406 fprintf (dump_file,
|
|
407 "FSM Thread through multiway branch without threading "
|
|
408 "a multiway branch.\n");
|
|
409 path.pop ();
|
|
410 return NULL;
|
|
411 }
|
|
412 return taken_edge;
|
|
413 }
|
|
414
|
|
415 /* PATH is vector of blocks forming a jump threading path in reverse
|
|
416 order. TAKEN_EDGE is the edge taken from path[0].
|
|
417
|
|
418 Convert that path into the form used by register_jump_thread and
|
|
419 register the path. */
|
|
420
|
|
421 static void
|
|
422 convert_and_register_jump_thread_path (vec<basic_block> &path, edge taken_edge)
|
|
423 {
|
|
424 vec<jump_thread_edge *> *jump_thread_path = new vec<jump_thread_edge *> ();
|
|
425
|
|
426 /* Record the edges between the blocks in PATH. */
|
|
427 for (unsigned int j = 0; j + 1 < path.length (); j++)
|
|
428 {
|
|
429 basic_block bb1 = path[path.length () - j - 1];
|
|
430 basic_block bb2 = path[path.length () - j - 2];
|
|
431
|
|
432 edge e = find_edge (bb1, bb2);
|
|
433 gcc_assert (e);
|
|
434 jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD);
|
|
435 jump_thread_path->safe_push (x);
|
|
436 }
|
|
437
|
|
438 /* Add the edge taken when the control variable has value ARG. */
|
|
439 jump_thread_edge *x
|
|
440 = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
|
|
441 jump_thread_path->safe_push (x);
|
|
442
|
|
443 register_jump_thread (jump_thread_path);
|
|
444 --max_threaded_paths;
|
|
445 }
|
|
446
|
|
447 /* While following a chain of SSA_NAME definitions, we jumped from a
|
|
448 definition in LAST_BB to a definition in NEW_BB (walking
|
|
449 backwards).
|
|
450
|
|
451 Verify there is a single path between the blocks and none of the
|
|
452 blocks in the path is already in VISITED_BBS. If so, then update
|
|
453 VISISTED_BBS, add the new blocks to PATH and return TRUE.
|
|
454 Otherwise return FALSE.
|
|
455
|
|
456 Store the length of the subpath in NEXT_PATH_LENGTH. */
|
|
457
|
|
458 static bool
|
|
459 check_subpath_and_update_thread_path (basic_block last_bb, basic_block new_bb,
|
|
460 hash_set<basic_block> &visited_bbs,
|
|
461 vec<basic_block> &path,
|
|
462 int *next_path_length)
|
|
463 {
|
|
464 edge e;
|
|
465 int e_count = 0;
|
|
466 edge_iterator ei;
|
|
467 auto_vec<basic_block> next_path;
|
|
468
|
|
469 FOR_EACH_EDGE (e, ei, last_bb->preds)
|
|
470 {
|
|
471 hash_set<basic_block> visited_bbs;
|
|
472
|
|
473 if (fsm_find_thread_path (new_bb, e->src, next_path, visited_bbs,
|
|
474 e->src->loop_father))
|
|
475 ++e_count;
|
|
476
|
|
477 /* If there is more than one path, stop. */
|
|
478 if (e_count > 1)
|
|
479 return false;
|
|
480 }
|
|
481
|
|
482 /* Stop if we have not found a path: this could occur when the recursion
|
|
483 is stopped by one of the bounds. */
|
|
484 if (e_count == 0)
|
|
485 return false;
|
|
486
|
|
487 /* Make sure we haven't already visited any of the nodes in
|
|
488 NEXT_PATH. Don't add them here to avoid pollution. */
|
|
489 for (unsigned int i = 0; i + 1 < next_path.length (); i++)
|
|
490 {
|
|
491 if (visited_bbs.contains (next_path[i]))
|
|
492 return false;
|
|
493 }
|
|
494
|
|
495 /* Now add the nodes to VISISTED_BBS. */
|
|
496 for (unsigned int i = 0; i + 1 < next_path.length (); i++)
|
|
497 visited_bbs.add (next_path[i]);
|
|
498
|
|
499 /* Append all the nodes from NEXT_PATH to PATH. */
|
|
500 path.safe_splice (next_path);
|
|
501 *next_path_length = next_path.length ();
|
|
502
|
|
503 return true;
|
|
504 }
|
|
505
|
|
506 /* If this is a profitable jump thread path, register it.
|
|
507
|
|
508 NAME is an SSA NAME with a possible constant value of ARG on PATH.
|
|
509
|
|
510 DEF_BB is the basic block that ultimately defines the constant.
|
|
511
|
|
512 SPEED_P indicate that we could increase code size to improve the
|
|
513 code path.
|
|
514 */
|
|
515
|
|
516 static void
|
|
517 register_jump_thread_path_if_profitable (vec<basic_block> &path,
|
|
518 tree name,
|
|
519 tree arg,
|
|
520 basic_block def_bb,
|
|
521 bool speed_p)
|
|
522 {
|
|
523 if (TREE_CODE_CLASS (TREE_CODE (arg)) != tcc_constant)
|
|
524 return;
|
|
525
|
|
526 bool irreducible = false;
|
|
527 edge taken_edge = profitable_jump_thread_path (path, def_bb, name, arg,
|
|
528 speed_p, &irreducible);
|
|
529 if (taken_edge)
|
|
530 {
|
|
531 convert_and_register_jump_thread_path (path, taken_edge);
|
|
532 path.pop ();
|
|
533
|
|
534 if (irreducible)
|
|
535 vect_free_loop_info_assumptions (path[0]->loop_father);
|
|
536 }
|
|
537 }
|
|
538
|
|
539 static void fsm_find_control_statement_thread_paths (tree,
|
|
540 hash_set<basic_block> &,
|
|
541 vec<basic_block> &,
|
|
542 bool, bool);
|
|
543
|
|
544 /* Given PHI which defines NAME in block DEF_BB, recurse through the
|
|
545 PHI's arguments searching for paths where NAME will ultimately have
|
|
546 a constant value.
|
|
547
|
|
548 VISITED_BBS tracks the blocks that have been encountered.
|
|
549
|
|
550 PATH contains the series of blocks to traverse that will result in
|
|
551 NAME having a constant value.
|
|
552
|
|
553 SEEN_LOOP_PHI tracks if we have recursed through a loop PHI node.
|
|
554
|
|
555 SPEED_P indicates if we are optimizing for speed over space. */
|
|
556
|
|
557 static void
|
|
558 handle_phi (gphi *phi, tree name, basic_block def_bb,
|
|
559 hash_set<basic_block> &visited_bbs,
|
|
560 vec<basic_block> &path,
|
|
561 bool seen_loop_phi, bool speed_p)
|
|
562 {
|
|
563 /* Iterate over the arguments of PHI. */
|
|
564 for (unsigned int i = 0; i < gimple_phi_num_args (phi); i++)
|
|
565 {
|
|
566 tree arg = gimple_phi_arg_def (phi, i);
|
|
567 basic_block bbi = gimple_phi_arg_edge (phi, i)->src;
|
|
568
|
|
569 /* Skip edges pointing outside the current loop. */
|
|
570 if (!arg || def_bb->loop_father != bbi->loop_father)
|
|
571 continue;
|
|
572
|
|
573 if (TREE_CODE (arg) == SSA_NAME)
|
|
574 {
|
|
575 path.safe_push (bbi);
|
|
576 /* Recursively follow SSA_NAMEs looking for a constant
|
|
577 definition. */
|
|
578 fsm_find_control_statement_thread_paths (arg, visited_bbs, path,
|
|
579 seen_loop_phi, speed_p);
|
|
580
|
|
581 path.pop ();
|
|
582 continue;
|
|
583 }
|
|
584
|
|
585 register_jump_thread_path_if_profitable (path, name, arg, bbi, speed_p);
|
|
586 }
|
|
587 }
|
|
588
|
|
589 /* Return TRUE if STMT is a gimple assignment we want to either directly
|
|
590 handle or recurse through. Return FALSE otherwise.
|
|
591
|
|
592 Note that adding more cases here requires adding cases to handle_assignment
|
|
593 below. */
|
|
594
|
|
595 static bool
|
|
596 handle_assignment_p (gimple *stmt)
|
|
597 {
|
|
598 if (is_gimple_assign (stmt))
|
|
599 {
|
|
600 enum tree_code def_code = gimple_assign_rhs_code (stmt);
|
|
601
|
|
602 /* If the RHS is an SSA_NAME, then we will recurse through it.
|
|
603 Go ahead and filter out cases where the SSA_NAME is a default
|
|
604 definition. There's little to be gained by trying to handle that. */
|
|
605 if (def_code == SSA_NAME
|
|
606 && !SSA_NAME_IS_DEFAULT_DEF (gimple_assign_rhs1 (stmt)))
|
|
607 return true;
|
|
608
|
|
609 /* If the RHS is a constant, then it's a terminal that we'll want
|
|
610 to handle as well. */
|
|
611 if (TREE_CODE_CLASS (def_code) == tcc_constant)
|
|
612 return true;
|
|
613 }
|
|
614
|
|
615 /* Anything not explicitly allowed is not handled. */
|
|
616 return false;
|
|
617 }
|
|
618
|
|
619 /* Given STMT which defines NAME in block DEF_BB, recurse through the
|
|
620 PHI's arguments searching for paths where NAME will ultimately have
|
|
621 a constant value.
|
|
622
|
|
623 VISITED_BBS tracks the blocks that have been encountered.
|
|
624
|
|
625 PATH contains the series of blocks to traverse that will result in
|
|
626 NAME having a constant value.
|
|
627
|
|
628 SEEN_LOOP_PHI tracks if we have recursed through a loop PHI node.
|
|
629
|
|
630 SPEED_P indicates if we are optimizing for speed over space. */
|
|
631
|
|
632 static void
|
|
633 handle_assignment (gimple *stmt, tree name, basic_block def_bb,
|
|
634 hash_set<basic_block> &visited_bbs,
|
|
635 vec<basic_block> &path,
|
|
636 bool seen_loop_phi, bool speed_p)
|
|
637 {
|
|
638 tree arg = gimple_assign_rhs1 (stmt);
|
|
639
|
|
640 if (TREE_CODE (arg) == SSA_NAME)
|
|
641 fsm_find_control_statement_thread_paths (arg, visited_bbs,
|
|
642 path, seen_loop_phi, speed_p);
|
|
643
|
|
644 else
|
|
645 {
|
|
646 /* register_jump_thread_path_if_profitable will push the current
|
|
647 block onto the path. But the path will always have the current
|
|
648 block at this point. So we can just pop it. */
|
|
649 path.pop ();
|
|
650
|
|
651 register_jump_thread_path_if_profitable (path, name, arg, def_bb,
|
|
652 speed_p);
|
|
653
|
|
654 /* And put the current block back onto the path so that the
|
|
655 state of the stack is unchanged when we leave. */
|
|
656 path.safe_push (def_bb);
|
|
657 }
|
|
658 }
|
|
659
|
|
660 /* We trace the value of the SSA_NAME NAME back through any phi nodes
|
|
661 looking for places where it gets a constant value and save the
|
|
662 path.
|
|
663
|
|
664 SPEED_P indicate that we could increase code size to improve the
|
|
665 code path. */
|
|
666
|
|
667 static void
|
|
668 fsm_find_control_statement_thread_paths (tree name,
|
|
669 hash_set<basic_block> &visited_bbs,
|
|
670 vec<basic_block> &path,
|
|
671 bool seen_loop_phi, bool speed_p)
|
|
672 {
|
|
673 /* If NAME appears in an abnormal PHI, then don't try to trace its
|
|
674 value back through PHI nodes. */
|
|
675 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
|
|
676 return;
|
|
677
|
|
678 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
|
|
679 basic_block def_bb = gimple_bb (def_stmt);
|
|
680
|
|
681 if (def_bb == NULL)
|
|
682 return;
|
|
683
|
|
684 /* We allow the SSA chain to contains PHIs and simple copies and constant
|
|
685 initializations. */
|
|
686 if (gimple_code (def_stmt) != GIMPLE_PHI
|
|
687 && gimple_code (def_stmt) != GIMPLE_ASSIGN)
|
|
688 return;
|
|
689
|
|
690 if (gimple_code (def_stmt) == GIMPLE_PHI
|
|
691 && (gimple_phi_num_args (def_stmt)
|
|
692 >= (unsigned) PARAM_VALUE (PARAM_FSM_MAXIMUM_PHI_ARGUMENTS)))
|
|
693 return;
|
|
694
|
|
695 if (is_gimple_assign (def_stmt)
|
|
696 && ! handle_assignment_p (def_stmt))
|
|
697 return;
|
|
698
|
|
699 /* Avoid infinite recursion. */
|
|
700 if (visited_bbs.add (def_bb))
|
|
701 return;
|
|
702
|
|
703 int next_path_length = 0;
|
|
704 basic_block last_bb_in_path = path.last ();
|
|
705
|
|
706 if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt))
|
|
707 {
|
|
708 /* Do not walk through more than one loop PHI node. */
|
|
709 if (seen_loop_phi)
|
|
710 return;
|
|
711 seen_loop_phi = true;
|
|
712 }
|
|
713
|
|
714 /* Following the chain of SSA_NAME definitions, we jumped from a definition in
|
|
715 LAST_BB_IN_PATH to a definition in DEF_BB. When these basic blocks are
|
|
716 different, append to PATH the blocks from LAST_BB_IN_PATH to DEF_BB. */
|
|
717 if (def_bb != last_bb_in_path)
|
|
718 {
|
|
719 /* When DEF_BB == LAST_BB_IN_PATH, then the first block in the path
|
|
720 will already be in VISITED_BBS. When they are not equal, then we
|
|
721 must ensure that first block is accounted for to ensure we do not
|
|
722 create bogus jump threading paths. */
|
|
723 visited_bbs.add (path[0]);
|
|
724 if (!check_subpath_and_update_thread_path (last_bb_in_path, def_bb,
|
|
725 visited_bbs, path,
|
|
726 &next_path_length))
|
|
727 return;
|
|
728 }
|
|
729
|
|
730 gcc_assert (path.last () == def_bb);
|
|
731
|
|
732 if (gimple_code (def_stmt) == GIMPLE_PHI)
|
|
733 handle_phi (as_a <gphi *> (def_stmt), name, def_bb,
|
|
734 visited_bbs, path, seen_loop_phi, speed_p);
|
|
735 else if (gimple_code (def_stmt) == GIMPLE_ASSIGN)
|
|
736 handle_assignment (def_stmt, name, def_bb,
|
|
737 visited_bbs, path, seen_loop_phi, speed_p);
|
|
738
|
|
739 /* Remove all the nodes that we added from NEXT_PATH. */
|
|
740 if (next_path_length)
|
|
741 path.truncate (path.length () - next_path_length);
|
|
742 }
|
|
743
|
|
744 /* Search backwards from BB looking for paths where NAME (an SSA_NAME)
|
|
745 is a constant. Record such paths for jump threading.
|
|
746
|
|
747 It is assumed that BB ends with a control statement and that by
|
|
748 finding a path where NAME is a constant, we can thread the path.
|
|
749 SPEED_P indicate that we could increase code size to improve the
|
|
750 code path. */
|
|
751
|
|
752 void
|
|
753 find_jump_threads_backwards (basic_block bb, bool speed_p)
|
|
754 {
|
|
755 gimple *stmt = get_gimple_control_stmt (bb);
|
|
756 if (!stmt)
|
|
757 return;
|
|
758
|
|
759 enum gimple_code code = gimple_code (stmt);
|
|
760 tree name = NULL;
|
|
761 if (code == GIMPLE_SWITCH)
|
|
762 name = gimple_switch_index (as_a <gswitch *> (stmt));
|
|
763 else if (code == GIMPLE_GOTO)
|
|
764 name = gimple_goto_dest (stmt);
|
|
765 else if (code == GIMPLE_COND)
|
|
766 {
|
|
767 if (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
|
|
768 && TREE_CODE_CLASS (TREE_CODE (gimple_cond_rhs (stmt))) == tcc_constant
|
|
769 && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt)))
|
|
770 || POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt)))))
|
|
771 name = gimple_cond_lhs (stmt);
|
|
772 }
|
|
773
|
|
774 if (!name || TREE_CODE (name) != SSA_NAME)
|
|
775 return;
|
|
776
|
|
777 auto_vec<basic_block> bb_path;
|
|
778 bb_path.safe_push (bb);
|
|
779 hash_set<basic_block> visited_bbs;
|
|
780
|
|
781 max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS);
|
|
782 fsm_find_control_statement_thread_paths (name, visited_bbs, bb_path, false,
|
|
783 speed_p);
|
|
784 }
|
|
785
|
|
786 namespace {
|
|
787
|
|
788 const pass_data pass_data_thread_jumps =
|
|
789 {
|
|
790 GIMPLE_PASS,
|
|
791 "thread",
|
|
792 OPTGROUP_NONE,
|
|
793 TV_TREE_SSA_THREAD_JUMPS,
|
|
794 ( PROP_cfg | PROP_ssa ),
|
|
795 0,
|
|
796 0,
|
|
797 0,
|
|
798 TODO_update_ssa,
|
|
799 };
|
|
800
|
|
801 class pass_thread_jumps : public gimple_opt_pass
|
|
802 {
|
|
803 public:
|
|
804 pass_thread_jumps (gcc::context *ctxt)
|
|
805 : gimple_opt_pass (pass_data_thread_jumps, ctxt)
|
|
806 {}
|
|
807
|
|
808 opt_pass * clone (void) { return new pass_thread_jumps (m_ctxt); }
|
|
809 virtual bool gate (function *);
|
|
810 virtual unsigned int execute (function *);
|
|
811 };
|
|
812
|
|
813 bool
|
|
814 pass_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
|
|
815 {
|
|
816 return flag_expensive_optimizations;
|
|
817 }
|
|
818
|
|
819
|
|
820 unsigned int
|
|
821 pass_thread_jumps::execute (function *fun)
|
|
822 {
|
|
823 loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
|
|
824
|
|
825 /* Try to thread each block with more than one successor. */
|
|
826 basic_block bb;
|
|
827 FOR_EACH_BB_FN (bb, fun)
|
|
828 {
|
|
829 if (EDGE_COUNT (bb->succs) > 1)
|
|
830 find_jump_threads_backwards (bb, true);
|
|
831 }
|
|
832 bool changed = thread_through_all_blocks (true);
|
|
833
|
|
834 loop_optimizer_finalize ();
|
|
835 return changed ? TODO_cleanup_cfg : 0;
|
|
836 }
|
|
837
|
|
838 }
|
|
839
|
|
840 gimple_opt_pass *
|
|
841 make_pass_thread_jumps (gcc::context *ctxt)
|
|
842 {
|
|
843 return new pass_thread_jumps (ctxt);
|
|
844 }
|
|
845
|
|
846 namespace {
|
|
847
|
|
848 const pass_data pass_data_early_thread_jumps =
|
|
849 {
|
|
850 GIMPLE_PASS,
|
|
851 "ethread",
|
|
852 OPTGROUP_NONE,
|
|
853 TV_TREE_SSA_THREAD_JUMPS,
|
|
854 ( PROP_cfg | PROP_ssa ),
|
|
855 0,
|
|
856 0,
|
|
857 0,
|
|
858 ( TODO_cleanup_cfg | TODO_update_ssa ),
|
|
859 };
|
|
860
|
|
861 class pass_early_thread_jumps : public gimple_opt_pass
|
|
862 {
|
|
863 public:
|
|
864 pass_early_thread_jumps (gcc::context *ctxt)
|
|
865 : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
|
|
866 {}
|
|
867
|
|
868 opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); }
|
|
869 virtual bool gate (function *);
|
|
870 virtual unsigned int execute (function *);
|
|
871 };
|
|
872
|
|
873 bool
|
|
874 pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
|
|
875 {
|
|
876 return true;
|
|
877 }
|
|
878
|
|
879
|
|
880 unsigned int
|
|
881 pass_early_thread_jumps::execute (function *fun)
|
|
882 {
|
|
883 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
|
|
884
|
|
885 /* Try to thread each block with more than one successor. */
|
|
886 basic_block bb;
|
|
887 FOR_EACH_BB_FN (bb, fun)
|
|
888 {
|
|
889 if (EDGE_COUNT (bb->succs) > 1)
|
|
890 find_jump_threads_backwards (bb, false);
|
|
891 }
|
|
892 thread_through_all_blocks (true);
|
|
893
|
|
894 loop_optimizer_finalize ();
|
|
895 return 0;
|
|
896 }
|
|
897
|
|
898 }
|
|
899
|
|
900 gimple_opt_pass *
|
|
901 make_pass_early_thread_jumps (gcc::context *ctxt)
|
|
902 {
|
|
903 return new pass_early_thread_jumps (ctxt);
|
|
904 }
|