/* SSA Jump Threading
   Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "predict.h"
#include "tree.h"
#include "gimple.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-threadupdate.h"
#include "tree-ssa-loop.h"
#include "cfganal.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
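
/* The pass below searches for jump threading opportunities backwards:
   starting from a block that ends in a control statement (GIMPLE_COND,
   GIMPLE_SWITCH or GIMPLE_GOTO), it walks the SSA definition chain of
   the tested value, looking for paths on which that value is a
   compile-time constant.  A typical opportunity is a state-machine
   loop such as:

     int state = 0;
     while (get_token ())
       switch (state)
	 {
	 case 0: ... state = 2; break;
	 case 1: ... break;
	 ...
	 }

   On the path from the "state = 2" assignment back around to the
   switch, the switch operand is known to be 2, so the blocks on that
   path can be duplicated and wired directly to the "case 2" label,
   bypassing the multiway branch.  (get_token and state are purely
   illustrative names.)  */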

class thread_jumps
{
public:
  void find_jump_threads_backwards (basic_block bb, bool speed_p);
private:
  edge profitable_jump_thread_path (basic_block bbi, tree name, tree arg,
				    bool *creates_irreducible_loop);
  void convert_and_register_current_path (edge taken_edge);
  void register_jump_thread_path_if_profitable (tree name, tree arg,
						basic_block def_bb);
  void handle_assignment (gimple *stmt, tree name, basic_block def_bb);
  void handle_phi (gphi *phi, tree name, basic_block def_bb);
  void fsm_find_control_statement_thread_paths (tree name);
  bool check_subpath_and_update_thread_path (basic_block last_bb,
					     basic_block new_bb,
					     int *next_path_length);

  /* Maximum number of jump threading paths we are allowed to
     register.  */
  int m_max_threaded_paths;
  /* Hash to keep track of seen bbs.  */
  hash_set<basic_block> m_visited_bbs;
  /* Current path we're analyzing.  */
  auto_vec<basic_block> m_path;
  /* Tracks if we have recursed through a loop PHI node.  */
  bool m_seen_loop_phi;
  /* Indicate that we could increase code size to improve the
     code path.  */
  bool m_speed_p;
};

/* Simple helper to get the last statement from BB.  Return NULL if
   that last statement is not a control statement.  */

static gimple *
get_gimple_control_stmt (basic_block bb)
{
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);

  if (gsi_end_p (gsi))
    return NULL;

  gimple *stmt = gsi_stmt (gsi);
  enum gimple_code code = gimple_code (stmt);
  if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO)
    return stmt;
  return NULL;
}

/* Return true if the CFG contains at least one path from START_BB to
   END_BB.  When a path is found, record in PATH the blocks from
   END_BB to START_BB.  LOCAL_VISITED_BBS is used to make sure we
   don't fall into an infinite loop.  Bound the recursion to basic
   blocks belonging to LOOP.  */
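
/* For example, a successful search for the path A -> B -> C pushes
   blocks as the recursion unwinds, leaving PATH = {C, B, A}, i.e. the
   blocks ordered from END_BB back to START_BB.  (A, B and C here are
   arbitrary illustrative blocks.)  */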

static bool
fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
		      vec<basic_block> &path,
		      hash_set<basic_block> &local_visited_bbs,
		      loop_p loop)
{
  if (loop != start_bb->loop_father)
    return false;

  if (start_bb == end_bb)
    {
      path.safe_push (start_bb);
      return true;
    }

  if (!local_visited_bbs.add (start_bb))
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, start_bb->succs)
	if (fsm_find_thread_path (e->dest, end_bb, path, local_visited_bbs,
				  loop))
	  {
	    path.safe_push (start_bb);
	    return true;
	  }
    }

  return false;
}

/* Examine jump threading path PATH to which we want to add BBI.

   If the resulting path is profitable to thread, then return the
   final taken edge from the path, NULL otherwise.

   NAME is the SSA_NAME of the variable we found to have a constant
   value on PATH.  ARG is the constant value of NAME on that path.

   BBI will be appended to PATH when we have a profitable jump
   threading path.  Callers are responsible for removing BBI from
   PATH in that case.  */
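
/* At a glance, the cost model below rejects a path when it is too
   long, when too many paths have already been registered, when it
   crosses loops, when it would duplicate too many statements, or when
   it would create an irreducible loop or copy a multiway branch
   without threading one.  Most rejections are reported in the dump
   file when details dumping is enabled.  */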

edge
thread_jumps::profitable_jump_thread_path (basic_block bbi, tree name,
					   tree arg,
					   bool *creates_irreducible_loop)
{
  /* Note BBI is not in the path yet, hence the +1 in the test below
     to make sure BBI is accounted for in the path length test.  */

  /* We can get a length of 0 here when the statement that
     makes a conditional generate a compile-time constant
     result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.is_empty ())
    return NULL;

  if (m_path.length () + 1
      > (unsigned) param_max_fsm_thread_length)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "FSM jump-thread path not considered: "
		 "the number of basic blocks on the path "
		 "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
      return NULL;
    }

  if (m_max_threaded_paths <= 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "FSM jump-thread path not considered: "
		 "the number of previously recorded FSM paths to "
		 "thread exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
      return NULL;
    }

  /* Add BBI to the path.
     From this point onward, if we decide the path is not profitable
     to thread, we must remove BBI from the path.  */
  m_path.safe_push (bbi);

  int n_insns = 0;
  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;
  bool path_crosses_loops = false;
  bool threaded_through_latch = false;
  bool multiway_branch_in_path = false;
  bool threaded_multiway_branch = false;
  bool contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order
	 in the PATH array.  The last entry in the array represents
	 the block with an outgoing edge that we will redirect to the
	 jump threading path.  Thus we don't care about that block's
	 loop father, nor how many statements are in that block (it
	 will not be copied), nor whether it ends in a multiway
	 branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = n_insns;
	  if (bb->loop_father != loop)
	    {
	      path_crosses_loops = true;
	      break;
	    }

	  /* PHIs in the path will create degenerate PHIs in the
	     copied path which will then get propagated away, so
	     looking at just the duplicate path the PHIs would
	     seem unimportant.

	     But those PHIs, because they're assignments to objects
	     whose lifetimes typically extend beyond the thread path,
	     will tend to generate PHIs (or at least new PHI arguments)
	     at points where we leave the thread path and rejoin
	     the original blocks.  So we do want to account for them.

	     We ignore virtual PHIs.  We also ignore cases where BB
	     has a single incoming edge.  That's the most common
	     degenerate PHI we'll see here.  Finally we ignore PHIs
	     that are associated with the value we're tracking as
	     that object likely dies.  */
	  if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
	    {
	      for (gphi_iterator gsip = gsi_start_phis (bb);
		   !gsi_end_p (gsip);
		   gsi_next (&gsip))
		{
		  gphi *phi = gsip.phi ();
		  tree dst = gimple_phi_result (phi);

		  /* Note that if both NAME and DST are anonymous
		     SSA_NAMEs, then we do not have enough information
		     to consider them associated.  */
		  if (dst != name
		      && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
			  || !SSA_NAME_VAR (dst))
		      && !virtual_operand_p (dst))
		    ++n_insns;
		}
	    }

	  if (!contains_hot_bb && m_speed_p)
	    contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
		{
		  m_path.pop ();
		  return NULL;
		}
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !(gimple_code (stmt) == GIMPLE_ASSIGN
		       && gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
		  && !is_gimple_debug (stmt))
		n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", n_insns - orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special: it's the block whose
	     branch we are going to be able to eliminate.  */
	  gimple *last = last_stmt (bb);
	  if (last && (gimple_code (last) == GIMPLE_SWITCH
		       || gimple_code (last) == GIMPLE_GOTO))
	    {
	      if (j == 0)
		threaded_multiway_branch = true;
	      else
		multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	threaded_through_latch = true;
    }

  gimple *stmt = get_gimple_control_stmt (m_path[0]);
  gcc_assert (stmt);

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */

  int stmt_insns = estimate_num_insns (stmt, &eni_size_weights);
  n_insns -= stmt_insns;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     stmt_insns, n_insns);

  /* We have found a constant value for ARG.  For GIMPLE_SWITCH
     and GIMPLE_GOTO, we use it as-is.  However, for a GIMPLE_COND
     we need to substitute, fold and simplify so we can determine
     the edge taken out of the last block.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      enum tree_code cond_code = gimple_cond_code (stmt);

      /* We know the underlying format of the condition.  */
      arg = fold_binary (cond_code, boolean_type_node,
			 arg, gimple_cond_rhs (stmt));
    }

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.

     We have to know the outgoing edge to figure this out.  */
  edge taken_edge = find_taken_edge (m_path[0], arg);

  /* There are cases where we may not be able to extract the
     taken edge.  For example, a computed goto to an absolute
     address.  Handle those cases gracefully.  */
  if (taken_edge == NULL)
    {
      m_path.pop ();
      return NULL;
    }

  *creates_irreducible_loop = false;
  if (threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
	  == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  if (path_crosses_loops)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "FSM jump-thread path not considered: "
		 "the path crosses loops.\n");
      m_path.pop ();
      return NULL;
    }

  /* Threading is profitable if the duplicated path is hot, but also
     when it separates a cold path from a hot path, permitting later
     optimization of the hot path.  Be on the aggressive side here.
     In some testcases, as in PR 78407, this leads to noticeable
     improvements.  */
  if (m_speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb))
    {
      if (n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "FSM jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  m_path.pop ();
	  return NULL;
	}
    }
  else if (n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "FSM jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 n_insns);
      m_path.pop ();
      return NULL;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop if
     the number of copied statements is low relative to the length of
     the path -- in that case there's little the traditional loop
     optimizer would have done anyway, so an irreducible loop is not
     so bad.  */
  if (!threaded_multiway_branch && *creates_irreducible_loop
      && (n_insns * (unsigned) param_fsm_scale_path_stmts
	  > (m_path.length () *
	     (unsigned) param_fsm_scale_path_blocks)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FSM would create irreducible loop without threading "
		 "multiway branch.\n");
      m_path.pop ();
      return NULL;
    }

  /* If this path does not thread through the loop latch, then we are
     using the FSM threader to find old style jump threads.  This
     is good, except the FSM threader does not re-use an existing
     threading path to reduce code duplication.

     So for that case, drastically reduce the number of statements
     we are allowed to copy.  */
  if (!(threaded_through_latch && threaded_multiway_branch)
      && (n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FSM did not thread around loop and would copy too "
		 "many statements.\n");
      m_path.pop ();
      return NULL;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!threaded_multiway_branch && multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FSM Thread through multiway branch without threading "
		 "a multiway branch.\n");
      m_path.pop ();
      return NULL;
    }
  return taken_edge;
}

/* The current path PATH is a vector of blocks forming a jump threading
   path in reverse order.  TAKEN_EDGE is the edge taken from path[0].

   Convert the current path into the form used by register_jump_thread
   and register it.  */
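
/* For example, if M_PATH is {C, B, A}, where C is the block ending in
   the control statement and A is the block supplying the constant,
   the loop below records the edges A->B and B->C, and the final entry
   is TAKEN_EDGE leaving C.  (A, B and C are illustrative.)  */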

void
thread_jumps::convert_and_register_current_path (edge taken_edge)
{
  vec<jump_thread_edge *> *jump_thread_path = new vec<jump_thread_edge *> ();

  /* Record the edges between the blocks in PATH.  */
  for (unsigned int j = 0; j + 1 < m_path.length (); j++)
    {
      basic_block bb1 = m_path[m_path.length () - j - 1];
      basic_block bb2 = m_path[m_path.length () - j - 2];

      edge e = find_edge (bb1, bb2);
      gcc_assert (e);
      jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD);
      jump_thread_path->safe_push (x);
    }

  /* Add the edge taken when the control variable has value ARG.  */
  jump_thread_edge *x
    = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
  jump_thread_path->safe_push (x);

  register_jump_thread (jump_thread_path);
  --m_max_threaded_paths;
}

/* While following a chain of SSA_NAME definitions, we jumped from a
   definition in LAST_BB to a definition in NEW_BB (walking
   backwards).

   Verify there is a single path between the blocks and none of the
   blocks in the path is already in VISITED_BBS.  If so, then update
   VISITED_BBS, add the new blocks to PATH and return TRUE.
   Otherwise return FALSE.

   Store the length of the subpath in NEXT_PATH_LENGTH.  */
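
/* For example, when the walk reaches a definition in NEW_BB and the
   only route from NEW_BB to LAST_BB goes through one intermediate
   block X, then X and NEW_BB are appended to the path and
   NEXT_PATH_LENGTH is set to 2.  (X is just an illustrative name.)  */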

bool
thread_jumps::check_subpath_and_update_thread_path (basic_block last_bb,
						    basic_block new_bb,
						    int *next_path_length)
{
  edge e;
  int e_count = 0;
  edge_iterator ei;
  auto_vec<basic_block> next_path;

  FOR_EACH_EDGE (e, ei, last_bb->preds)
    {
      hash_set<basic_block> local_visited_bbs;

      if (fsm_find_thread_path (new_bb, e->src, next_path,
				local_visited_bbs, e->src->loop_father))
	++e_count;

      /* If there is more than one path, stop.  */
      if (e_count > 1)
	return false;
    }

  /* Stop if we have not found a path: this could occur when the recursion
     is stopped by one of the bounds.  */
  if (e_count == 0)
    return false;

  /* Make sure we haven't already visited any of the nodes in
     NEXT_PATH.  Don't add them here to avoid pollution.  */
  for (unsigned int i = 0; i + 1 < next_path.length (); i++)
    {
      if (m_visited_bbs.contains (next_path[i]))
	return false;
    }

  /* Now add the nodes to VISITED_BBS.  */
  for (unsigned int i = 0; i + 1 < next_path.length (); i++)
    m_visited_bbs.add (next_path[i]);

  /* Append all the nodes from NEXT_PATH to PATH.  */
  m_path.safe_splice (next_path);
  *next_path_length = next_path.length ();

  return true;
}

/* If this is a profitable jump thread path, register it.

   NAME is an SSA_NAME with a possible constant value of ARG on PATH.

   DEF_BB is the basic block that ultimately defines the constant.  */

void
thread_jumps::register_jump_thread_path_if_profitable (tree name, tree arg,
						       basic_block def_bb)
{
  if (TREE_CODE_CLASS (TREE_CODE (arg)) != tcc_constant)
    return;

  bool irreducible = false;
  edge taken_edge = profitable_jump_thread_path (def_bb, name, arg,
						 &irreducible);
  if (taken_edge)
    {
      convert_and_register_current_path (taken_edge);
      m_path.pop ();

      if (irreducible)
	vect_free_loop_info_assumptions (m_path[0]->loop_father);
    }
}

/* Given PHI which defines NAME in block DEF_BB, recurse through the
   PHI's arguments searching for paths where NAME will ultimately have
   a constant value.

   PATH contains the series of blocks to traverse that will result in
   NAME having a constant value.  */

void
thread_jumps::handle_phi (gphi *phi, tree name, basic_block def_bb)
{
  /* Iterate over the arguments of PHI.  */
  for (unsigned int i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg_def (phi, i);
      basic_block bbi = gimple_phi_arg_edge (phi, i)->src;

      /* Skip edges pointing outside the current loop.  */
      if (!arg || def_bb->loop_father != bbi->loop_father)
	continue;

      if (TREE_CODE (arg) == SSA_NAME)
	{
	  m_path.safe_push (bbi);
	  /* Recursively follow SSA_NAMEs looking for a constant
	     definition.  */
	  fsm_find_control_statement_thread_paths (arg);

	  m_path.pop ();
	  continue;
	}

      register_jump_thread_path_if_profitable (name, arg, bbi);
    }
}

/* Return TRUE if STMT is a gimple assignment we want to either directly
   handle or recurse through.  Return FALSE otherwise.

   Note that adding more cases here requires adding cases to handle_assignment
   below.  */

static bool
handle_assignment_p (gimple *stmt)
{
  if (is_gimple_assign (stmt))
    {
      enum tree_code def_code = gimple_assign_rhs_code (stmt);

      /* If the RHS is an SSA_NAME, then we will recurse through it.
	 Go ahead and filter out cases where the SSA_NAME is a default
	 definition.  There's little to be gained by trying to handle that.  */
      if (def_code == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (gimple_assign_rhs1 (stmt)))
	return true;

      /* If the RHS is a constant, then it's a terminal that we'll want
	 to handle as well.  */
      if (TREE_CODE_CLASS (def_code) == tcc_constant)
	return true;
    }

  /* Anything not explicitly allowed is not handled.  */
  return false;
}

/* Given STMT, an assignment that defines NAME in block DEF_BB, recurse
   through its RHS searching for paths where NAME will ultimately have
   a constant value.

   PATH contains the series of blocks to traverse that will result in
   NAME having a constant value.  */

void
thread_jumps::handle_assignment (gimple *stmt, tree name, basic_block def_bb)
{
  tree arg = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (arg) == SSA_NAME)
    fsm_find_control_statement_thread_paths (arg);

  else
    {
      /* register_jump_thread_path_if_profitable will push the current
	 block onto the path.  But the path will always have the current
	 block at this point.  So we can just pop it.  */
      m_path.pop ();

      register_jump_thread_path_if_profitable (name, arg, def_bb);

      /* And put the current block back onto the path so that the
	 state of the stack is unchanged when we leave.  */
      m_path.safe_push (def_bb);
    }
}

/* We trace the value of the SSA_NAME NAME back through any PHI nodes
   looking for places where it gets a constant value and save the
   path.  */
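
/* For example, given

     x_1 = PHI <0 (3), y_2 (4)>
     ...
     if (x_1 == 0)

   tracing x_1 reaches the PHI, and the argument on the edge from
   block 3 gives a path on which x_1 is the constant 0.  That path is
   then checked for profitability and, if it passes, registered.  */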

void
thread_jumps::fsm_find_control_statement_thread_paths (tree name)
{
  /* If NAME appears in an abnormal PHI, then don't try to trace its
     value back through PHI nodes.  */
  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return;

  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
  basic_block def_bb = gimple_bb (def_stmt);

  if (def_bb == NULL)
    return;

  /* We allow the SSA chain to contain PHIs, simple copies and constant
     initializations.  */
  if (gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_code (def_stmt) != GIMPLE_ASSIGN)
    return;

  if (gimple_code (def_stmt) == GIMPLE_PHI
      && (gimple_phi_num_args (def_stmt)
	  >= (unsigned) param_fsm_maximum_phi_arguments))
    return;

  if (is_gimple_assign (def_stmt)
      && ! handle_assignment_p (def_stmt))
    return;

  /* Avoid infinite recursion.  */
  if (m_visited_bbs.add (def_bb))
    return;

  int next_path_length = 0;
  basic_block last_bb_in_path = m_path.last ();

  if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt))
    {
      /* Do not walk through more than one loop PHI node.  */
      if (m_seen_loop_phi)
	return;
      m_seen_loop_phi = true;
    }

  /* Following the chain of SSA_NAME definitions, we jumped from a definition
     in LAST_BB_IN_PATH to a definition in DEF_BB.  When these basic blocks
     are different, append to PATH the blocks from LAST_BB_IN_PATH to
     DEF_BB.  */
  if (def_bb != last_bb_in_path)
    {
      /* When DEF_BB == LAST_BB_IN_PATH, then the first block in the path
	 will already be in VISITED_BBS.  When they are not equal, then we
	 must ensure that first block is accounted for to ensure we do not
	 create bogus jump threading paths.  */
      m_visited_bbs.add (m_path[0]);
      if (!check_subpath_and_update_thread_path (last_bb_in_path, def_bb,
						 &next_path_length))
	return;
    }

  gcc_assert (m_path.last () == def_bb);

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    handle_phi (as_a <gphi *> (def_stmt), name, def_bb);
  else if (gimple_code (def_stmt) == GIMPLE_ASSIGN)
    handle_assignment (def_stmt, name, def_bb);

  /* Remove all the nodes that we added from NEXT_PATH.  */
  if (next_path_length)
    m_path.truncate (m_path.length () - next_path_length);
}

/* Search backwards from BB looking for paths where NAME (an SSA_NAME)
   is a constant.  Record such paths for jump threading.

   It is assumed that BB ends with a control statement and that by
   finding a path where NAME is a constant, we can thread the path.
   SPEED_P indicates that we could increase code size to improve the
   code path.  */

void
thread_jumps::find_jump_threads_backwards (basic_block bb, bool speed_p)
{
  gimple *stmt = get_gimple_control_stmt (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  tree name = NULL;
  if (code == GIMPLE_SWITCH)
    name = gimple_switch_index (as_a <gswitch *> (stmt));
  else if (code == GIMPLE_GOTO)
    name = gimple_goto_dest (stmt);
  else if (code == GIMPLE_COND)
    {
      if (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
	  && TREE_CODE_CLASS (TREE_CODE (gimple_cond_rhs (stmt))) == tcc_constant
	  && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt)))
	      || POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt)))))
	name = gimple_cond_lhs (stmt);
    }

  if (!name || TREE_CODE (name) != SSA_NAME)
    return;

  /* Initialize pass local data that's different for each BB.  */
  m_path.truncate (0);
  m_path.safe_push (bb);
  m_visited_bbs.empty ();
  m_seen_loop_phi = false;
  m_speed_p = speed_p;
  m_max_threaded_paths = param_max_fsm_thread_paths;

  fsm_find_control_statement_thread_paths (name);
}

namespace {

const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS,
  "thread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};

class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}

  opt_pass * clone (void) { return new pass_thread_jumps (m_ctxt); }
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};

bool
pass_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
{
  return flag_expensive_optimizations;
}

unsigned int
pass_thread_jumps::execute (function *fun)
{
  loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);

  /* Try to thread each block with more than one successor.  */
  thread_jumps threader;
  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      if (EDGE_COUNT (bb->succs) > 1)
	threader.find_jump_threads_backwards (bb, true);
    }
  bool changed = thread_through_all_blocks (true);

  loop_optimizer_finalize ();
  return changed ? TODO_cleanup_cfg : 0;
}

}

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}

namespace {

const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS,
  "ethread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  ( TODO_cleanup_cfg | TODO_update_ssa ),
};

class pass_early_thread_jumps : public gimple_opt_pass
{
public:
  pass_early_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
  {}

  opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); }
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};

bool
pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
{
  return true;
}

unsigned int
pass_early_thread_jumps::execute (function *fun)
{
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Try to thread each block with more than one successor.  */
  thread_jumps threader;
  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      if (EDGE_COUNT (bb->succs) > 1)
	threader.find_jump_threads_backwards (bb, false);
    }
  thread_through_all_blocks (true);

  loop_optimizer_finalize ();
  return 0;
}

}

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}