111
|
1 /* SSA Jump Threading
|
131
|
2 Copyright (C) 2005-2018 Free Software Foundation, Inc.
|
111
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify
|
|
7 it under the terms of the GNU General Public License as published by
|
|
8 the Free Software Foundation; either version 3, or (at your option)
|
|
9 any later version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful,
|
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 GNU General Public License for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 #include "config.h"
|
|
21 #include "system.h"
|
|
22 #include "coretypes.h"
|
|
23 #include "backend.h"
|
|
24 #include "predict.h"
|
|
25 #include "tree.h"
|
|
26 #include "gimple.h"
|
|
27 #include "fold-const.h"
|
|
28 #include "cfgloop.h"
|
|
29 #include "gimple-iterator.h"
|
|
30 #include "tree-cfg.h"
|
|
31 #include "tree-ssa-threadupdate.h"
|
|
32 #include "params.h"
|
|
33 #include "tree-ssa-loop.h"
|
|
34 #include "cfganal.h"
|
|
35 #include "tree-pass.h"
|
|
36 #include "gimple-ssa.h"
|
|
37 #include "tree-phinodes.h"
|
|
38 #include "tree-inline.h"
|
|
39 #include "tree-vectorizer.h"
|
|
40
|
131
|
41 class thread_jumps
|
|
42 {
|
|
43 public:
|
|
44 void find_jump_threads_backwards (basic_block bb, bool speed_p);
|
|
45 private:
|
|
46 edge profitable_jump_thread_path (basic_block bbi, tree name, tree arg,
|
|
47 bool *creates_irreducible_loop);
|
|
48 void convert_and_register_current_path (edge taken_edge);
|
|
49 void register_jump_thread_path_if_profitable (tree name, tree arg,
|
|
50 basic_block def_bb);
|
|
51 void handle_assignment (gimple *stmt, tree name, basic_block def_bb);
|
|
52 void handle_phi (gphi *phi, tree name, basic_block def_bb);
|
|
53 void fsm_find_control_statement_thread_paths (tree name);
|
|
54 bool check_subpath_and_update_thread_path (basic_block last_bb,
|
|
55 basic_block new_bb,
|
|
56 int *next_path_length);
|
|
57
|
|
58 /* Maximum number of BBs we are allowed to thread. */
|
|
59 int m_max_threaded_paths;
|
|
60 /* Hash to keep track of seen bbs. */
|
|
61 hash_set<basic_block> m_visited_bbs;
|
|
62 /* Current path we're analyzing. */
|
|
63 auto_vec<basic_block> m_path;
|
|
64 /* Tracks if we have recursed through a loop PHI node. */
|
|
65 bool m_seen_loop_phi;
|
|
66 /* Indicate that we could increase code size to improve the
|
|
67 code path. */
|
|
68 bool m_speed_p;
|
|
69 };
|
111
|
70
|
|
71 /* Simple helper to get the last statement from BB, which is assumed
|
|
72 to be a control statement. Return NULL if the last statement is
|
|
73 not a control statement. */
|
|
74
|
|
75 static gimple *
|
|
76 get_gimple_control_stmt (basic_block bb)
|
|
77 {
|
|
78 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
|
|
79
|
|
80 if (gsi_end_p (gsi))
|
|
81 return NULL;
|
|
82
|
|
83 gimple *stmt = gsi_stmt (gsi);
|
|
84 enum gimple_code code = gimple_code (stmt);
|
|
85 if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO)
|
|
86 return stmt;
|
|
87 return NULL;
|
|
88 }
|
|
89
|
|
90 /* Return true if the CFG contains at least one path from START_BB to
|
|
91 END_BB. When a path is found, record in PATH the blocks from
|
131
|
92 END_BB to START_BB. LOCAL_VISITED_BBS is used to make sure we
|
|
93 don't fall into an infinite loop. Bound the recursion to basic
|
|
94 blocks belonging to LOOP. */
|
111
|
95
|
|
96 static bool
|
|
97 fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
|
|
98 vec<basic_block> &path,
|
131
|
99 hash_set<basic_block> &local_visited_bbs,
|
|
100 loop_p loop)
|
111
|
101 {
|
|
102 if (loop != start_bb->loop_father)
|
|
103 return false;
|
|
104
|
|
105 if (start_bb == end_bb)
|
|
106 {
|
|
107 path.safe_push (start_bb);
|
|
108 return true;
|
|
109 }
|
|
110
|
131
|
111 if (!local_visited_bbs.add (start_bb))
|
111
|
112 {
|
|
113 edge e;
|
|
114 edge_iterator ei;
|
|
115 FOR_EACH_EDGE (e, ei, start_bb->succs)
|
131
|
116 if (fsm_find_thread_path (e->dest, end_bb, path, local_visited_bbs,
|
|
117 loop))
|
111
|
118 {
|
|
119 path.safe_push (start_bb);
|
|
120 return true;
|
|
121 }
|
|
122 }
|
|
123
|
|
124 return false;
|
|
125 }
|
|
126
|
|
127 /* Examine jump threading path PATH to which we want to add BBI.
|
|
128
|
|
129 If the resulting path is profitable to thread, then return the
|
|
130 final taken edge from the path, NULL otherwise.
|
|
131
|
|
132 NAME is the SSA_NAME of the variable we found to have a constant
|
|
133 value on PATH. ARG is the constant value of NAME on that path.
|
|
134
|
131
|
135 BBI will be appended to PATH when we have a profitable jump
|
|
136 threading path. Callers are responsible for removing BBI from PATH
|
|
137 in that case. */
|
111
|
138
|
131
|
139 edge
|
|
140 thread_jumps::profitable_jump_thread_path (basic_block bbi, tree name,
|
|
141 tree arg,
|
|
142 bool *creates_irreducible_loop)
|
111
|
143 {
|
|
144 /* Note BBI is not in the path yet, hence the +1 in the test below
|
|
145 to make sure BBI is accounted for in the path length test. */
|
|
146
|
|
147 /* We can get a length of 0 here when the statement that
|
|
148 makes a conditional generate a compile-time constant
|
|
149 result is in the same block as the conditional.
|
|
150
|
|
151 That's not really a jump threading opportunity, but instead is
|
|
152 simple cprop & simplification. We could handle it here if we
|
|
153 wanted by wiring up all the incoming edges. If we run this
|
|
154 early in IPA, that might be worth doing. For now we just
|
|
155 reject that case. */
|
131
|
156 if (m_path.is_empty ())
|
111
|
157 return NULL;
|
|
158
|
131
|
159 if (m_path.length () + 1
|
|
160 > (unsigned) PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
|
111
|
161 {
|
|
162 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
163 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
164 "the number of basic blocks on the path "
|
|
165 "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
|
|
166 return NULL;
|
|
167 }
|
|
168
|
131
|
169 if (m_max_threaded_paths <= 0)
|
111
|
170 {
|
|
171 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
172 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
173 "the number of previously recorded FSM paths to "
|
|
174 "thread exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
|
|
175 return NULL;
|
|
176 }
|
|
177
|
|
178 /* Add BBI to the path.
|
|
179 From this point onward, if we decide we the path is not profitable
|
|
180 to thread, we must remove BBI from the path. */
|
131
|
181 m_path.safe_push (bbi);
|
111
|
182
|
|
183 int n_insns = 0;
|
|
184 gimple_stmt_iterator gsi;
|
131
|
185 loop_p loop = m_path[0]->loop_father;
|
111
|
186 bool path_crosses_loops = false;
|
|
187 bool threaded_through_latch = false;
|
|
188 bool multiway_branch_in_path = false;
|
|
189 bool threaded_multiway_branch = false;
|
|
190 bool contains_hot_bb = false;
|
|
191
|
|
192 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
193 fprintf (dump_file, "Checking profitability of path (backwards): ");
|
|
194
|
|
195 /* Count the number of instructions on the path: as these instructions
|
|
196 will have to be duplicated, we will not record the path if there
|
|
197 are too many instructions on the path. Also check that all the
|
|
198 blocks in the path belong to a single loop. */
|
131
|
199 for (unsigned j = 0; j < m_path.length (); j++)
|
111
|
200 {
|
131
|
201 basic_block bb = m_path[j];
|
111
|
202
|
|
203 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
204 fprintf (dump_file, " bb:%i", bb->index);
|
|
205 /* Remember, blocks in the path are stored in opposite order
|
|
206 in the PATH array. The last entry in the array represents
|
|
207 the block with an outgoing edge that we will redirect to the
|
|
208 jump threading path. Thus we don't care about that block's
|
|
209 loop father, nor how many statements are in that block because
|
|
210 it will not be copied or whether or not it ends in a multiway
|
|
211 branch. */
|
131
|
212 if (j < m_path.length () - 1)
|
111
|
213 {
|
|
214 int orig_n_insns = n_insns;
|
|
215 if (bb->loop_father != loop)
|
|
216 {
|
|
217 path_crosses_loops = true;
|
|
218 break;
|
|
219 }
|
|
220
|
|
221 /* PHIs in the path will create degenerate PHIS in the
|
|
222 copied path which will then get propagated away, so
|
|
223 looking at just the duplicate path the PHIs would
|
|
224 seem unimportant.
|
|
225
|
|
226 But those PHIs, because they're assignments to objects
|
|
227 typically with lives that exist outside the thread path,
|
|
228 will tend to generate PHIs (or at least new PHI arguments)
|
|
229 at points where we leave the thread path and rejoin
|
|
230 the original blocks. So we do want to account for them.
|
|
231
|
|
232 We ignore virtual PHIs. We also ignore cases where BB
|
|
233 has a single incoming edge. That's the most common
|
|
234 degenerate PHI we'll see here. Finally we ignore PHIs
|
|
235 that are associated with the value we're tracking as
|
|
236 that object likely dies. */
|
|
237 if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
|
|
238 {
|
|
239 for (gphi_iterator gsip = gsi_start_phis (bb);
|
|
240 !gsi_end_p (gsip);
|
|
241 gsi_next (&gsip))
|
|
242 {
|
|
243 gphi *phi = gsip.phi ();
|
|
244 tree dst = gimple_phi_result (phi);
|
|
245
|
|
246 /* Note that if both NAME and DST are anonymous
|
|
247 SSA_NAMEs, then we do not have enough information
|
|
248 to consider them associated. */
|
|
249 if (dst != name
|
|
250 && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
|
|
251 || !SSA_NAME_VAR (dst))
|
|
252 && !virtual_operand_p (dst))
|
|
253 ++n_insns;
|
|
254 }
|
|
255 }
|
|
256
|
131
|
257 if (!contains_hot_bb && m_speed_p)
|
111
|
258 contains_hot_bb |= optimize_bb_for_speed_p (bb);
|
|
259 for (gsi = gsi_after_labels (bb);
|
|
260 !gsi_end_p (gsi);
|
|
261 gsi_next_nondebug (&gsi))
|
|
262 {
|
|
263 gimple *stmt = gsi_stmt (gsi);
|
|
264 /* Do not count empty statements and labels. */
|
|
265 if (gimple_code (stmt) != GIMPLE_NOP
|
|
266 && !(gimple_code (stmt) == GIMPLE_ASSIGN
|
|
267 && gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
|
|
268 && !is_gimple_debug (stmt))
|
|
269 n_insns += estimate_num_insns (stmt, &eni_size_weights);
|
|
270 }
|
|
271 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
272 fprintf (dump_file, " (%i insns)", n_insns-orig_n_insns);
|
|
273
|
|
274 /* We do not look at the block with the threaded branch
|
|
275 in this loop. So if any block with a last statement that
|
|
276 is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
|
|
277 multiway branch on our path.
|
|
278
|
|
279 The block in PATH[0] is special, it's the block were we're
|
|
280 going to be able to eliminate its branch. */
|
|
281 gimple *last = last_stmt (bb);
|
|
282 if (last && (gimple_code (last) == GIMPLE_SWITCH
|
|
283 || gimple_code (last) == GIMPLE_GOTO))
|
|
284 {
|
|
285 if (j == 0)
|
|
286 threaded_multiway_branch = true;
|
|
287 else
|
|
288 multiway_branch_in_path = true;
|
|
289 }
|
|
290 }
|
|
291
|
|
292 /* Note if we thread through the latch, we will want to include
|
|
293 the last entry in the array when determining if we thread
|
|
294 through the loop latch. */
|
|
295 if (loop->latch == bb)
|
|
296 threaded_through_latch = true;
|
|
297 }
|
|
298
|
131
|
299 gimple *stmt = get_gimple_control_stmt (m_path[0]);
|
111
|
300 gcc_assert (stmt);
|
|
301
|
|
302 /* We are going to remove the control statement at the end of the
|
|
303 last block in the threading path. So don't count it against our
|
|
304 statement count. */
|
|
305
|
|
306 int stmt_insns = estimate_num_insns (stmt, &eni_size_weights);
|
|
307 n_insns-= stmt_insns;
|
|
308
|
|
309 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
310 fprintf (dump_file, "\n Control statement insns: %i\n"
|
|
311 " Overall: %i insns\n",
|
|
312 stmt_insns, n_insns);
|
|
313
|
|
314 /* We have found a constant value for ARG. For GIMPLE_SWITCH
|
|
315 and GIMPLE_GOTO, we use it as-is. However, for a GIMPLE_COND
|
|
316 we need to substitute, fold and simplify so we can determine
|
|
317 the edge taken out of the last block. */
|
|
318 if (gimple_code (stmt) == GIMPLE_COND)
|
|
319 {
|
|
320 enum tree_code cond_code = gimple_cond_code (stmt);
|
|
321
|
|
322 /* We know the underyling format of the condition. */
|
|
323 arg = fold_binary (cond_code, boolean_type_node,
|
|
324 arg, gimple_cond_rhs (stmt));
|
|
325 }
|
|
326
|
|
327 /* If this path threaded through the loop latch back into the
|
|
328 same loop and the destination does not dominate the loop
|
|
329 latch, then this thread would create an irreducible loop.
|
|
330
|
|
331 We have to know the outgoing edge to figure this out. */
|
131
|
332 edge taken_edge = find_taken_edge (m_path[0], arg);
|
111
|
333
|
|
334 /* There are cases where we may not be able to extract the
|
|
335 taken edge. For example, a computed goto to an absolute
|
|
336 address. Handle those cases gracefully. */
|
|
337 if (taken_edge == NULL)
|
|
338 {
|
131
|
339 m_path.pop ();
|
111
|
340 return NULL;
|
|
341 }
|
|
342
|
|
343 *creates_irreducible_loop = false;
|
|
344 if (threaded_through_latch
|
|
345 && loop == taken_edge->dest->loop_father
|
|
346 && (determine_bb_domination_status (loop, taken_edge->dest)
|
|
347 == DOMST_NONDOMINATING))
|
|
348 *creates_irreducible_loop = true;
|
|
349
|
|
350 if (path_crosses_loops)
|
|
351 {
|
|
352 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
353 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
354 "the path crosses loops.\n");
|
131
|
355 m_path.pop ();
|
111
|
356 return NULL;
|
|
357 }
|
|
358
|
|
359 /* Threading is profitable if the path duplicated is hot but also
|
|
360 in a case we separate cold path from hot path and permit optimization
|
|
361 of the hot path later. Be on the agressive side here. In some testcases,
|
|
362 as in PR 78407 this leads to noticeable improvements. */
|
131
|
363 if (m_speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb))
|
111
|
364 {
|
|
365 if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
|
|
366 {
|
|
367 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
368 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
369 "the number of instructions on the path "
|
|
370 "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
|
131
|
371 m_path.pop ();
|
111
|
372 return NULL;
|
|
373 }
|
|
374 }
|
|
375 else if (n_insns > 1)
|
|
376 {
|
|
377 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
378 fprintf (dump_file, "FSM jump-thread path not considered: "
|
|
379 "duplication of %i insns is needed and optimizing for size.\n",
|
|
380 n_insns);
|
131
|
381 m_path.pop ();
|
111
|
382 return NULL;
|
|
383 }
|
|
384
|
|
385 /* We avoid creating irreducible inner loops unless we thread through
|
|
386 a multiway branch, in which case we have deemed it worth losing
|
|
387 other loop optimizations later.
|
|
388
|
|
389 We also consider it worth creating an irreducible inner loop if
|
|
390 the number of copied statement is low relative to the length of
|
|
391 the path -- in that case there's little the traditional loop
|
|
392 optimizer would have done anyway, so an irreducible loop is not
|
|
393 so bad. */
|
|
394 if (!threaded_multiway_branch && *creates_irreducible_loop
|
|
395 && (n_insns * (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
|
131
|
396 > (m_path.length () *
|
111
|
397 (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_BLOCKS))))
|
|
398
|
|
399 {
|
|
400 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
401 fprintf (dump_file,
|
|
402 "FSM would create irreducible loop without threading "
|
|
403 "multiway branch.\n");
|
131
|
404 m_path.pop ();
|
111
|
405 return NULL;
|
|
406 }
|
|
407
|
|
408
|
|
409 /* If this path does not thread through the loop latch, then we are
|
|
410 using the FSM threader to find old style jump threads. This
|
|
411 is good, except the FSM threader does not re-use an existing
|
|
412 threading path to reduce code duplication.
|
|
413
|
|
414 So for that case, drastically reduce the number of statements
|
|
415 we are allowed to copy. */
|
|
416 if (!(threaded_through_latch && threaded_multiway_branch)
|
|
417 && (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
|
|
418 >= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS)))
|
|
419 {
|
|
420 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
421 fprintf (dump_file,
|
|
422 "FSM did not thread around loop and would copy too "
|
|
423 "many statements.\n");
|
131
|
424 m_path.pop ();
|
111
|
425 return NULL;
|
|
426 }
|
|
427
|
|
428 /* When there is a multi-way branch on the path, then threading can
|
|
429 explode the CFG due to duplicating the edges for that multi-way
|
|
430 branch. So like above, only allow a multi-way branch on the path
|
|
431 if we actually thread a multi-way branch. */
|
|
432 if (!threaded_multiway_branch && multiway_branch_in_path)
|
|
433 {
|
|
434 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
435 fprintf (dump_file,
|
|
436 "FSM Thread through multiway branch without threading "
|
|
437 "a multiway branch.\n");
|
131
|
438 m_path.pop ();
|
111
|
439 return NULL;
|
|
440 }
|
|
441 return taken_edge;
|
|
442 }
|
|
443
|
131
|
444 /* The current path PATH is a vector of blocks forming a jump threading
|
|
445 path in reverse order. TAKEN_EDGE is the edge taken from path[0].
|
111
|
446
|
131
|
447 Convert the current path into the form used by register_jump_thread and
|
|
448 register it. */
|
111
|
449
|
131
|
450 void
|
|
451 thread_jumps::convert_and_register_current_path (edge taken_edge)
|
111
|
452 {
|
|
453 vec<jump_thread_edge *> *jump_thread_path = new vec<jump_thread_edge *> ();
|
|
454
|
|
455 /* Record the edges between the blocks in PATH. */
|
131
|
456 for (unsigned int j = 0; j + 1 < m_path.length (); j++)
|
111
|
457 {
|
131
|
458 basic_block bb1 = m_path[m_path.length () - j - 1];
|
|
459 basic_block bb2 = m_path[m_path.length () - j - 2];
|
111
|
460
|
|
461 edge e = find_edge (bb1, bb2);
|
|
462 gcc_assert (e);
|
|
463 jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD);
|
|
464 jump_thread_path->safe_push (x);
|
|
465 }
|
|
466
|
|
467 /* Add the edge taken when the control variable has value ARG. */
|
|
468 jump_thread_edge *x
|
|
469 = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
|
|
470 jump_thread_path->safe_push (x);
|
|
471
|
|
472 register_jump_thread (jump_thread_path);
|
131
|
473 --m_max_threaded_paths;
|
111
|
474 }
|
|
475
|
|
476 /* While following a chain of SSA_NAME definitions, we jumped from a
|
|
477 definition in LAST_BB to a definition in NEW_BB (walking
|
|
478 backwards).
|
|
479
|
|
480 Verify there is a single path between the blocks and none of the
|
|
481 blocks in the path is already in VISITED_BBS. If so, then update
|
|
482 VISISTED_BBS, add the new blocks to PATH and return TRUE.
|
|
483 Otherwise return FALSE.
|
|
484
|
|
485 Store the length of the subpath in NEXT_PATH_LENGTH. */
|
|
486
|
131
|
487 bool
|
|
488 thread_jumps::check_subpath_and_update_thread_path (basic_block last_bb,
|
|
489 basic_block new_bb,
|
|
490 int *next_path_length)
|
111
|
491 {
|
|
492 edge e;
|
|
493 int e_count = 0;
|
|
494 edge_iterator ei;
|
|
495 auto_vec<basic_block> next_path;
|
|
496
|
|
497 FOR_EACH_EDGE (e, ei, last_bb->preds)
|
|
498 {
|
131
|
499 hash_set<basic_block> local_visited_bbs;
|
111
|
500
|
131
|
501 if (fsm_find_thread_path (new_bb, e->src, next_path,
|
|
502 local_visited_bbs, e->src->loop_father))
|
111
|
503 ++e_count;
|
|
504
|
|
505 /* If there is more than one path, stop. */
|
|
506 if (e_count > 1)
|
|
507 return false;
|
|
508 }
|
|
509
|
|
510 /* Stop if we have not found a path: this could occur when the recursion
|
|
511 is stopped by one of the bounds. */
|
|
512 if (e_count == 0)
|
|
513 return false;
|
|
514
|
|
515 /* Make sure we haven't already visited any of the nodes in
|
|
516 NEXT_PATH. Don't add them here to avoid pollution. */
|
|
517 for (unsigned int i = 0; i + 1 < next_path.length (); i++)
|
|
518 {
|
131
|
519 if (m_visited_bbs.contains (next_path[i]))
|
111
|
520 return false;
|
|
521 }
|
|
522
|
|
523 /* Now add the nodes to VISISTED_BBS. */
|
|
524 for (unsigned int i = 0; i + 1 < next_path.length (); i++)
|
131
|
525 m_visited_bbs.add (next_path[i]);
|
111
|
526
|
|
527 /* Append all the nodes from NEXT_PATH to PATH. */
|
131
|
528 m_path.safe_splice (next_path);
|
111
|
529 *next_path_length = next_path.length ();
|
|
530
|
|
531 return true;
|
|
532 }
|
|
533
|
|
534 /* If this is a profitable jump thread path, register it.
|
|
535
|
|
536 NAME is an SSA NAME with a possible constant value of ARG on PATH.
|
|
537
|
131
|
538 DEF_BB is the basic block that ultimately defines the constant. */
|
111
|
539
|
131
|
540 void
|
|
541 thread_jumps::register_jump_thread_path_if_profitable (tree name, tree arg,
|
|
542 basic_block def_bb)
|
111
|
543 {
|
|
544 if (TREE_CODE_CLASS (TREE_CODE (arg)) != tcc_constant)
|
|
545 return;
|
|
546
|
|
547 bool irreducible = false;
|
131
|
548 edge taken_edge = profitable_jump_thread_path (def_bb, name, arg,
|
|
549 &irreducible);
|
111
|
550 if (taken_edge)
|
|
551 {
|
131
|
552 convert_and_register_current_path (taken_edge);
|
|
553 m_path.pop ();
|
111
|
554
|
|
555 if (irreducible)
|
131
|
556 vect_free_loop_info_assumptions (m_path[0]->loop_father);
|
111
|
557 }
|
|
558 }
|
|
559
|
|
560 /* Given PHI which defines NAME in block DEF_BB, recurse through the
|
|
561 PHI's arguments searching for paths where NAME will ultimately have
|
|
562 a constant value.
|
|
563
|
|
564 PATH contains the series of blocks to traverse that will result in
|
131
|
565 NAME having a constant value. */
|
111
|
566
|
131
|
567 void
|
|
568 thread_jumps::handle_phi (gphi *phi, tree name, basic_block def_bb)
|
111
|
569 {
|
|
570 /* Iterate over the arguments of PHI. */
|
|
571 for (unsigned int i = 0; i < gimple_phi_num_args (phi); i++)
|
|
572 {
|
|
573 tree arg = gimple_phi_arg_def (phi, i);
|
|
574 basic_block bbi = gimple_phi_arg_edge (phi, i)->src;
|
|
575
|
|
576 /* Skip edges pointing outside the current loop. */
|
|
577 if (!arg || def_bb->loop_father != bbi->loop_father)
|
|
578 continue;
|
|
579
|
|
580 if (TREE_CODE (arg) == SSA_NAME)
|
|
581 {
|
131
|
582 m_path.safe_push (bbi);
|
111
|
583 /* Recursively follow SSA_NAMEs looking for a constant
|
|
584 definition. */
|
131
|
585 fsm_find_control_statement_thread_paths (arg);
|
111
|
586
|
131
|
587 m_path.pop ();
|
111
|
588 continue;
|
|
589 }
|
|
590
|
131
|
591 register_jump_thread_path_if_profitable (name, arg, bbi);
|
111
|
592 }
|
|
593 }
|
|
594
|
|
595 /* Return TRUE if STMT is a gimple assignment we want to either directly
|
|
596 handle or recurse through. Return FALSE otherwise.
|
|
597
|
|
598 Note that adding more cases here requires adding cases to handle_assignment
|
|
599 below. */
|
|
600
|
|
601 static bool
|
|
602 handle_assignment_p (gimple *stmt)
|
|
603 {
|
|
604 if (is_gimple_assign (stmt))
|
|
605 {
|
|
606 enum tree_code def_code = gimple_assign_rhs_code (stmt);
|
|
607
|
|
608 /* If the RHS is an SSA_NAME, then we will recurse through it.
|
|
609 Go ahead and filter out cases where the SSA_NAME is a default
|
|
610 definition. There's little to be gained by trying to handle that. */
|
|
611 if (def_code == SSA_NAME
|
|
612 && !SSA_NAME_IS_DEFAULT_DEF (gimple_assign_rhs1 (stmt)))
|
|
613 return true;
|
|
614
|
|
615 /* If the RHS is a constant, then it's a terminal that we'll want
|
|
616 to handle as well. */
|
|
617 if (TREE_CODE_CLASS (def_code) == tcc_constant)
|
|
618 return true;
|
|
619 }
|
|
620
|
|
621 /* Anything not explicitly allowed is not handled. */
|
|
622 return false;
|
|
623 }
|
|
624
|
|
625 /* Given STMT which defines NAME in block DEF_BB, recurse through the
|
|
626 PHI's arguments searching for paths where NAME will ultimately have
|
|
627 a constant value.
|
|
628
|
|
629 PATH contains the series of blocks to traverse that will result in
|
131
|
630 NAME having a constant value. */
|
111
|
631
|
131
|
632 void
|
|
633 thread_jumps::handle_assignment (gimple *stmt, tree name, basic_block def_bb)
|
111
|
634 {
|
|
635 tree arg = gimple_assign_rhs1 (stmt);
|
|
636
|
|
637 if (TREE_CODE (arg) == SSA_NAME)
|
131
|
638 fsm_find_control_statement_thread_paths (arg);
|
111
|
639
|
|
640 else
|
|
641 {
|
|
642 /* register_jump_thread_path_if_profitable will push the current
|
|
643 block onto the path. But the path will always have the current
|
|
644 block at this point. So we can just pop it. */
|
131
|
645 m_path.pop ();
|
111
|
646
|
131
|
647 register_jump_thread_path_if_profitable (name, arg, def_bb);
|
111
|
648
|
|
649 /* And put the current block back onto the path so that the
|
|
650 state of the stack is unchanged when we leave. */
|
131
|
651 m_path.safe_push (def_bb);
|
111
|
652 }
|
|
653 }
|
|
654
|
|
655 /* We trace the value of the SSA_NAME NAME back through any phi nodes
|
|
656 looking for places where it gets a constant value and save the
|
131
|
657 path. */
|
111
|
658
|
131
|
659 void
|
|
660 thread_jumps::fsm_find_control_statement_thread_paths (tree name)
|
111
|
661 {
|
|
662 /* If NAME appears in an abnormal PHI, then don't try to trace its
|
|
663 value back through PHI nodes. */
|
|
664 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
|
|
665 return;
|
|
666
|
|
667 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
|
|
668 basic_block def_bb = gimple_bb (def_stmt);
|
|
669
|
|
670 if (def_bb == NULL)
|
|
671 return;
|
|
672
|
|
673 /* We allow the SSA chain to contains PHIs and simple copies and constant
|
|
674 initializations. */
|
|
675 if (gimple_code (def_stmt) != GIMPLE_PHI
|
|
676 && gimple_code (def_stmt) != GIMPLE_ASSIGN)
|
|
677 return;
|
|
678
|
|
679 if (gimple_code (def_stmt) == GIMPLE_PHI
|
|
680 && (gimple_phi_num_args (def_stmt)
|
|
681 >= (unsigned) PARAM_VALUE (PARAM_FSM_MAXIMUM_PHI_ARGUMENTS)))
|
|
682 return;
|
|
683
|
|
684 if (is_gimple_assign (def_stmt)
|
|
685 && ! handle_assignment_p (def_stmt))
|
|
686 return;
|
|
687
|
|
688 /* Avoid infinite recursion. */
|
131
|
689 if (m_visited_bbs.add (def_bb))
|
111
|
690 return;
|
|
691
|
|
692 int next_path_length = 0;
|
131
|
693 basic_block last_bb_in_path = m_path.last ();
|
111
|
694
|
|
695 if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt))
|
|
696 {
|
|
697 /* Do not walk through more than one loop PHI node. */
|
131
|
698 if (m_seen_loop_phi)
|
111
|
699 return;
|
131
|
700 m_seen_loop_phi = true;
|
111
|
701 }
|
|
702
|
|
703 /* Following the chain of SSA_NAME definitions, we jumped from a definition in
|
|
704 LAST_BB_IN_PATH to a definition in DEF_BB. When these basic blocks are
|
|
705 different, append to PATH the blocks from LAST_BB_IN_PATH to DEF_BB. */
|
|
706 if (def_bb != last_bb_in_path)
|
|
707 {
|
|
708 /* When DEF_BB == LAST_BB_IN_PATH, then the first block in the path
|
|
709 will already be in VISITED_BBS. When they are not equal, then we
|
|
710 must ensure that first block is accounted for to ensure we do not
|
|
711 create bogus jump threading paths. */
|
131
|
712 m_visited_bbs.add (m_path[0]);
|
111
|
713 if (!check_subpath_and_update_thread_path (last_bb_in_path, def_bb,
|
|
714 &next_path_length))
|
|
715 return;
|
|
716 }
|
|
717
|
131
|
718 gcc_assert (m_path.last () == def_bb);
|
111
|
719
|
|
720 if (gimple_code (def_stmt) == GIMPLE_PHI)
|
131
|
721 handle_phi (as_a <gphi *> (def_stmt), name, def_bb);
|
111
|
722 else if (gimple_code (def_stmt) == GIMPLE_ASSIGN)
|
131
|
723 handle_assignment (def_stmt, name, def_bb);
|
111
|
724
|
|
725 /* Remove all the nodes that we added from NEXT_PATH. */
|
|
726 if (next_path_length)
|
131
|
727 m_path.truncate (m_path.length () - next_path_length);
|
111
|
728 }
|
|
729
|
|
730 /* Search backwards from BB looking for paths where NAME (an SSA_NAME)
|
|
731 is a constant. Record such paths for jump threading.
|
|
732
|
|
733 It is assumed that BB ends with a control statement and that by
|
|
734 finding a path where NAME is a constant, we can thread the path.
|
131
|
735 SPEED_P indicates that we could increase code size to improve the
|
111
|
736 code path. */
|
|
737
|
131
|
738 void
|
|
739 thread_jumps::find_jump_threads_backwards (basic_block bb, bool speed_p)
|
111
|
740 {
|
|
741 gimple *stmt = get_gimple_control_stmt (bb);
|
|
742 if (!stmt)
|
|
743 return;
|
|
744
|
|
745 enum gimple_code code = gimple_code (stmt);
|
|
746 tree name = NULL;
|
|
747 if (code == GIMPLE_SWITCH)
|
|
748 name = gimple_switch_index (as_a <gswitch *> (stmt));
|
|
749 else if (code == GIMPLE_GOTO)
|
|
750 name = gimple_goto_dest (stmt);
|
|
751 else if (code == GIMPLE_COND)
|
|
752 {
|
|
753 if (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
|
|
754 && TREE_CODE_CLASS (TREE_CODE (gimple_cond_rhs (stmt))) == tcc_constant
|
|
755 && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt)))
|
|
756 || POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt)))))
|
|
757 name = gimple_cond_lhs (stmt);
|
|
758 }
|
|
759
|
|
760 if (!name || TREE_CODE (name) != SSA_NAME)
|
|
761 return;
|
|
762
|
131
|
763 /* Initialize pass local data that's different for each BB. */
|
|
764 m_path.truncate (0);
|
|
765 m_path.safe_push (bb);
|
|
766 m_visited_bbs.empty ();
|
|
767 m_seen_loop_phi = false;
|
|
768 m_speed_p = speed_p;
|
|
769 m_max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS);
|
111
|
770
|
131
|
771 fsm_find_control_statement_thread_paths (name);
|
111
|
772 }
|
|
773
|
|
774 namespace {
|
|
775
|
|
776 const pass_data pass_data_thread_jumps =
|
|
777 {
|
|
778 GIMPLE_PASS,
|
|
779 "thread",
|
|
780 OPTGROUP_NONE,
|
|
781 TV_TREE_SSA_THREAD_JUMPS,
|
|
782 ( PROP_cfg | PROP_ssa ),
|
|
783 0,
|
|
784 0,
|
|
785 0,
|
|
786 TODO_update_ssa,
|
|
787 };
|
|
788
|
|
789 class pass_thread_jumps : public gimple_opt_pass
|
|
790 {
|
|
791 public:
|
|
792 pass_thread_jumps (gcc::context *ctxt)
|
|
793 : gimple_opt_pass (pass_data_thread_jumps, ctxt)
|
|
794 {}
|
|
795
|
|
796 opt_pass * clone (void) { return new pass_thread_jumps (m_ctxt); }
|
|
797 virtual bool gate (function *);
|
|
798 virtual unsigned int execute (function *);
|
|
799 };
|
|
800
|
|
801 bool
|
|
802 pass_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
|
|
803 {
|
|
804 return flag_expensive_optimizations;
|
|
805 }
|
|
806
|
|
807
|
|
808 unsigned int
|
|
809 pass_thread_jumps::execute (function *fun)
|
|
810 {
|
|
811 loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
|
|
812
|
|
813 /* Try to thread each block with more than one successor. */
|
131
|
814 thread_jumps threader;
|
111
|
815 basic_block bb;
|
|
816 FOR_EACH_BB_FN (bb, fun)
|
|
817 {
|
|
818 if (EDGE_COUNT (bb->succs) > 1)
|
131
|
819 threader.find_jump_threads_backwards (bb, true);
|
111
|
820 }
|
|
821 bool changed = thread_through_all_blocks (true);
|
|
822
|
|
823 loop_optimizer_finalize ();
|
|
824 return changed ? TODO_cleanup_cfg : 0;
|
|
825 }
|
|
826
|
|
827 }
|
|
828
|
|
829 gimple_opt_pass *
|
|
830 make_pass_thread_jumps (gcc::context *ctxt)
|
|
831 {
|
|
832 return new pass_thread_jumps (ctxt);
|
|
833 }
|
|
834
|
|
835 namespace {
|
|
836
|
|
837 const pass_data pass_data_early_thread_jumps =
|
|
838 {
|
|
839 GIMPLE_PASS,
|
|
840 "ethread",
|
|
841 OPTGROUP_NONE,
|
|
842 TV_TREE_SSA_THREAD_JUMPS,
|
|
843 ( PROP_cfg | PROP_ssa ),
|
|
844 0,
|
|
845 0,
|
|
846 0,
|
|
847 ( TODO_cleanup_cfg | TODO_update_ssa ),
|
|
848 };
|
|
849
|
|
850 class pass_early_thread_jumps : public gimple_opt_pass
|
|
851 {
|
|
852 public:
|
|
853 pass_early_thread_jumps (gcc::context *ctxt)
|
|
854 : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
|
|
855 {}
|
|
856
|
|
857 opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); }
|
|
858 virtual bool gate (function *);
|
|
859 virtual unsigned int execute (function *);
|
|
860 };
|
|
861
|
|
862 bool
|
|
863 pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
|
|
864 {
|
|
865 return true;
|
|
866 }
|
|
867
|
|
868
|
|
869 unsigned int
|
|
870 pass_early_thread_jumps::execute (function *fun)
|
|
871 {
|
|
872 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
|
|
873
|
|
874 /* Try to thread each block with more than one successor. */
|
131
|
875 thread_jumps threader;
|
111
|
876 basic_block bb;
|
|
877 FOR_EACH_BB_FN (bb, fun)
|
|
878 {
|
|
879 if (EDGE_COUNT (bb->succs) > 1)
|
131
|
880 threader.find_jump_threads_backwards (bb, false);
|
111
|
881 }
|
|
882 thread_through_all_blocks (true);
|
|
883
|
|
884 loop_optimizer_finalize ();
|
|
885 return 0;
|
|
886 }
|
|
887
|
|
888 }
|
|
889
|
|
890 gimple_opt_pass *
|
|
891 make_pass_early_thread_jumps (gcc::context *ctxt)
|
|
892 {
|
|
893 return new pass_early_thread_jumps (ctxt);
|
|
894 }
|