Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-ssa-threadupdate.c @ 141:ce508c72660f
copy cbc flang in cfgexpand
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 22 Nov 2018 19:44:39 +0900 |
parents | 84e7813d76e9 |
children | 1830386684a0 |
rev | line source |
---|---|
0 | 1 /* Thread edges through blocks and update the control flow and SSA graphs. |
131 | 2 Copyright (C) 2004-2018 Free Software Foundation, Inc. |
0 | 3 |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation; either version 3, or (at your option) | |
9 any later version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 #include "config.h" | |
21 #include "system.h" | |
22 #include "coretypes.h" | |
111 | 23 #include "backend.h" |
0 | 24 #include "tree.h" |
111 | 25 #include "gimple.h" |
26 #include "cfghooks.h" | |
0 | 27 #include "tree-pass.h" |
111 | 28 #include "ssa.h" |
29 #include "fold-const.h" | |
30 #include "cfganal.h" | |
31 #include "gimple-iterator.h" | |
32 #include "tree-ssa.h" | |
33 #include "tree-ssa-threadupdate.h" | |
0 | 34 #include "cfgloop.h" |
111 | 35 #include "dbgcnt.h" |
36 #include "tree-cfg.h" | |
37 #include "tree-vectorizer.h" | |
0 | 38 |
39 /* Given a block B, update the CFG and SSA graph to reflect redirecting | |
40 one or more in-edges to B to instead reach the destination of an | |
41 out-edge from B while preserving any side effects in B. | |
42 | |
43 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the | |
44 side effects of executing B. | |
45 | |
46 1. Make a copy of B (including its outgoing edges and statements). Call | |
47 the copy B'. Note B' has no incoming edges or PHIs at this time. | |
48 | |
49 2. Remove the control statement at the end of B' and all outgoing edges | |
50 except B'->C. | |
51 | |
52 3. Add a new argument to each PHI in C with the same value as the existing | |
53 argument associated with edge B->C. Associate the new PHI arguments | |
54 with the edge B'->C. | |
55 | |
56 4. For each PHI in B, find or create a PHI in B' with an identical | |
57 PHI_RESULT. Add an argument to the PHI in B' which has the same | |
58 value as the PHI in B associated with the edge A->B. Associate | |
59 the new argument in the PHI in B' with the edge A->B. | |
60 | |
61 5. Change the edge A->B to A->B'. | |
62 | |
63 5a. This automatically deletes any PHI arguments associated with the | |
64 edge A->B in B. | |
65 | |
66 5b. This automatically associates each new argument added in step 4 | |
67 with the edge A->B'. | |
68 | |
69 6. Repeat for other incoming edges into B. | |
70 | |
71 7. Put the duplicated resources in B and all the B' blocks into SSA form. | |
72 | |
73 Note that block duplication can be minimized by first collecting the | |
74 set of unique destination blocks that the incoming edges should | |
111 | 75 be threaded to. |
76 | |
77 We reduce the number of edges and statements we create by not copying all | |
78 the outgoing edges and the control statement in step #1. We instead create | |
79 a template block without the outgoing edges and duplicate the template. | |
80 | |
81 Another case this code handles is threading through a "joiner" block. In | |
82 this case, we do not know the destination of the joiner block, but one | |
83 of the outgoing edges from the joiner block leads to a threadable path. This | |
84 case largely works as outlined above, except the duplicate of the joiner | |
85 block still contains a full set of outgoing edges and its control statement. | |
86 We just redirect one of its outgoing edges to our jump threading path. */ | |
0 | 87 |
88 | |
89 /* Steps #5 and #6 of the above algorithm are best implemented by walking | |
90 all the incoming edges which thread to the same destination edge at | |
91 the same time. That avoids lots of table lookups to get information | |
92 for the destination edge. | |
93 | |
94 To realize that implementation we create a list of incoming edges | |
95 which thread to the same outgoing edge. Thus to implement steps | |
96 #5 and #6 we traverse our hash table of outgoing edge information. | |
97 For each entry we walk the list of incoming edges which thread to | |
98 the current outgoing edge. */ | |
99 | |
100 struct el | |
101 { | |
102 edge e; | |
103 struct el *next; | |
104 }; | |
105 | |
106 /* Main data structure recording information regarding B's duplicate | |
107 blocks. */ | |
108 | |
109 /* We need to efficiently record the unique thread destinations of this | |
110 block and specific information associated with those destinations. We | |
111 may have many incoming edges threaded to the same outgoing edge. This | |
112 can be naturally implemented with a hash table. */ | |
113 | |
111 | 114 struct redirection_data : free_ptr_hash<redirection_data> |
115 { | |
116 /* We support wiring up two block duplicates in a jump threading path. | |
117 | |
118 One is a normal block copy where we remove the control statement | |
119 and wire up its single remaining outgoing edge to the thread path. | |
120 | |
121 The other is a joiner block where we leave the control statement | |
122 in place, but wire one of the outgoing edges to a thread path. | |
123 | |
124 In theory we could have multiple block duplicates in a jump | |
125 threading path, but I haven't tried that. | |
126 | |
127 The duplicate blocks appear in this array in the same order in | |
128 which they appear in the jump thread path. */ | |
129 basic_block dup_blocks[2]; | |
130 | |
131 /* The jump threading path. */ | |
132 vec<jump_thread_edge *> *path; | |
133 | |
134 /* A list of incoming edges which we want to thread to the | |
135 same path. */ | |
136 struct el *incoming_edges; | |
137 | |
138 /* hash_table support. */ | |
139 static inline hashval_t hash (const redirection_data *); | |
140 static inline int equal (const redirection_data *, const redirection_data *); | |
141 }; | |
142 | |
143 /* Dump a jump threading path, including annotations about each | |
144 edge in the path. */ | |
145 | |
146 static void | |
147 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path, | |
148 bool registering) | |
0 | 149 { |
111 | 150 fprintf (dump_file, |
151 " %s%s jump thread: (%d, %d) incoming edge; ", | |
152 (registering ? "Registering" : "Cancelling"), | |
153 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""), | |
154 path[0]->e->src->index, path[0]->e->dest->index); | |
155 | |
156 for (unsigned int i = 1; i < path.length (); i++) | |
157 { | |
158 /* We can get paths with a NULL edge when the final destination | |
159 of a jump thread turns out to be a constant address. We dump | |
160 those paths when debugging, so we have to be prepared for that | |
161 possibility here. */ | |
162 if (path[i]->e == NULL) | |
163 continue; | |
164 | |
165 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) | |
166 fprintf (dump_file, " (%d, %d) joiner; ", | |
167 path[i]->e->src->index, path[i]->e->dest->index); | |
168 if (path[i]->type == EDGE_COPY_SRC_BLOCK) | |
169 fprintf (dump_file, " (%d, %d) normal;", | |
170 path[i]->e->src->index, path[i]->e->dest->index); | |
171 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK) | |
172 fprintf (dump_file, " (%d, %d) nocopy;", | |
173 path[i]->e->src->index, path[i]->e->dest->index); | |
174 if (path[0]->type == EDGE_FSM_THREAD) | |
175 fprintf (dump_file, " (%d, %d) ", | |
176 path[i]->e->src->index, path[i]->e->dest->index); | |
177 } | |
178 fputc ('\n', dump_file); | |
179 } | |
180 | |
181 /* Simple hashing function. For any given incoming edge E, we're going | |
182 to be most concerned with the final destination of its jump thread | |
183 path. So hash on the block index of the final edge in the path. */ | |
184 | |
185 inline hashval_t | |
186 redirection_data::hash (const redirection_data *p) | |
187 { | |
188 vec<jump_thread_edge *> *path = p->path; | |
189 return path->last ()->e->dest->index; | |
190 } | |
191 | |
192 /* Given two hash table entries, return true if they have the same | |
193 jump threading path. */ | |
194 inline int | |
195 redirection_data::equal (const redirection_data *p1, const redirection_data *p2) | |
196 { | |
197 vec<jump_thread_edge *> *path1 = p1->path; | |
198 vec<jump_thread_edge *> *path2 = p2->path; | |
199 | |
200 if (path1->length () != path2->length ()) | |
201 return false; | |
202 | |
203 for (unsigned int i = 1; i < path1->length (); i++) | |
204 { | |
205 if ((*path1)[i]->type != (*path2)[i]->type | |
206 || (*path1)[i]->e != (*path2)[i]->e) | |
207 return false; | |
208 } | |
209 | |
210 return true; | |
211 } | |
212 | |
213 /* Rather than search all the edges in jump thread paths each time | |
214 DOM is able to simply if control statement, we build a hash table | |
215 with the deleted edges. We only care about the address of the edge, | |
216 not its contents. */ | |
217 struct removed_edges : nofree_ptr_hash<edge_def> | |
218 { | |
219 static hashval_t hash (edge e) { return htab_hash_pointer (e); } | |
220 static bool equal (edge e1, edge e2) { return e1 == e2; } | |
0 | 221 }; |
222 | |
111 | 223 static hash_table<removed_edges> *removed_edges; |
0 | 224 |
225 /* Data structure of information to pass to hash table traversal routines. */ | |
111 | 226 struct ssa_local_info_t |
0 | 227 { |
228 /* The current block we are working on. */ | |
229 basic_block bb; | |
230 | |
111 | 231 /* We only create a template block for the first duplicated block in a |
232 jump threading path as we may need many duplicates of that block. | |
233 | |
234 The second duplicate block in a path is specific to that path. Creating | |
235 and sharing a template for that block is considerably more difficult. */ | |
0 | 236 basic_block template_block; |
237 | |
111 | 238 /* Blocks duplicated for the thread. */ |
239 bitmap duplicate_blocks; | |
240 | |
0 | 241 /* TRUE if we thread one or more jumps, FALSE otherwise. */ |
242 bool jumps_threaded; | |
111 | 243 |
244 /* When we have multiple paths through a joiner which reach different | |
245 final destinations, then we may need to correct for potential | |
246 profile insanities. */ | |
247 bool need_profile_correction; | |
0 | 248 }; |
249 | |
250 /* Passes which use the jump threading code register jump threading | |
251 opportunities as they are discovered. We keep the registered | |
252 jump threading opportunities in this vector as edge pairs | |
253 (original_edge, target_edge). */ | |
111 | 254 static vec<vec<jump_thread_edge *> *> paths; |
255 | |
256 /* When we start updating the CFG for threading, data necessary for jump | |
257 threading is attached to the AUX field for the incoming edge. Use these | |
258 macros to access the underlying structure attached to the AUX field. */ | |
259 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux) | |
0 | 260 |
261 /* Jump threading statistics. */ | |
262 | |
263 struct thread_stats_d | |
264 { | |
265 unsigned long num_threaded_edges; | |
266 }; | |
267 | |
268 struct thread_stats_d thread_stats; | |
269 | |
270 | |
271 /* Remove the last statement in block BB if it is a control statement | |
272 Also remove all outgoing edges except the edge which reaches DEST_BB. | |
273 If DEST_BB is NULL, then remove all outgoing edges. */ | |
274 | |
111 | 275 void |
0 | 276 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb) |
277 { | |
278 gimple_stmt_iterator gsi; | |
279 edge e; | |
280 edge_iterator ei; | |
281 | |
282 gsi = gsi_last_bb (bb); | |
283 | |
284 /* If the duplicate ends with a control statement, then remove it. | |
285 | |
286 Note that if we are duplicating the template block rather than the | |
287 original basic block, then the duplicate might not have any real | |
288 statements in it. */ | |
289 if (!gsi_end_p (gsi) | |
290 && gsi_stmt (gsi) | |
291 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND | |
292 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO | |
293 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH)) | |
294 gsi_remove (&gsi, true); | |
295 | |
296 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); ) | |
297 { | |
298 if (e->dest != dest_bb) | |
111 | 299 { |
300 free_dom_edge_info (e); | |
301 remove_edge (e); | |
302 } | |
0 | 303 else |
111 | 304 { |
305 e->probability = profile_probability::always (); | |
306 ei_next (&ei); | |
307 } | |
0 | 308 } |
111 | 309 |
310 /* If the remaining edge is a loop exit, there must have | |
311 a removed edge that was not a loop exit. | |
312 | |
313 In that case BB and possibly other blocks were previously | |
314 in the loop, but are now outside the loop. Thus, we need | |
315 to update the loop structures. */ | |
316 if (single_succ_p (bb) | |
317 && loop_outer (bb->loop_father) | |
318 && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb))) | |
319 loops_state_set (LOOPS_NEED_FIXUP); | |
0 | 320 } |
321 | |
111 | 322 /* Create a duplicate of BB. Record the duplicate block in an array |
323 indexed by COUNT stored in RD. */ | |
0 | 324 |
325 static void | |
111 | 326 create_block_for_threading (basic_block bb, |
327 struct redirection_data *rd, | |
328 unsigned int count, | |
329 bitmap *duplicate_blocks) | |
0 | 330 { |
111 | 331 edge_iterator ei; |
332 edge e; | |
333 | |
0 | 334 /* We can use the generic block duplication code and simply remove |
335 the stuff we do not need. */ | |
111 | 336 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL); |
337 | |
338 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs) | |
339 e->aux = NULL; | |
0 | 340 |
341 /* Zero out the profile, since the block is unreachable for now. */ | |
111 | 342 rd->dup_blocks[count]->count = profile_count::uninitialized (); |
343 if (duplicate_blocks) | |
344 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index); | |
0 | 345 } |
346 | |
111 | 347 /* Main data structure to hold information for duplicates of BB. */ |
348 | |
349 static hash_table<redirection_data> *redirection_data; | |
0 | 350 |
351 /* Given an outgoing edge E lookup and return its entry in our hash table. | |
352 | |
353 If INSERT is true, then we insert the entry into the hash table if | |
354 it is not already present. INCOMING_EDGE is added to the list of incoming | |
355 edges associated with E in the hash table. */ | |
356 | |
357 static struct redirection_data * | |
111 | 358 lookup_redirection_data (edge e, enum insert_option insert) |
0 | 359 { |
111 | 360 struct redirection_data **slot; |
0 | 361 struct redirection_data *elt; |
111 | 362 vec<jump_thread_edge *> *path = THREAD_PATH (e); |
363 | |
364 /* Build a hash table element so we can see if E is already | |
0 | 365 in the table. */ |
366 elt = XNEW (struct redirection_data); | |
111 | 367 elt->path = path; |
368 elt->dup_blocks[0] = NULL; | |
369 elt->dup_blocks[1] = NULL; | |
0 | 370 elt->incoming_edges = NULL; |
371 | |
111 | 372 slot = redirection_data->find_slot (elt, insert); |
0 | 373 |
374 /* This will only happen if INSERT is false and the entry is not | |
375 in the hash table. */ | |
376 if (slot == NULL) | |
377 { | |
378 free (elt); | |
379 return NULL; | |
380 } | |
381 | |
382 /* This will only happen if E was not in the hash table and | |
383 INSERT is true. */ | |
384 if (*slot == NULL) | |
385 { | |
111 | 386 *slot = elt; |
0 | 387 elt->incoming_edges = XNEW (struct el); |
111 | 388 elt->incoming_edges->e = e; |
0 | 389 elt->incoming_edges->next = NULL; |
390 return elt; | |
391 } | |
392 /* E was in the hash table. */ | |
393 else | |
394 { | |
395 /* Free ELT as we do not need it anymore, we will extract the | |
396 relevant entry from the hash table itself. */ | |
397 free (elt); | |
398 | |
399 /* Get the entry stored in the hash table. */ | |
111 | 400 elt = *slot; |
0 | 401 |
402 /* If insertion was requested, then we need to add INCOMING_EDGE | |
403 to the list of incoming edges associated with E. */ | |
404 if (insert) | |
405 { | |
111 | 406 struct el *el = XNEW (struct el); |
0 | 407 el->next = elt->incoming_edges; |
111 | 408 el->e = e; |
0 | 409 elt->incoming_edges = el; |
410 } | |
411 | |
412 return elt; | |
413 } | |
414 } | |
415 | |
111 | 416 /* Similar to copy_phi_args, except that the PHI arg exists, it just |
417 does not have a value associated with it. */ | |
418 | |
419 static void | |
420 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e) | |
421 { | |
422 int src_idx = src_e->dest_idx; | |
423 int tgt_idx = tgt_e->dest_idx; | |
424 | |
425 /* Iterate over each PHI in e->dest. */ | |
426 for (gphi_iterator gsi = gsi_start_phis (src_e->dest), | |
427 gsi2 = gsi_start_phis (tgt_e->dest); | |
428 !gsi_end_p (gsi); | |
429 gsi_next (&gsi), gsi_next (&gsi2)) | |
430 { | |
431 gphi *src_phi = gsi.phi (); | |
432 gphi *dest_phi = gsi2.phi (); | |
433 tree val = gimple_phi_arg_def (src_phi, src_idx); | |
434 source_location locus = gimple_phi_arg_location (src_phi, src_idx); | |
435 | |
436 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val); | |
437 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus); | |
438 } | |
439 } | |
440 | |
441 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX | |
442 to see if it has constant value in a flow sensitive manner. Set | |
443 LOCUS to location of the constant phi arg and return the value. | |
444 Return DEF directly if either PATH or idx is ZERO. */ | |
445 | |
446 static tree | |
447 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path, | |
448 basic_block bb, int idx, source_location *locus) | |
449 { | |
450 tree arg; | |
451 gphi *def_phi; | |
452 basic_block def_bb; | |
453 | |
454 if (path == NULL || idx == 0) | |
455 return def; | |
456 | |
457 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def)); | |
458 if (!def_phi) | |
459 return def; | |
460 | |
461 def_bb = gimple_bb (def_phi); | |
462 /* Don't propagate loop invariants into deeper loops. */ | |
463 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb)) | |
464 return def; | |
465 | |
466 /* Backtrack jump threading path from IDX to see if def has constant | |
467 value. */ | |
468 for (int j = idx - 1; j >= 0; j--) | |
469 { | |
470 edge e = (*path)[j]->e; | |
471 if (e->dest == def_bb) | |
472 { | |
473 arg = gimple_phi_arg_def (def_phi, e->dest_idx); | |
474 if (is_gimple_min_invariant (arg)) | |
475 { | |
476 *locus = gimple_phi_arg_location (def_phi, e->dest_idx); | |
477 return arg; | |
478 } | |
479 break; | |
480 } | |
481 } | |
482 | |
483 return def; | |
484 } | |
485 | |
486 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E. | |
487 Try to backtrack jump threading PATH from node IDX to see if the arg | |
488 has constant value, copy constant value instead of argument itself | |
489 if yes. */ | |
490 | |
491 static void | |
492 copy_phi_args (basic_block bb, edge src_e, edge tgt_e, | |
493 vec<jump_thread_edge *> *path, int idx) | |
494 { | |
495 gphi_iterator gsi; | |
496 int src_indx = src_e->dest_idx; | |
497 | |
498 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
499 { | |
500 gphi *phi = gsi.phi (); | |
501 tree def = gimple_phi_arg_def (phi, src_indx); | |
502 source_location locus = gimple_phi_arg_location (phi, src_indx); | |
503 | |
504 if (TREE_CODE (def) == SSA_NAME | |
505 && !virtual_operand_p (gimple_phi_result (phi))) | |
506 def = get_value_locus_in_path (def, path, bb, idx, &locus); | |
507 | |
508 add_phi_arg (phi, def, tgt_e, locus); | |
509 } | |
510 } | |
511 | |
512 /* We have recently made a copy of ORIG_BB, including its outgoing | |
513 edges. The copy is NEW_BB. Every PHI node in every direct successor of | |
514 ORIG_BB has a new argument associated with edge from NEW_BB to the | |
515 successor. Initialize the PHI argument so that it is equal to the PHI | |
516 argument associated with the edge from ORIG_BB to the successor. | |
517 PATH and IDX are used to check if the new PHI argument has constant | |
518 value in a flow sensitive manner. */ | |
519 | |
520 static void | |
521 update_destination_phis (basic_block orig_bb, basic_block new_bb, | |
522 vec<jump_thread_edge *> *path, int idx) | |
523 { | |
524 edge_iterator ei; | |
525 edge e; | |
526 | |
527 FOR_EACH_EDGE (e, ei, orig_bb->succs) | |
528 { | |
529 edge e2 = find_edge (new_bb, e->dest); | |
530 copy_phi_args (e->dest, e, e2, path, idx); | |
531 } | |
532 } | |
533 | |
0 | 534 /* Given a duplicate block and its single destination (both stored |
535 in RD). Create an edge between the duplicate and its single | |
536 destination. | |
537 | |
538 Add an additional argument to any PHI nodes at the single | |
111 | 539 destination. IDX is the start node in jump threading path |
540 we start to check to see if the new PHI argument has constant | |
541 value along the jump threading path. */ | |
0 | 542 |
543 static void | |
111 | 544 create_edge_and_update_destination_phis (struct redirection_data *rd, |
545 basic_block bb, int idx) | |
0 | 546 { |
111 | 547 edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU); |
0 | 548 |
549 rescan_loop_exit (e, true, false); | |
111 | 550 |
551 /* We used to copy the thread path here. That was added in 2007 | |
552 and dutifully updated through the representation changes in 2013. | |
553 | |
554 In 2013 we added code to thread from an interior node through | |
555 the backedge to another interior node. That runs after the code | |
556 to thread through loop headers from outside the loop. | |
557 | |
558 The latter may delete edges in the CFG, including those | |
559 which appeared in the jump threading path we copied here. Thus | |
560 we'd end up using a dangling pointer. | |
561 | |
562 After reviewing the 2007/2011 code, I can't see how anything | |
563 depended on copying the AUX field and clearly copying the jump | |
564 threading path is problematical due to embedded edge pointers. | |
565 It has been removed. */ | |
566 e->aux = NULL; | |
0 | 567 |
568 /* If there are any PHI nodes at the destination of the outgoing edge | |
569 from the duplicate block, then we will need to add a new argument | |
570 to them. The argument should have the same value as the argument | |
571 associated with the outgoing edge stored in RD. */ | |
111 | 572 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx); |
573 } | |
574 | |
575 /* Look through PATH beginning at START and return TRUE if there are | |
576 any additional blocks that need to be duplicated. Otherwise, | |
577 return FALSE. */ | |
578 static bool | |
579 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path, | |
580 unsigned int start) | |
581 { | |
582 for (unsigned int i = start + 1; i < path->length (); i++) | |
583 { | |
584 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK | |
585 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK) | |
586 return true; | |
587 } | |
588 return false; | |
589 } | |
590 | |
591 | |
131 | 592 /* Compute the amount of profile count coming into the jump threading |
111 | 593 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and |
594 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the | |
595 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to | |
596 identify blocks duplicated for jump threading, which have duplicated | |
597 edges that need to be ignored in the analysis. Return true if path contains | |
598 a joiner, false otherwise. | |
599 | |
131 | 600 In the non-joiner case, this is straightforward - all the counts |
111 | 601 flowing into the jump threading path should flow through the duplicated |
602 block and out of the duplicated path. | |
603 | |
604 In the joiner case, it is very tricky. Some of the counts flowing into | |
605 the original path go offpath at the joiner. The problem is that while | |
606 we know how much total count goes off-path in the original control flow, | |
607 we don't know how many of the counts corresponding to just the jump | |
608 threading path go offpath at the joiner. | |
609 | |
610 For example, assume we have the following control flow and identified | |
611 jump threading paths: | |
612 | |
613 A B C | |
614 \ | / | |
615 Ea \ |Eb / Ec | |
616 \ | / | |
617 v v v | |
618 J <-- Joiner | |
619 / \ | |
620 Eoff/ \Eon | |
621 / \ | |
622 v v | |
623 Soff Son <--- Normal | |
624 /\ | |
625 Ed/ \ Ee | |
626 / \ | |
627 v v | |
628 D E | |
629 | |
630 Jump threading paths: A -> J -> Son -> D (path 1) | |
631 C -> J -> Son -> E (path 2) | |
632 | |
633 Note that the control flow could be more complicated: | |
634 - Each jump threading path may have more than one incoming edge. I.e. A and | |
635 Ea could represent multiple incoming blocks/edges that are included in | |
636 path 1. | |
637 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either | |
638 before or after the "normal" copy block). These are not duplicated onto | |
639 the jump threading path, as they are single-successor. | |
640 - Any of the blocks along the path may have other incoming edges that | |
641 are not part of any jump threading path, but add profile counts along | |
642 the path. | |
643 | |
644 In the above example, after all jump threading is complete, we will | |
645 end up with the following control flow: | |
646 | |
647 A B C | |
648 | | | | |
649 Ea| |Eb |Ec | |
650 | | | | |
651 v v v | |
652 Ja J Jc | |
653 / \ / \Eon' / \ | |
654 Eona/ \ ---/---\-------- \Eonc | |
655 / \ / / \ \ | |
656 v v v v v | |
657 Sona Soff Son Sonc | |
658 \ /\ / | |
659 \___________ / \ _____/ | |
660 \ / \/ | |
661 vv v | |
662 D E | |
663 | |
664 The main issue to notice here is that when we are processing path 1 | |
665 (A->J->Son->D) we need to figure out the outgoing edge weights to | |
666 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the | |
667 sum of the incoming weights to D remain Ed. The problem with simply | |
668 assuming that Ja (and Jc when processing path 2) has the same outgoing | |
669 probabilities to its successors as the original block J, is that after | |
670 all paths are processed and other edges/counts removed (e.g. none | |
671 of Ec will reach D after processing path 2), we may end up with not | |
672 enough count flowing along duplicated edge Sona->D. | |
673 | |
674 Therefore, in the case of a joiner, we keep track of all counts | |
675 coming in along the current path, as well as from predecessors not | |
676 on any jump threading path (Eb in the above example). While we | |
677 first assume that the duplicated Eona for Ja->Sona has the same | |
678 probability as the original, we later compensate for other jump | |
679 threading paths that may eliminate edges. We do that by keep track | |
680 of all counts coming into the original path that are not in a jump | |
681 thread (Eb in the above example, but as noted earlier, there could | |
682 be other predecessors incoming to the path at various points, such | |
683 as at Son). Call this cumulative non-path count coming into the path | |
684 before D as Enonpath. We then ensure that the count from Sona->D is as at | |
685 least as big as (Ed - Enonpath), but no bigger than the minimum | |
686 weight along the jump threading path. The probabilities of both the | |
687 original and duplicated joiner block J and Ja will be adjusted | |
688 accordingly after the updates. */ | |
689 | |
690 static bool | |
691 compute_path_counts (struct redirection_data *rd, | |
692 ssa_local_info_t *local_info, | |
693 profile_count *path_in_count_ptr, | |
131 | 694 profile_count *path_out_count_ptr) |
111 | 695 { |
696 edge e = rd->incoming_edges->e; | |
697 vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
698 edge elast = path->last ()->e; | |
699 profile_count nonpath_count = profile_count::zero (); | |
700 bool has_joiner = false; | |
701 profile_count path_in_count = profile_count::zero (); | |
702 | |
703 /* Start by accumulating incoming edge counts to the path's first bb | |
704 into a couple buckets: | |
705 path_in_count: total count of incoming edges that flow into the | |
706 current path. | |
707 nonpath_count: total count of incoming edges that are not | |
708 flowing along *any* path. These are the counts | |
709 that will still flow along the original path after | |
710 all path duplication is done by potentially multiple | |
711 calls to this routine. | |
712 (any other incoming edge counts are for a different jump threading | |
713 path that will be handled by a later call to this routine.) | |
714 To make this easier, start by recording all incoming edges that flow into | |
715 the current path in a bitmap. We could add up the path's incoming edge | |
716 counts here, but we still need to walk all the first bb's incoming edges | |
717 below to add up the counts of the other edges not included in this jump | |
718 threading path. */ | |
719 struct el *next, *el; | |
720 auto_bitmap in_edge_srcs; | |
721 for (el = rd->incoming_edges; el; el = next) | |
722 { | |
723 next = el->next; | |
724 bitmap_set_bit (in_edge_srcs, el->e->src->index); | |
725 } | |
726 edge ein; | |
727 edge_iterator ei; | |
728 FOR_EACH_EDGE (ein, ei, e->dest->preds) | |
729 { | |
730 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein); | |
731 /* Simply check the incoming edge src against the set captured above. */ | |
732 if (ein_path | |
733 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index)) | |
734 { | |
735 /* It is necessary but not sufficient that the last path edges | |
736 are identical. There may be different paths that share the | |
737 same last path edge in the case where the last edge has a nocopy | |
738 source block. */ | |
739 gcc_assert (ein_path->last ()->e == elast); | |
740 path_in_count += ein->count (); | |
741 } | |
742 else if (!ein_path) | |
743 { | |
744 /* Keep track of the incoming edges that are not on any jump-threading | |
745 path. These counts will still flow out of original path after all | |
746 jump threading is complete. */ | |
747 nonpath_count += ein->count (); | |
748 } | |
749 } | |
750 | |
751 /* Now compute the fraction of the total count coming into the first | |
752 path bb that is from the current threading path. */ | |
753 profile_count total_count = e->dest->count; | |
754 /* Handle incoming profile insanities. */ | |
755 if (total_count < path_in_count) | |
756 path_in_count = total_count; | |
757 profile_probability onpath_scale = path_in_count.probability_in (total_count); | |
758 | |
759 /* Walk the entire path to do some more computation in order to estimate | |
760 how much of the path_in_count will flow out of the duplicated threading | |
761 path. In the non-joiner case this is straightforward (it should be | |
762 the same as path_in_count, although we will handle incoming profile | |
763 insanities by setting it equal to the minimum count along the path). | |
764 | |
765 In the joiner case, we need to estimate how much of the path_in_count | |
766 will stay on the threading path after the joiner's conditional branch. | |
767 We don't really know for sure how much of the counts | |
768 associated with this path go to each successor of the joiner, but we'll | |
769 estimate based on the fraction of the total count coming into the path | |
770 bb was from the threading paths (computed above in onpath_scale). | |
771 Afterwards, we will need to do some fixup to account for other threading | |
772 paths and possible profile insanities. | |
773 | |
774 In order to estimate the joiner case's counts we also need to update | |
775 nonpath_count with any additional counts coming into the path. Other | |
776 blocks along the path may have additional predecessors from outside | |
777 the path. */ | |
778 profile_count path_out_count = path_in_count; | |
779 profile_count min_path_count = path_in_count; | |
780 for (unsigned int i = 1; i < path->length (); i++) | |
781 { | |
782 edge epath = (*path)[i]->e; | |
783 profile_count cur_count = epath->count (); | |
784 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) | |
785 { | |
786 has_joiner = true; | |
787 cur_count = cur_count.apply_probability (onpath_scale); | |
788 } | |
789 /* In the joiner case we need to update nonpath_count for any edges | |
790 coming into the path that will contribute to the count flowing | |
791 into the path successor. */ | |
792 if (has_joiner && epath != elast) | |
793 { | |
794 /* Look for other incoming edges after joiner. */ | |
795 FOR_EACH_EDGE (ein, ei, epath->dest->preds) | |
796 { | |
797 if (ein != epath | |
798 /* Ignore in edges from blocks we have duplicated for a | |
799 threading path, which have duplicated edge counts until | |
800 they are redirected by an invocation of this routine. */ | |
801 && !bitmap_bit_p (local_info->duplicate_blocks, | |
802 ein->src->index)) | |
803 nonpath_count += ein->count (); | |
804 } | |
805 } | |
806 if (cur_count < path_out_count) | |
807 path_out_count = cur_count; | |
808 if (epath->count () < min_path_count) | |
809 min_path_count = epath->count (); | |
810 } | |
811 | |
812 /* We computed path_out_count above assuming that this path targeted | |
813 the joiner's on-path successor with the same likelihood as it | |
814 reached the joiner. However, other thread paths through the joiner | |
815 may take a different path through the normal copy source block | |
816 (i.e. they have a different elast), meaning that they do not | |
817 contribute any counts to this path's elast. As a result, it may | |
818 turn out that this path must have more count flowing to the on-path | |
819 successor of the joiner. Essentially, all of this path's elast | |
820 count must be contributed by this path and any nonpath counts | |
821 (since any path through the joiner with a different elast will not | |
822 include a copy of this elast in its duplicated path). | |
823 So ensure that this path's path_out_count is at least the | |
824 difference between elast->count () and nonpath_count. Otherwise the edge | |
825 counts after threading will not be sane. */ | |
826 if (local_info->need_profile_correction | |
827 && has_joiner && path_out_count < elast->count () - nonpath_count) | |
828 { | |
829 path_out_count = elast->count () - nonpath_count; | |
830 /* But neither can we go above the minimum count along the path | |
831 we are duplicating. This can be an issue due to profile | |
832 insanities coming in to this pass. */ | |
833 if (path_out_count > min_path_count) | |
834 path_out_count = min_path_count; | |
835 } | |
836 | |
837 *path_in_count_ptr = path_in_count; | |
838 *path_out_count_ptr = path_out_count; | |
839 return has_joiner; | |
840 } | |
841 | |
842 | |
843 /* Update the counts and frequencies for both an original path | |
844 edge EPATH and its duplicate EDUP. The duplicate source block | |
131 | 845 will get a count of PATH_IN_COUNT and PATH_IN_FREQ, |
111 | 846 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */ |
847 static void | |
848 update_profile (edge epath, edge edup, profile_count path_in_count, | |
131 | 849 profile_count path_out_count) |
111 | 850 { |
131 | 851 |
852 /* First update the duplicated block's count. */ | |
111 | 853 if (edup) |
0 | 854 { |
111 | 855 basic_block dup_block = edup->src; |
856 | |
857 /* Edup's count is reduced by path_out_count. We need to redistribute | |
858 probabilities to the remaining edges. */ | |
859 | |
860 edge esucc; | |
861 edge_iterator ei; | |
862 profile_probability edup_prob | |
863 = path_out_count.probability_in (path_in_count); | |
864 | |
865 /* Either scale up or down the remaining edges. | |
866 probabilities are always in range <0,1> and thus we can't do | |
867 both by same loop. */ | |
868 if (edup->probability > edup_prob) | |
869 { | |
870 profile_probability rev_scale | |
871 = (profile_probability::always () - edup->probability) | |
872 / (profile_probability::always () - edup_prob); | |
873 FOR_EACH_EDGE (esucc, ei, dup_block->succs) | |
874 if (esucc != edup) | |
875 esucc->probability /= rev_scale; | |
876 } | |
877 else if (edup->probability < edup_prob) | |
878 { | |
879 profile_probability scale | |
880 = (profile_probability::always () - edup_prob) | |
881 / (profile_probability::always () - edup->probability); | |
882 FOR_EACH_EDGE (esucc, ei, dup_block->succs) | |
883 if (esucc != edup) | |
884 esucc->probability *= scale; | |
885 } | |
131 | 886 if (edup_prob.initialized_p ()) |
887 edup->probability = edup_prob; | |
888 | |
889 gcc_assert (!dup_block->count.initialized_p ()); | |
111 | 890 dup_block->count = path_in_count; |
891 } | |
892 | |
131 | 893 if (path_in_count == profile_count::zero ()) |
894 return; | |
895 | |
111 | 896 profile_count final_count = epath->count () - path_out_count; |
897 | |
131 | 898 /* Now update the original block's count in the |
111 | 899 opposite manner - remove the counts/freq that will flow |
900 into the duplicated block. Handle underflow due to precision/ | |
901 rounding issues. */ | |
902 epath->src->count -= path_in_count; | |
903 | |
904 /* Next update this path edge's original and duplicated counts. We know | |
905 that the duplicated path will have path_out_count flowing | |
906 out of it (in the joiner case this is the count along the duplicated path | |
907 out of the duplicated joiner). This count can then be removed from the | |
908 original path edge. */ | |
131 | 909 |
910 edge esucc; | |
911 edge_iterator ei; | |
912 profile_probability epath_prob = final_count.probability_in (epath->src->count); | |
913 | |
914 if (epath->probability > epath_prob) | |
111 | 915 { |
131 | 916 profile_probability rev_scale |
917 = (profile_probability::always () - epath->probability) | |
918 / (profile_probability::always () - epath_prob); | |
919 FOR_EACH_EDGE (esucc, ei, epath->src->succs) | |
920 if (esucc != epath) | |
921 esucc->probability /= rev_scale; | |
111 | 922 } |
131 | 923 else if (epath->probability < epath_prob) |
111 | 924 { |
131 | 925 profile_probability scale |
926 = (profile_probability::always () - epath_prob) | |
927 / (profile_probability::always () - epath->probability); | |
928 FOR_EACH_EDGE (esucc, ei, epath->src->succs) | |
929 if (esucc != epath) | |
930 esucc->probability *= scale; | |
0 | 931 } |
131 | 932 if (epath_prob.initialized_p ()) |
933 epath->probability = epath_prob; | |
111 | 934 } |
935 | |
936 /* Wire up the outgoing edges from the duplicate blocks and | |
937 update any PHIs as needed. Also update the profile counts | |
938 on the original and duplicate blocks and edges. */ | |
939 void | |
940 ssa_fix_duplicate_block_edges (struct redirection_data *rd, | |
941 ssa_local_info_t *local_info) | |
942 { | |
943 bool multi_incomings = (rd->incoming_edges->next != NULL); | |
944 edge e = rd->incoming_edges->e; | |
945 vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
946 edge elast = path->last ()->e; | |
947 profile_count path_in_count = profile_count::zero (); | |
948 profile_count path_out_count = profile_count::zero (); | |
949 | |
950 /* First determine how much profile count to move from original | |
951 path to the duplicate path. This is tricky in the presence of | |
952 a joiner (see comments for compute_path_counts), where some portion | |
953 of the path's counts will flow off-path from the joiner. In the | |
954 non-joiner case the path_in_count and path_out_count should be the | |
955 same. */ | |
956 bool has_joiner = compute_path_counts (rd, local_info, | |
131 | 957 &path_in_count, &path_out_count); |
958 | |
111 | 959 for (unsigned int count = 0, i = 1; i < path->length (); i++) |
960 { | |
961 edge epath = (*path)[i]->e; | |
962 | |
963 /* If we were threading through an joiner block, then we want | |
964 to keep its control statement and redirect an outgoing edge. | |
965 Else we want to remove the control statement & edges, then create | |
966 a new outgoing edge. In both cases we may need to update PHIs. */ | |
967 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) | |
968 { | |
969 edge victim; | |
970 edge e2; | |
971 | |
972 gcc_assert (has_joiner); | |
973 | |
974 /* This updates the PHIs at the destination of the duplicate | |
975 block. Pass 0 instead of i if we are threading a path which | |
976 has multiple incoming edges. */ | |
977 update_destination_phis (local_info->bb, rd->dup_blocks[count], | |
978 path, multi_incomings ? 0 : i); | |
979 | |
980 /* Find the edge from the duplicate block to the block we're | |
981 threading through. That's the edge we want to redirect. */ | |
982 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest); | |
983 | |
984 /* If there are no remaining blocks on the path to duplicate, | |
985 then redirect VICTIM to the final destination of the jump | |
986 threading path. */ | |
987 if (!any_remaining_duplicated_blocks (path, i)) | |
988 { | |
989 e2 = redirect_edge_and_branch (victim, elast->dest); | |
990 /* If we redirected the edge, then we need to copy PHI arguments | |
991 at the target. If the edge already existed (e2 != victim | |
992 case), then the PHIs in the target already have the correct | |
993 arguments. */ | |
994 if (e2 == victim) | |
995 copy_phi_args (e2->dest, elast, e2, | |
996 path, multi_incomings ? 0 : i); | |
997 } | |
998 else | |
999 { | |
1000 /* Redirect VICTIM to the next duplicated block in the path. */ | |
1001 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]); | |
1002 | |
1003 /* We need to update the PHIs in the next duplicated block. We | |
1004 want the new PHI args to have the same value as they had | |
1005 in the source of the next duplicate block. | |
1006 | |
1007 Thus, we need to know which edge we traversed into the | |
1008 source of the duplicate. Furthermore, we may have | |
1009 traversed many edges to reach the source of the duplicate. | |
1010 | |
1011 Walk through the path starting at element I until we | |
1012 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want | |
1013 the edge from the prior element. */ | |
1014 for (unsigned int j = i + 1; j < path->length (); j++) | |
1015 { | |
1016 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK) | |
1017 { | |
1018 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2); | |
1019 break; | |
1020 } | |
1021 } | |
1022 } | |
1023 | |
131 | 1024 /* Update the counts of both the original block |
111 | 1025 and path edge, and the duplicates. The path duplicate's |
131 | 1026 incoming count are the totals for all edges |
111 | 1027 incoming to this jump threading path computed earlier. |
1028 And we know that the duplicated path will have path_out_count | |
1029 flowing out of it (i.e. along the duplicated path out of the | |
1030 duplicated joiner). */ | |
131 | 1031 update_profile (epath, e2, path_in_count, path_out_count); |
111 | 1032 } |
1033 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK) | |
1034 { | |
1035 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL); | |
1036 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count], | |
1037 multi_incomings ? 0 : i); | |
1038 if (count == 1) | |
1039 single_succ_edge (rd->dup_blocks[1])->aux = NULL; | |
1040 | |
131 | 1041 /* Update the counts of both the original block |
111 | 1042 and path edge, and the duplicates. Since we are now after |
1043 any joiner that may have existed on the path, the count | |
1044 flowing along the duplicated threaded path is path_out_count. | |
1045 If we didn't have a joiner, then cur_path_freq was the sum | |
1046 of the total frequencies along all incoming edges to the | |
1047 thread path (path_in_freq). If we had a joiner, it would have | |
1048 been updated at the end of that handling to the edge frequency | |
1049 along the duplicated joiner path edge. */ | |
1050 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0), | |
131 | 1051 path_out_count, path_out_count); |
111 | 1052 } |
1053 else | |
1054 { | |
1055 /* No copy case. In this case we don't have an equivalent block | |
1056 on the duplicated thread path to update, but we do need | |
1057 to remove the portion of the counts/freqs that were moved | |
1058 to the duplicated path from the counts/freqs flowing through | |
1059 this block on the original path. Since all the no-copy edges | |
1060 are after any joiner, the removed count is the same as | |
1061 path_out_count. | |
1062 | |
1063 If we didn't have a joiner, then cur_path_freq was the sum | |
1064 of the total frequencies along all incoming edges to the | |
1065 thread path (path_in_freq). If we had a joiner, it would have | |
1066 been updated at the end of that handling to the edge frequency | |
1067 along the duplicated joiner path edge. */ | |
131 | 1068 update_profile (epath, NULL, path_out_count, path_out_count); |
111 | 1069 } |
1070 | |
1071 /* Increment the index into the duplicated path when we processed | |
1072 a duplicated block. */ | |
1073 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK | |
1074 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK) | |
1075 { | |
1076 count++; | |
1077 } | |
1078 } | |
1079 } | |
1080 | |
0 | 1081 /* Hash table traversal callback routine to create duplicate blocks. */ |
1082 | |
111 | 1083 int |
1084 ssa_create_duplicates (struct redirection_data **slot, | |
1085 ssa_local_info_t *local_info) | |
0 | 1086 { |
111 | 1087 struct redirection_data *rd = *slot; |
1088 | |
1089 /* The second duplicated block in a jump threading path is specific | |
1090 to the path. So it gets stored in RD rather than in LOCAL_DATA. | |
1091 | |
1092 Each time we're called, we have to look through the path and see | |
1093 if a second block needs to be duplicated. | |
1094 | |
1095 Note the search starts with the third edge on the path. The first | |
1096 edge is the incoming edge, the second edge always has its source | |
1097 duplicated. Thus we start our search with the third edge. */ | |
1098 vec<jump_thread_edge *> *path = rd->path; | |
1099 for (unsigned int i = 2; i < path->length (); i++) | |
1100 { | |
1101 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK | |
1102 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) | |
1103 { | |
1104 create_block_for_threading ((*path)[i]->e->src, rd, 1, | |
1105 &local_info->duplicate_blocks); | |
1106 break; | |
1107 } | |
1108 } | |
0 | 1109 |
1110 /* Create a template block if we have not done so already. Otherwise | |
1111 use the template to create a new block. */ | |
1112 if (local_info->template_block == NULL) | |
1113 { | |
111 | 1114 create_block_for_threading ((*path)[1]->e->src, rd, 0, |
1115 &local_info->duplicate_blocks); | |
1116 local_info->template_block = rd->dup_blocks[0]; | |
0 | 1117 |
1118 /* We do not create any outgoing edges for the template. We will | |
1119 take care of that in a later traversal. That way we do not | |
1120 create edges that are going to just be deleted. */ | |
1121 } | |
1122 else | |
1123 { | |
111 | 1124 create_block_for_threading (local_info->template_block, rd, 0, |
1125 &local_info->duplicate_blocks); | |
0 | 1126 |
1127 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate | |
111 | 1128 block. */ |
1129 ssa_fix_duplicate_block_edges (rd, local_info); | |
0 | 1130 } |
1131 | |
1132 /* Keep walking the hash table. */ | |
1133 return 1; | |
1134 } | |
1135 | |
1136 /* We did not create any outgoing edges for the template block during | |
1137 block creation. This hash table traversal callback creates the | |
1138 outgoing edge for the template block. */ | |
1139 | |
111 | 1140 inline int |
1141 ssa_fixup_template_block (struct redirection_data **slot, | |
1142 ssa_local_info_t *local_info) | |
0 | 1143 { |
111 | 1144 struct redirection_data *rd = *slot; |
1145 | |
1146 /* If this is the template block halt the traversal after updating | |
1147 it appropriately. | |
1148 | |
1149 If we were threading through an joiner block, then we want | |
1150 to keep its control statement and redirect an outgoing edge. | |
1151 Else we want to remove the control statement & edges, then create | |
1152 a new outgoing edge. In both cases we may need to update PHIs. */ | |
1153 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block) | |
0 | 1154 { |
111 | 1155 ssa_fix_duplicate_block_edges (rd, local_info); |
0 | 1156 return 0; |
1157 } | |
1158 | |
1159 return 1; | |
1160 } | |
1161 | |
1162 /* Hash table traversal callback to redirect each incoming edge | |
1163 associated with this hash table element to its new destination. */ | |
1164 | |
111 | 1165 int |
1166 ssa_redirect_edges (struct redirection_data **slot, | |
1167 ssa_local_info_t *local_info) | |
0 | 1168 { |
111 | 1169 struct redirection_data *rd = *slot; |
0 | 1170 struct el *next, *el; |
1171 | |
111 | 1172 /* Walk over all the incoming edges associated with this hash table |
1173 entry. */ | |
0 | 1174 for (el = rd->incoming_edges; el; el = next) |
1175 { | |
1176 edge e = el->e; | |
111 | 1177 vec<jump_thread_edge *> *path = THREAD_PATH (e); |
0 | 1178 |
1179 /* Go ahead and free this element from the list. Doing this now | |
1180 avoids the need for another list walk when we destroy the hash | |
1181 table. */ | |
1182 next = el->next; | |
1183 free (el); | |
1184 | |
1185 thread_stats.num_threaded_edges++; | |
1186 | |
111 | 1187 if (rd->dup_blocks[0]) |
0 | 1188 { |
1189 edge e2; | |
1190 | |
1191 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1192 fprintf (dump_file, " Threaded jump %d --> %d to %d\n", | |
111 | 1193 e->src->index, e->dest->index, rd->dup_blocks[0]->index); |
1194 | |
1195 /* Redirect the incoming edge (possibly to the joiner block) to the | |
1196 appropriate duplicate block. */ | |
1197 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]); | |
0 | 1198 gcc_assert (e == e2); |
1199 flush_pending_stmts (e2); | |
1200 } | |
111 | 1201 |
1202 /* Go ahead and clear E->aux. It's not needed anymore and failure | |
1203 to clear it will cause all kinds of unpleasant problems later. */ | |
1204 delete_jump_thread_path (path); | |
1205 e->aux = NULL; | |
1206 | |
0 | 1207 } |
1208 | |
1209 /* Indicate that we actually threaded one or more jumps. */ | |
1210 if (rd->incoming_edges) | |
1211 local_info->jumps_threaded = true; | |
1212 | |
1213 return 1; | |
1214 } | |
1215 | |
1216 /* Return true if this block has no executable statements other than | |
1217 a simple ctrl flow instruction. When the number of outgoing edges | |
1218 is one, this is equivalent to a "forwarder" block. */ | |
1219 | |
1220 static bool | |
1221 redirection_block_p (basic_block bb) | |
1222 { | |
1223 gimple_stmt_iterator gsi; | |
1224 | |
1225 /* Advance to the first executable statement. */ | |
1226 gsi = gsi_start_bb (bb); | |
1227 while (!gsi_end_p (gsi) | |
111 | 1228 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1229 || is_gimple_debug (gsi_stmt (gsi)) |
111 | 1230 || gimple_nop_p (gsi_stmt (gsi)) |
1231 || gimple_clobber_p (gsi_stmt (gsi)))) | |
0 | 1232 gsi_next (&gsi); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1233 |
0 | 1234 /* Check if this is an empty block. */ |
1235 if (gsi_end_p (gsi)) | |
1236 return true; | |
1237 | |
1238 /* Test that we've reached the terminating control statement. */ | |
1239 return gsi_stmt (gsi) | |
111 | 1240 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND |
1241 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO | |
1242 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH); | |
0 | 1243 } |
1244 | |
1245 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB | |
1246 is reached via one or more specific incoming edges, we know which | |
1247 outgoing edge from BB will be traversed. | |
1248 | |
1249 We want to redirect those incoming edges to the target of the | |
1250 appropriate outgoing edge. Doing so avoids a conditional branch | |
1251 and may expose new optimization opportunities. Note that we have | |
1252 to update dominator tree and SSA graph after such changes. | |
1253 | |
1254 The key to keeping the SSA graph update manageable is to duplicate | |
1255 the side effects occurring in BB so that those side effects still | |
1256 occur on the paths which bypass BB after redirecting edges. | |
1257 | |
1258 We accomplish this by creating duplicates of BB and arranging for | |
1259 the duplicates to unconditionally pass control to one specific | |
1260 successor of BB. We then revector the incoming edges into BB to | |
1261 the appropriate duplicate of BB. | |
1262 | |
1263 If NOLOOP_ONLY is true, we only perform the threading as long as it | |
111 | 1264 does not affect the structure of the loops in a nontrivial way. |
1265 | |
1266 If JOINERS is true, then thread through joiner blocks as well. */ | |
0 | 1267 |
1268 static bool | |
111 | 1269 thread_block_1 (basic_block bb, bool noloop_only, bool joiners) |
0 | 1270 { |
1271 /* E is an incoming edge into BB that we may or may not want to | |
1272 redirect to a duplicate of BB. */ | |
1273 edge e, e2; | |
1274 edge_iterator ei; | |
111 | 1275 ssa_local_info_t local_info; |
1276 | |
1277 local_info.duplicate_blocks = BITMAP_ALLOC (NULL); | |
1278 local_info.need_profile_correction = false; | |
0 | 1279 |
1280 /* To avoid scanning a linear array for the element we need we instead | |
1281 use a hash table. For normal code there should be no noticeable | |
1282 difference. However, if we have a block with a large number of | |
1283 incoming and outgoing edges such linear searches can get expensive. */ | |
111 | 1284 redirection_data |
1285 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs)); | |
0 | 1286 |
1287 /* Record each unique threaded destination into a hash table for | |
1288 efficient lookups. */ | |
111 | 1289 edge last = NULL; |
0 | 1290 FOR_EACH_EDGE (e, ei, bb->preds) |
1291 { | |
111 | 1292 if (e->aux == NULL) |
1293 continue; | |
1294 | |
1295 vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
1296 | |
1297 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners) | |
1298 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners)) | |
1299 continue; | |
1300 | |
1301 e2 = path->last ()->e; | |
1302 if (!e2 || noloop_only) | |
1303 { | |
0 | 1304 /* If NOLOOP_ONLY is true, we only allow threading through the |
1305 header of a loop to exit edges. */ | |
111 | 1306 |
1307 /* One case occurs when there was loop header buried in a jump | |
1308 threading path that crosses loop boundaries. We do not try | |
1309 and thread this elsewhere, so just cancel the jump threading | |
1310 request by clearing the AUX field now. */ | |
1311 if (bb->loop_father != e2->src->loop_father | |
131 | 1312 && (!loop_exit_edge_p (e2->src->loop_father, e2) |
1313 || flow_loop_nested_p (bb->loop_father, | |
1314 e2->dest->loop_father))) | |
111 | 1315 { |
1316 /* Since this case is not handled by our special code | |
1317 to thread through a loop header, we must explicitly | |
1318 cancel the threading request here. */ | |
1319 delete_jump_thread_path (path); | |
1320 e->aux = NULL; | |
1321 continue; | |
1322 } | |
1323 | |
1324 /* Another case occurs when trying to thread through our | |
1325 own loop header, possibly from inside the loop. We will | |
1326 thread these later. */ | |
1327 unsigned int i; | |
1328 for (i = 1; i < path->length (); i++) | |
1329 { | |
1330 if ((*path)[i]->e->src == bb->loop_father->header | |
1331 && (!loop_exit_edge_p (bb->loop_father, e2) | |
1332 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)) | |
1333 break; | |
1334 } | |
1335 | |
1336 if (i != path->length ()) | |
1337 continue; | |
131 | 1338 |
1339 /* Loop parallelization can be confused by the result of | |
1340 threading through the loop exit test back into the loop. | |
1341 However, theading those jumps seems to help other codes. | |
1342 | |
1343 I have been unable to find anything related to the shape of | |
1344 the CFG, the contents of the affected blocks, etc which would | |
1345 allow a more sensible test than what we're using below which | |
1346 merely avoids the optimization when parallelizing loops. */ | |
1347 if (flag_tree_parallelize_loops > 1) | |
1348 { | |
1349 for (i = 1; i < path->length (); i++) | |
1350 if (bb->loop_father == e2->src->loop_father | |
1351 && loop_exits_from_bb_p (bb->loop_father, | |
1352 (*path)[i]->e->src) | |
1353 && !loop_exit_edge_p (bb->loop_father, e2)) | |
1354 break; | |
1355 | |
1356 if (i != path->length ()) | |
1357 { | |
1358 delete_jump_thread_path (path); | |
1359 e->aux = NULL; | |
1360 continue; | |
1361 } | |
1362 } | |
0 | 1363 } |
1364 | |
1365 /* Insert the outgoing edge into the hash table if it is not | |
1366 already in the hash table. */ | |
111 | 1367 lookup_redirection_data (e, INSERT); |
1368 | |
1369 /* When we have thread paths through a common joiner with different | |
1370 final destinations, then we may need corrections to deal with | |
1371 profile insanities. See the big comment before compute_path_counts. */ | |
1372 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK) | |
1373 { | |
1374 if (!last) | |
1375 last = e2; | |
1376 else if (e2 != last) | |
1377 local_info.need_profile_correction = true; | |
1378 } | |
0 | 1379 } |
1380 | |
1381 /* We do not update dominance info. */ | |
1382 free_dominance_info (CDI_DOMINATORS); | |
1383 | |
111 | 1384 /* We know we only thread through the loop header to loop exits. |
1385 Let the basic block duplication hook know we are not creating | |
1386 a multiple entry loop. */ | |
1387 if (noloop_only | |
1388 && bb == bb->loop_father->header) | |
1389 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father)); | |
1390 | |
0 | 1391 /* Now create duplicates of BB. |
1392 | |
1393 Note that for a block with a high outgoing degree we can waste | |
1394 a lot of time and memory creating and destroying useless edges. | |
1395 | |
1396 So we first duplicate BB and remove the control structure at the | |
1397 tail of the duplicate as well as all outgoing edges from the | |
1398 duplicate. We then use that duplicate block as a template for | |
1399 the rest of the duplicates. */ | |
1400 local_info.template_block = NULL; | |
1401 local_info.bb = bb; | |
1402 local_info.jumps_threaded = false; | |
111 | 1403 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates> |
1404 (&local_info); | |
0 | 1405 |
1406 /* The template does not have an outgoing edge. Create that outgoing | |
1407 edge and update PHI nodes as the edge's target as necessary. | |
1408 | |
1409 We do this after creating all the duplicates to avoid creating | |
1410 unnecessary edges. */ | |
111 | 1411 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block> |
1412 (&local_info); | |
0 | 1413 |
1414 /* The hash table traversals above created the duplicate blocks (and the | |
1415 statements within the duplicate blocks). This loop creates PHI nodes for | |
1416 the duplicated blocks and redirects the incoming edges into BB to reach | |
1417 the duplicates of BB. */ | |
111 | 1418 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges> |
1419 (&local_info); | |
0 | 1420 |
1421 /* Done with this block. Clear REDIRECTION_DATA. */ | |
111 | 1422 delete redirection_data; |
0 | 1423 redirection_data = NULL; |
1424 | |
111 | 1425 if (noloop_only |
1426 && bb == bb->loop_father->header) | |
1427 set_loop_copy (bb->loop_father, NULL); | |
1428 | |
1429 BITMAP_FREE (local_info.duplicate_blocks); | |
1430 local_info.duplicate_blocks = NULL; | |
1431 | |
0 | 1432 /* Indicate to our caller whether or not any jumps were threaded. */ |
1433 return local_info.jumps_threaded; | |
1434 } | |
1435 | |
111 | 1436 /* Wrapper for thread_block_1 so that we can first handle jump |
1437 thread paths which do not involve copying joiner blocks, then | |
1438 handle jump thread paths which have joiner blocks. | |
1439 | |
1440 By doing things this way we can be as aggressive as possible and | |
1441 not worry that copying a joiner block will create a jump threading | |
1442 opportunity. */ | |
1443 | |
1444 static bool | |
1445 thread_block (basic_block bb, bool noloop_only) | |
0 | 1446 { |
111 | 1447 bool retval; |
1448 retval = thread_block_1 (bb, noloop_only, false); | |
1449 retval |= thread_block_1 (bb, noloop_only, true); | |
1450 return retval; | |
0 | 1451 } |
1452 | |
1453 /* Callback for dfs_enumerate_from. Returns true if BB is different | |
1454 from STOP and DBDS_CE_STOP. */ | |
1455 | |
1456 static basic_block dbds_ce_stop; | |
1457 static bool | |
1458 dbds_continue_enumeration_p (const_basic_block bb, const void *stop) | |
1459 { | |
1460 return (bb != (const_basic_block) stop | |
1461 && bb != dbds_ce_stop); | |
1462 } | |
1463 | |
1464 /* Evaluates the dominance relationship of latch of the LOOP and BB, and | |
1465 returns the state. */ | |
1466 | |
1467 enum bb_dom_status | |
1468 determine_bb_domination_status (struct loop *loop, basic_block bb) | |
1469 { | |
1470 basic_block *bblocks; | |
1471 unsigned nblocks, i; | |
1472 bool bb_reachable = false; | |
1473 edge_iterator ei; | |
1474 edge e; | |
1475 | |
111 | 1476 /* This function assumes BB is a successor of LOOP->header. |
1477 If that is not the case return DOMST_NONDOMINATING which | |
1478 is always safe. */ | |
0 | 1479 { |
1480 bool ok = false; | |
1481 | |
1482 FOR_EACH_EDGE (e, ei, bb->preds) | |
1483 { | |
1484 if (e->src == loop->header) | |
1485 { | |
1486 ok = true; | |
1487 break; | |
1488 } | |
1489 } | |
1490 | |
111 | 1491 if (!ok) |
1492 return DOMST_NONDOMINATING; | |
0 | 1493 } |
1494 | |
1495 if (bb == loop->latch) | |
1496 return DOMST_DOMINATING; | |
1497 | |
1498 /* Check that BB dominates LOOP->latch, and that it is back-reachable | |
1499 from it. */ | |
1500 | |
1501 bblocks = XCNEWVEC (basic_block, loop->num_nodes); | |
1502 dbds_ce_stop = loop->header; | |
1503 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p, | |
1504 bblocks, loop->num_nodes, bb); | |
1505 for (i = 0; i < nblocks; i++) | |
1506 FOR_EACH_EDGE (e, ei, bblocks[i]->preds) | |
1507 { | |
1508 if (e->src == loop->header) | |
1509 { | |
1510 free (bblocks); | |
1511 return DOMST_NONDOMINATING; | |
1512 } | |
1513 if (e->src == bb) | |
1514 bb_reachable = true; | |
1515 } | |
1516 | |
1517 free (bblocks); | |
1518 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN); | |
1519 } | |
1520 | |
1521 /* Thread jumps through the header of LOOP. Returns true if cfg changes. | |
1522 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges | |
1523 to the inside of the loop. */ | |
1524 | |
1525 static bool | |
1526 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers) | |
1527 { | |
1528 basic_block header = loop->header; | |
1529 edge e, tgt_edge, latch = loop_latch_edge (loop); | |
1530 edge_iterator ei; | |
1531 basic_block tgt_bb, atgt_bb; | |
1532 enum bb_dom_status domst; | |
1533 | |
1534 /* We have already threaded through headers to exits, so all the threading | |
1535 requests now are to the inside of the loop. We need to avoid creating | |
1536 irreducible regions (i.e., loops with more than one entry block), and | |
1537 also loop with several latch edges, or new subloops of the loop (although | |
1538 there are cases where it might be appropriate, it is difficult to decide, | |
1539 and doing it wrongly may confuse other optimizers). | |
1540 | |
1541 We could handle more general cases here. However, the intention is to | |
1542 preserve some information about the loop, which is impossible if its | |
1543 structure changes significantly, in a way that is not well understood. | |
1544 Thus we only handle few important special cases, in which also updating | |
1545 of the loop-carried information should be feasible: | |
1546 | |
1547 1) Propagation of latch edge to a block that dominates the latch block | |
1548 of a loop. This aims to handle the following idiom: | |
1549 | |
1550 first = 1; | |
1551 while (1) | |
1552 { | |
1553 if (first) | |
1554 initialize; | |
1555 first = 0; | |
1556 body; | |
1557 } | |
1558 | |
1559 After threading the latch edge, this becomes | |
1560 | |
1561 first = 1; | |
1562 if (first) | |
1563 initialize; | |
1564 while (1) | |
1565 { | |
1566 first = 0; | |
1567 body; | |
1568 } | |
1569 | |
1570 The original header of the loop is moved out of it, and we may thread | |
1571 the remaining edges through it without further constraints. | |
1572 | |
1573 2) All entry edges are propagated to a single basic block that dominates | |
1574 the latch block of the loop. This aims to handle the following idiom | |
1575 (normally created for "for" loops): | |
1576 | |
1577 i = 0; | |
1578 while (1) | |
1579 { | |
1580 if (i >= 100) | |
1581 break; | |
1582 body; | |
1583 i++; | |
1584 } | |
1585 | |
1586 This becomes | |
1587 | |
1588 i = 0; | |
1589 while (1) | |
1590 { | |
1591 body; | |
1592 i++; | |
1593 if (i >= 100) | |
1594 break; | |
1595 } | |
1596 */ | |
1597 | |
1598 /* Threading through the header won't improve the code if the header has just | |
1599 one successor. */ | |
1600 if (single_succ_p (header)) | |
1601 goto fail; | |
1602 | |
111 | 1603 if (!may_peel_loop_headers && !redirection_block_p (loop->header)) |
0 | 1604 goto fail; |
1605 else | |
1606 { | |
1607 tgt_bb = NULL; | |
1608 tgt_edge = NULL; | |
1609 FOR_EACH_EDGE (e, ei, header->preds) | |
1610 { | |
1611 if (!e->aux) | |
1612 { | |
1613 if (e == latch) | |
1614 continue; | |
1615 | |
1616 /* If latch is not threaded, and there is a header | |
1617 edge that is not threaded, we would create loop | |
1618 with multiple entries. */ | |
1619 goto fail; | |
1620 } | |
1621 | |
111 | 1622 vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1623 | |
1624 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK) | |
1625 goto fail; | |
1626 tgt_edge = (*path)[1]->e; | |
0 | 1627 atgt_bb = tgt_edge->dest; |
1628 if (!tgt_bb) | |
1629 tgt_bb = atgt_bb; | |
1630 /* Two targets of threading would make us create loop | |
1631 with multiple entries. */ | |
1632 else if (tgt_bb != atgt_bb) | |
1633 goto fail; | |
1634 } | |
1635 | |
1636 if (!tgt_bb) | |
1637 { | |
1638 /* There are no threading requests. */ | |
1639 return false; | |
1640 } | |
1641 | |
1642 /* Redirecting to empty loop latch is useless. */ | |
1643 if (tgt_bb == loop->latch | |
1644 && empty_block_p (loop->latch)) | |
1645 goto fail; | |
1646 } | |
1647 | |
1648 /* The target block must dominate the loop latch, otherwise we would be | |
1649 creating a subloop. */ | |
1650 domst = determine_bb_domination_status (loop, tgt_bb); | |
1651 if (domst == DOMST_NONDOMINATING) | |
1652 goto fail; | |
1653 if (domst == DOMST_LOOP_BROKEN) | |
1654 { | |
1655 /* If the loop ceased to exist, mark it as such, and thread through its | |
1656 original header. */ | |
111 | 1657 mark_loop_for_removal (loop); |
0 | 1658 return thread_block (header, false); |
1659 } | |
1660 | |
1661 if (tgt_bb->loop_father->header == tgt_bb) | |
1662 { | |
1663 /* If the target of the threading is a header of a subloop, we need | |
1664 to create a preheader for it, so that the headers of the two loops | |
1665 do not merge. */ | |
1666 if (EDGE_COUNT (tgt_bb->preds) > 2) | |
1667 { | |
1668 tgt_bb = create_preheader (tgt_bb->loop_father, 0); | |
1669 gcc_assert (tgt_bb != NULL); | |
1670 } | |
1671 else | |
1672 tgt_bb = split_edge (tgt_edge); | |
1673 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1674 |
111 | 1675 basic_block new_preheader; |
1676 | |
1677 /* Now consider the case entry edges are redirected to the new entry | |
1678 block. Remember one entry edge, so that we can find the new | |
1679 preheader (its destination after threading). */ | |
1680 FOR_EACH_EDGE (e, ei, header->preds) | |
0 | 1681 { |
111 | 1682 if (e->aux) |
1683 break; | |
0 | 1684 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1685 |
111 | 1686 /* The duplicate of the header is the new preheader of the loop. Ensure |
1687 that it is placed correctly in the loop hierarchy. */ | |
1688 set_loop_copy (loop, loop_outer (loop)); | |
1689 | |
1690 thread_block (header, false); | |
1691 set_loop_copy (loop, NULL); | |
1692 new_preheader = e->dest; | |
1693 | |
1694 /* Create the new latch block. This is always necessary, as the latch | |
1695 must have only a single successor, but the original header had at | |
1696 least two successors. */ | |
1697 loop->latch = NULL; | |
1698 mfb_kj_edge = single_succ_edge (new_preheader); | |
1699 loop->header = mfb_kj_edge->dest; | |
1700 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL); | |
1701 loop->header = latch->dest; | |
1702 loop->latch = latch->src; | |
0 | 1703 return true; |
1704 | |
1705 fail: | |
1706 /* We failed to thread anything. Cancel the requests. */ | |
1707 FOR_EACH_EDGE (e, ei, header->preds) | |
1708 { | |
111 | 1709 vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1710 | |
1711 if (path) | |
1712 { | |
1713 delete_jump_thread_path (path); | |
1714 e->aux = NULL; | |
1715 } | |
0 | 1716 } |
1717 return false; | |
1718 } | |
1719 | |
111 | 1720 /* E1 and E2 are edges into the same basic block. Return TRUE if the |
1721 PHI arguments associated with those edges are equal or there are no | |
1722 PHI arguments, otherwise return FALSE. */ | |
1723 | |
1724 static bool | |
1725 phi_args_equal_on_edges (edge e1, edge e2) | |
1726 { | |
1727 gphi_iterator gsi; | |
1728 int indx1 = e1->dest_idx; | |
1729 int indx2 = e2->dest_idx; | |
1730 | |
1731 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1732 { | |
1733 gphi *phi = gsi.phi (); | |
1734 | |
1735 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1), | |
1736 gimple_phi_arg_def (phi, indx2), 0)) | |
1737 return false; | |
1738 } | |
1739 return true; | |
1740 } | |
1741 | |
131 | 1742 /* Return the number of non-debug statements and non-virtual PHIs in a |
1743 block. */ | |
1744 | |
1745 static unsigned int | |
1746 count_stmts_and_phis_in_block (basic_block bb) | |
1747 { | |
1748 unsigned int num_stmts = 0; | |
1749 | |
1750 gphi_iterator gpi; | |
1751 for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi)) | |
1752 if (!virtual_operand_p (PHI_RESULT (gpi.phi ()))) | |
1753 num_stmts++; | |
1754 | |
1755 gimple_stmt_iterator gsi; | |
1756 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1757 { | |
1758 gimple *stmt = gsi_stmt (gsi); | |
1759 if (!is_gimple_debug (stmt)) | |
1760 num_stmts++; | |
1761 } | |
1762 | |
1763 return num_stmts; | |
1764 } | |
1765 | |
1766 | |
0 | 1767 /* Walk through the registered jump threads and convert them into a |
1768 form convenient for this pass. | |
1769 | |
1770 Any block which has incoming edges threaded to outgoing edges | |
1771 will have its entry in THREADED_BLOCK set. | |
1772 | |
1773 Any threaded edge will have its new outgoing edge stored in the | |
1774 original edge's AUX field. | |
1775 | |
1776 This form avoids the need to walk all the edges in the CFG to | |
1777 discover blocks which need processing and avoids unnecessary | |
1778 hash table lookups to map from threaded edge to new target. */ | |
1779 | |
1780 static void | |
1781 mark_threaded_blocks (bitmap threaded_blocks) | |
1782 { | |
1783 unsigned int i; | |
1784 bitmap_iterator bi; | |
111 | 1785 auto_bitmap tmp; |
0 | 1786 basic_block bb; |
1787 edge e; | |
1788 edge_iterator ei; | |
1789 | |
111 | 1790 /* It is possible to have jump threads in which one is a subpath |
1791 of the other. ie, (A, B), (B, C), (C, D) where B is a joiner | |
1792 block and (B, C), (C, D) where no joiner block exists. | |
1793 | |
1794 When this occurs ignore the jump thread request with the joiner | |
1795 block. It's totally subsumed by the simpler jump thread request. | |
1796 | |
1797 This results in less block copying, simpler CFGs. More importantly, | |
1798 when we duplicate the joiner block, B, in this case we will create | |
1799 a new threading opportunity that we wouldn't be able to optimize | |
1800 until the next jump threading iteration. | |
1801 | |
1802 So first convert the jump thread requests which do not require a | |
1803 joiner block. */ | |
1804 for (i = 0; i < paths.length (); i++) | |
1805 { | |
1806 vec<jump_thread_edge *> *path = paths[i]; | |
1807 | |
131 | 1808 if (path->length () > 1 |
1809 && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK) | |
111 | 1810 { |
1811 edge e = (*path)[0]->e; | |
1812 e->aux = (void *)path; | |
1813 bitmap_set_bit (tmp, e->dest->index); | |
1814 } | |
1815 } | |
1816 | |
1817 /* Now iterate again, converting cases where we want to thread | |
1818 through a joiner block, but only if no other edge on the path | |
1819 already has a jump thread attached to it. We do this in two passes, | |
1820 to avoid situations where the order in the paths vec can hide overlapping | |
1821 threads (the path is recorded on the incoming edge, so we would miss | |
1822 cases where the second path starts at a downstream edge on the same | |
1823 path). First record all joiner paths, deleting any in the unexpected | |
1824 case where there is already a path for that incoming edge. */ | |
1825 for (i = 0; i < paths.length ();) | |
0 | 1826 { |
111 | 1827 vec<jump_thread_edge *> *path = paths[i]; |
1828 | |
131 | 1829 if (path->length () > 1 |
1830 && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK) | |
111 | 1831 { |
1832 /* Attach the path to the starting edge if none is yet recorded. */ | |
1833 if ((*path)[0]->e->aux == NULL) | |
1834 { | |
1835 (*path)[0]->e->aux = path; | |
1836 i++; | |
1837 } | |
1838 else | |
1839 { | |
1840 paths.unordered_remove (i); | |
1841 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1842 dump_jump_thread_path (dump_file, *path, false); | |
1843 delete_jump_thread_path (path); | |
1844 } | |
1845 } | |
1846 else | |
1847 { | |
1848 i++; | |
1849 } | |
1850 } | |
1851 | |
1852 /* Second, look for paths that have any other jump thread attached to | |
1853 them, and either finish converting them or cancel them. */ | |
1854 for (i = 0; i < paths.length ();) | |
1855 { | |
1856 vec<jump_thread_edge *> *path = paths[i]; | |
1857 edge e = (*path)[0]->e; | |
1858 | |
131 | 1859 if (path->length () > 1 |
1860 && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path) | |
111 | 1861 { |
1862 unsigned int j; | |
1863 for (j = 1; j < path->length (); j++) | |
1864 if ((*path)[j]->e->aux != NULL) | |
1865 break; | |
1866 | |
1867 /* If we iterated through the entire path without exiting the loop, | |
1868 then we are good to go, record it. */ | |
1869 if (j == path->length ()) | |
1870 { | |
1871 bitmap_set_bit (tmp, e->dest->index); | |
1872 i++; | |
1873 } | |
1874 else | |
1875 { | |
1876 e->aux = NULL; | |
1877 paths.unordered_remove (i); | |
1878 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1879 dump_jump_thread_path (dump_file, *path, false); | |
1880 delete_jump_thread_path (path); | |
1881 } | |
1882 } | |
1883 else | |
1884 { | |
1885 i++; | |
1886 } | |
0 | 1887 } |
1888 | |
131 | 1889 /* When optimizing for size, prune all thread paths where statement |
1890 duplication is necessary. | |
1891 | |
1892 We walk the jump thread path looking for copied blocks. There's | |
1893 two types of copied blocks. | |
1894 | |
1895 EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will | |
1896 cancel the jump threading request when optimizing for size. | |
1897 | |
1898 EDGE_COPY_SRC_BLOCK which is copied, but some of its statements | |
1899 will be killed by threading. If threading does not kill all of | |
1900 its statements, then we should cancel the jump threading request | |
1901 when optimizing for size. */ | |
0 | 1902 if (optimize_function_for_size_p (cfun)) |
1903 { | |
1904 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) | |
1905 { | |
131 | 1906 FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds) |
1907 if (e->aux) | |
1908 { | |
1909 vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
1910 | |
1911 unsigned int j; | |
1912 for (j = 1; j < path->length (); j++) | |
1913 { | |
1914 bb = (*path)[j]->e->src; | |
1915 if (redirection_block_p (bb)) | |
1916 ; | |
1917 else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK | |
1918 || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK | |
1919 && (count_stmts_and_phis_in_block (bb) | |
1920 != estimate_threading_killed_stmts (bb)))) | |
1921 break; | |
1922 } | |
1923 | |
1924 if (j != path->length ()) | |
1925 { | |
1926 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1927 dump_jump_thread_path (dump_file, *path, 0); | |
1928 delete_jump_thread_path (path); | |
1929 e->aux = NULL; | |
1930 } | |
1931 else | |
1932 bitmap_set_bit (threaded_blocks, i); | |
1933 } | |
0 | 1934 } |
1935 } | |
1936 else | |
1937 bitmap_copy (threaded_blocks, tmp); | |
1938 | |
111 | 1939 /* If we have a joiner block (J) which has two successors S1 and S2 and |
1940 we are threading though S1 and the final destination of the thread | |
1941 is S2, then we must verify that any PHI nodes in S2 have the same | |
1942 PHI arguments for the edge J->S2 and J->S1->...->S2. | |
1943 | |
1944 We used to detect this prior to registering the jump thread, but | |
1945 that prohibits propagation of edge equivalences into non-dominated | |
1946 PHI nodes as the equivalency test might occur before propagation. | |
1947 | |
1948 This must also occur after we truncate any jump threading paths | |
1949 as this scenario may only show up after truncation. | |
1950 | |
1951 This works for now, but will need improvement as part of the FSA | |
1952 optimization. | |
1953 | |
1954 Note since we've moved the thread request data to the edges, | |
1955 we have to iterate on those rather than the threaded_edges vector. */ | |
1956 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) | |
1957 { | |
1958 bb = BASIC_BLOCK_FOR_FN (cfun, i); | |
1959 FOR_EACH_EDGE (e, ei, bb->preds) | |
1960 { | |
1961 if (e->aux) | |
1962 { | |
1963 vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
1964 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK); | |
1965 | |
1966 if (have_joiner) | |
1967 { | |
1968 basic_block joiner = e->dest; | |
1969 edge final_edge = path->last ()->e; | |
1970 basic_block final_dest = final_edge->dest; | |
1971 edge e2 = find_edge (joiner, final_dest); | |
1972 | |
1973 if (e2 && !phi_args_equal_on_edges (e2, final_edge)) | |
1974 { | |
1975 delete_jump_thread_path (path); | |
1976 e->aux = NULL; | |
1977 } | |
1978 } | |
1979 } | |
1980 } | |
1981 } | |
1982 | |
1983 /* Look for jump threading paths which cross multiple loop headers. | |
1984 | |
1985 The code to thread through loop headers will change the CFG in ways | |
1986 that invalidate the cached loop iteration information. So we must | |
1987 detect that case and wipe the cached information. */ | |
1988 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) | |
1989 { | |
1990 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); | |
1991 FOR_EACH_EDGE (e, ei, bb->preds) | |
1992 { | |
1993 if (e->aux) | |
1994 { | |
1995 vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
1996 | |
1997 for (unsigned int i = 0, crossed_headers = 0; | |
1998 i < path->length (); | |
1999 i++) | |
2000 { | |
2001 basic_block dest = (*path)[i]->e->dest; | |
2002 basic_block src = (*path)[i]->e->src; | |
2003 /* If we enter a loop. */ | |
2004 if (flow_loop_nested_p (src->loop_father, dest->loop_father)) | |
2005 ++crossed_headers; | |
2006 /* If we step from a block outside an irreducible region | |
2007 to a block inside an irreducible region, then we have | |
2008 crossed into a loop. */ | |
2009 else if (! (src->flags & BB_IRREDUCIBLE_LOOP) | |
2010 && (dest->flags & BB_IRREDUCIBLE_LOOP)) | |
2011 ++crossed_headers; | |
2012 if (crossed_headers > 1) | |
2013 { | |
2014 vect_free_loop_info_assumptions | |
2015 ((*path)[path->length () - 1]->e->dest->loop_father); | |
2016 break; | |
2017 } | |
2018 } | |
2019 } | |
2020 } | |
2021 } | |
2022 } | |
2023 | |
2024 | |
2025 /* Verify that the REGION is a valid jump thread. A jump thread is a special | |
2026 case of SEME Single Entry Multiple Exits region in which all nodes in the | |
2027 REGION have exactly one incoming edge. The only exception is the first block | |
2028 that may not have been connected to the rest of the cfg yet. */ | |
2029 | |
2030 DEBUG_FUNCTION void | |
2031 verify_jump_thread (basic_block *region, unsigned n_region) | |
2032 { | |
2033 for (unsigned i = 0; i < n_region; i++) | |
2034 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1); | |
2035 } | |
2036 | |
2037 /* Return true when BB is one of the first N items in BBS. */ | |
2038 | |
2039 static inline bool | |
2040 bb_in_bbs (basic_block bb, basic_block *bbs, int n) | |
2041 { | |
2042 for (int i = 0; i < n; i++) | |
2043 if (bb == bbs[i]) | |
2044 return true; | |
2045 | |
2046 return false; | |
0 | 2047 } |
2048 | |
131 | 2049 DEBUG_FUNCTION void |
2050 debug_path (FILE *dump_file, int pathno) | |
2051 { | |
2052 vec<jump_thread_edge *> *p = paths[pathno]; | |
2053 fprintf (dump_file, "path: "); | |
2054 for (unsigned i = 0; i < p->length (); ++i) | |
2055 fprintf (dump_file, "%d -> %d, ", | |
2056 (*p)[i]->e->src->index, (*p)[i]->e->dest->index); | |
2057 fprintf (dump_file, "\n"); | |
2058 } | |
2059 | |
2060 DEBUG_FUNCTION void | |
2061 debug_all_paths () | |
2062 { | |
2063 for (unsigned i = 0; i < paths.length (); ++i) | |
2064 debug_path (stderr, i); | |
2065 } | |
2066 | |
2067 /* Rewire a jump_thread_edge so that the source block is now a | |
2068 threaded source block. | |
2069 | |
2070 PATH_NUM is an index into the global path table PATHS. | |
2071 EDGE_NUM is the jump thread edge number into said path. | |
2072 | |
2073 Returns TRUE if we were able to successfully rewire the edge. */ | |
2074 | |
2075 static bool | |
2076 rewire_first_differing_edge (unsigned path_num, unsigned edge_num) | |
2077 { | |
2078 vec<jump_thread_edge *> *path = paths[path_num]; | |
2079 edge &e = (*path)[edge_num]->e; | |
2080 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2081 fprintf (dump_file, "rewiring edge candidate: %d -> %d\n", | |
2082 e->src->index, e->dest->index); | |
2083 basic_block src_copy = get_bb_copy (e->src); | |
2084 if (src_copy == NULL) | |
2085 { | |
2086 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2087 fprintf (dump_file, "ignoring candidate: there is no src COPY\n"); | |
2088 return false; | |
2089 } | |
2090 edge new_edge = find_edge (src_copy, e->dest); | |
2091 /* If the previously threaded paths created a flow graph where we | |
2092 can no longer figure out where to go, give up. */ | |
2093 if (new_edge == NULL) | |
2094 { | |
2095 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2096 fprintf (dump_file, "ignoring candidate: we lost our way\n"); | |
2097 return false; | |
2098 } | |
2099 e = new_edge; | |
2100 return true; | |
2101 } | |
2102 | |
2103 /* After an FSM path has been jump threaded, adjust the remaining FSM | |
2104 paths that are subsets of this path, so these paths can be safely | |
2105 threaded within the context of the new threaded path. | |
2106 | |
2107 For example, suppose we have just threaded: | |
2108 | |
2109 5 -> 6 -> 7 -> 8 -> 12 => 5 -> 6' -> 7' -> 8' -> 12' | |
2110 | |
2111 And we have an upcoming threading candidate: | |
2112 5 -> 6 -> 7 -> 8 -> 15 -> 20 | |
2113 | |
2114 This function adjusts the upcoming path into: | |
2115 8' -> 15 -> 20 | |
2116 | |
2117 CURR_PATH_NUM is an index into the global paths table. It | |
2118 specifies the path that was just threaded. */ | |
2119 | |
2120 static void | |
2121 adjust_paths_after_duplication (unsigned curr_path_num) | |
2122 { | |
2123 vec<jump_thread_edge *> *curr_path = paths[curr_path_num]; | |
2124 gcc_assert ((*curr_path)[0]->type == EDGE_FSM_THREAD); | |
2125 | |
2126 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2127 { | |
2128 fprintf (dump_file, "just threaded: "); | |
2129 debug_path (dump_file, curr_path_num); | |
2130 } | |
2131 | |
2132 /* Iterate through all the other paths and adjust them. */ | |
2133 for (unsigned cand_path_num = 0; cand_path_num < paths.length (); ) | |
2134 { | |
2135 if (cand_path_num == curr_path_num) | |
2136 { | |
2137 ++cand_path_num; | |
2138 continue; | |
2139 } | |
2140 /* Make sure the candidate to adjust starts with the same path | |
2141 as the recently threaded path and is an FSM thread. */ | |
2142 vec<jump_thread_edge *> *cand_path = paths[cand_path_num]; | |
2143 if ((*cand_path)[0]->type != EDGE_FSM_THREAD | |
2144 || (*cand_path)[0]->e != (*curr_path)[0]->e) | |
2145 { | |
2146 ++cand_path_num; | |
2147 continue; | |
2148 } | |
2149 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2150 { | |
2151 fprintf (dump_file, "adjusting candidate: "); | |
2152 debug_path (dump_file, cand_path_num); | |
2153 } | |
2154 | |
2155 /* Chop off from the candidate path any prefix it shares with | |
2156 the recently threaded path. */ | |
2157 unsigned minlength = MIN (curr_path->length (), cand_path->length ()); | |
2158 unsigned j; | |
2159 for (j = 0; j < minlength; ++j) | |
2160 { | |
2161 edge cand_edge = (*cand_path)[j]->e; | |
2162 edge curr_edge = (*curr_path)[j]->e; | |
2163 | |
2164 /* Once the prefix no longer matches, adjust the first | |
2165 non-matching edge to point from an adjusted edge to | |
2166 wherever it was going. */ | |
2167 if (cand_edge != curr_edge) | |
2168 { | |
2169 gcc_assert (cand_edge->src == curr_edge->src); | |
2170 if (!rewire_first_differing_edge (cand_path_num, j)) | |
2171 goto remove_candidate_from_list; | |
2172 break; | |
2173 } | |
2174 } | |
2175 if (j == minlength) | |
2176 { | |
2177 /* If we consumed the max subgraph we could look at, and | |
2178 still didn't find any different edges, it's the | |
2179 last edge after MINLENGTH. */ | |
2180 if (cand_path->length () > minlength) | |
2181 { | |
2182 if (!rewire_first_differing_edge (cand_path_num, j)) | |
2183 goto remove_candidate_from_list; | |
2184 } | |
2185 else if (dump_file && (dump_flags & TDF_DETAILS)) | |
2186 fprintf (dump_file, "adjusting first edge after MINLENGTH.\n"); | |
2187 } | |
2188 if (j > 0) | |
2189 { | |
2190 /* If we are removing everything, delete the entire candidate. */ | |
2191 if (j == cand_path->length ()) | |
2192 { | |
2193 remove_candidate_from_list: | |
2194 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2195 fprintf (dump_file, "adjusted candidate: [EMPTY]\n"); | |
2196 delete_jump_thread_path (cand_path); | |
2197 paths.unordered_remove (cand_path_num); | |
2198 continue; | |
2199 } | |
2200 /* Otherwise, just remove the redundant sub-path. */ | |
2201 cand_path->block_remove (0, j); | |
2202 } | |
2203 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2204 { | |
2205 fprintf (dump_file, "adjusted candidate: "); | |
2206 debug_path (dump_file, cand_path_num); | |
2207 } | |
2208 ++cand_path_num; | |
2209 } | |
2210 } | |
2211 | |
111 | 2212 /* Duplicates a jump-thread path of N_REGION basic blocks. |
2213 The ENTRY edge is redirected to the duplicate of the region. | |
2214 | |
2215 Remove the last conditional statement in the last basic block in the REGION, | |
2216 and create a single fallthru edge pointing to the same destination as the | |
2217 EXIT edge. | |
2218 | |
131 | 2219 CURRENT_PATH_NO is an index into the global paths[] table |
2220 specifying the jump-thread path. | |
2221 | |
111 | 2222 Returns false if it is unable to copy the region, true otherwise. */ |
2223 | |
2224 static bool | |
2225 duplicate_thread_path (edge entry, edge exit, basic_block *region, | |
131 | 2226 unsigned n_region, unsigned current_path_no) |
111 | 2227 { |
2228 unsigned i; | |
2229 struct loop *loop = entry->dest->loop_father; | |
2230 edge exit_copy; | |
2231 edge redirected; | |
2232 profile_count curr_count; | |
2233 | |
2234 if (!can_copy_bbs_p (region, n_region)) | |
2235 return false; | |
2236 | |
131 | 2237 if (dump_file && (dump_flags & TDF_DETAILS)) |
2238 { | |
2239 fprintf (dump_file, "\nabout to thread: "); | |
2240 debug_path (dump_file, current_path_no); | |
2241 } | |
2242 | |
111 | 2243 /* Some sanity checking. Note that we do not check for all possible |
2244 missuses of the functions. I.e. if you ask to copy something weird, | |
2245 it will work, but the state of structures probably will not be | |
2246 correct. */ | |
2247 for (i = 0; i < n_region; i++) | |
2248 { | |
2249 /* We do not handle subloops, i.e. all the blocks must belong to the | |
2250 same loop. */ | |
2251 if (region[i]->loop_father != loop) | |
2252 return false; | |
2253 } | |
2254 | |
2255 initialize_original_copy_tables (); | |
2256 | |
2257 set_loop_copy (loop, loop); | |
2258 | |
2259 basic_block *region_copy = XNEWVEC (basic_block, n_region); | |
2260 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, | |
2261 split_edge_bb_loc (entry), false); | |
2262 | |
2263 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The | |
2264 following code ensures that all the edges exiting the jump-thread path are | |
2265 redirected back to the original code: these edges are exceptions | |
2266 invalidating the property that is propagated by executing all the blocks of | |
2267 the jump-thread path in order. */ | |
2268 | |
2269 curr_count = entry->count (); | |
2270 | |
2271 for (i = 0; i < n_region; i++) | |
2272 { | |
2273 edge e; | |
2274 edge_iterator ei; | |
2275 basic_block bb = region_copy[i]; | |
2276 | |
2277 /* Watch inconsistent profile. */ | |
2278 if (curr_count > region[i]->count) | |
2279 curr_count = region[i]->count; | |
2280 /* Scale current BB. */ | |
131 | 2281 if (region[i]->count.nonzero_p () && curr_count.initialized_p ()) |
111 | 2282 { |
2283 /* In the middle of the path we only scale the frequencies. | |
2284 In last BB we need to update probabilities of outgoing edges | |
2285 because we know which one is taken at the threaded path. */ | |
2286 if (i + 1 != n_region) | |
2287 scale_bbs_frequencies_profile_count (region + i, 1, | |
2288 region[i]->count - curr_count, | |
2289 region[i]->count); | |
2290 else | |
2291 update_bb_profile_for_threading (region[i], | |
131 | 2292 curr_count, |
111 | 2293 exit); |
2294 scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count, | |
2295 region_copy[i]->count); | |
2296 } | |
2297 | |
2298 if (single_succ_p (bb)) | |
2299 { | |
2300 /* Make sure the successor is the next node in the path. */ | |
2301 gcc_assert (i + 1 == n_region | |
2302 || region_copy[i + 1] == single_succ_edge (bb)->dest); | |
2303 if (i + 1 != n_region) | |
2304 { | |
2305 curr_count = single_succ_edge (bb)->count (); | |
2306 } | |
2307 continue; | |
2308 } | |
2309 | |
2310 /* Special case the last block on the path: make sure that it does not | |
2311 jump back on the copied path, including back to itself. */ | |
2312 if (i + 1 == n_region) | |
2313 { | |
2314 FOR_EACH_EDGE (e, ei, bb->succs) | |
2315 if (bb_in_bbs (e->dest, region_copy, n_region)) | |
2316 { | |
2317 basic_block orig = get_bb_original (e->dest); | |
2318 if (orig) | |
2319 redirect_edge_and_branch_force (e, orig); | |
2320 } | |
2321 continue; | |
2322 } | |
2323 | |
2324 /* Redirect all other edges jumping to non-adjacent blocks back to the | |
2325 original code. */ | |
2326 FOR_EACH_EDGE (e, ei, bb->succs) | |
2327 if (region_copy[i + 1] != e->dest) | |
2328 { | |
2329 basic_block orig = get_bb_original (e->dest); | |
2330 if (orig) | |
2331 redirect_edge_and_branch_force (e, orig); | |
2332 } | |
2333 else | |
2334 { | |
2335 curr_count = e->count (); | |
2336 } | |
2337 } | |
2338 | |
2339 | |
2340 if (flag_checking) | |
2341 verify_jump_thread (region_copy, n_region); | |
2342 | |
2343 /* Remove the last branch in the jump thread path. */ | |
2344 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest); | |
2345 | |
2346 /* And fixup the flags on the single remaining edge. */ | |
2347 edge fix_e = find_edge (region_copy[n_region - 1], exit->dest); | |
2348 fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL); | |
2349 fix_e->flags |= EDGE_FALLTHRU; | |
2350 | |
2351 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU); | |
2352 | |
2353 if (e) | |
2354 { | |
2355 rescan_loop_exit (e, true, false); | |
2356 e->probability = profile_probability::always (); | |
2357 } | |
2358 | |
2359 /* Redirect the entry and add the phi node arguments. */ | |
2360 if (entry->dest == loop->header) | |
2361 mark_loop_for_removal (loop); | |
2362 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest)); | |
2363 gcc_assert (redirected != NULL); | |
2364 flush_pending_stmts (entry); | |
2365 | |
2366 /* Add the other PHI node arguments. */ | |
2367 add_phi_args_after_copy (region_copy, n_region, NULL); | |
2368 | |
2369 free (region_copy); | |
2370 | |
131 | 2371 adjust_paths_after_duplication (current_path_no); |
2372 | |
111 | 2373 free_original_copy_tables (); |
2374 return true; | |
2375 } | |
2376 | |
2377 /* Return true when PATH is a valid jump-thread path. */ | |
2378 | |
2379 static bool | |
2380 valid_jump_thread_path (vec<jump_thread_edge *> *path) | |
2381 { | |
2382 unsigned len = path->length (); | |
2383 | |
2384 /* Check that the path is connected. */ | |
2385 for (unsigned int j = 0; j < len - 1; j++) | |
2386 { | |
2387 edge e = (*path)[j]->e; | |
2388 if (e->dest != (*path)[j+1]->e->src) | |
2389 return false; | |
2390 } | |
2391 return true; | |
2392 } | |
2393 | |
2394 /* Remove any queued jump threads that include edge E. | |
2395 | |
2396 We don't actually remove them here, just record the edges into ax | |
2397 hash table. That way we can do the search once per iteration of | |
2398 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */ | |
2399 | |
2400 void | |
2401 remove_jump_threads_including (edge_def *e) | |
2402 { | |
2403 if (!paths.exists ()) | |
2404 return; | |
2405 | |
2406 if (!removed_edges) | |
2407 removed_edges = new hash_table<struct removed_edges> (17); | |
2408 | |
2409 edge *slot = removed_edges->find_slot (e, INSERT); | |
2410 *slot = e; | |
2411 } | |
0 | 2412 |
2413 /* Walk through all blocks and thread incoming edges to the appropriate | |
2414 outgoing edge for each edge pair recorded in THREADED_EDGES. | |
2415 | |
2416 It is the caller's responsibility to fix the dominance information | |
2417 and rewrite duplicated SSA_NAMEs back into SSA form. | |
2418 | |
2419 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through | |
2420 loop headers if it does not simplify the loop. | |
2421 | |
2422 Returns true if one or more edges were threaded, false otherwise. */ | |
2423 | |
2424 bool | |
2425 thread_through_all_blocks (bool may_peel_loop_headers) | |
2426 { | |
2427 bool retval = false; | |
2428 unsigned int i; | |
2429 struct loop *loop; | |
111 | 2430 auto_bitmap threaded_blocks; |
131 | 2431 hash_set<edge> visited_starting_edges; |
111 | 2432 |
2433 if (!paths.exists ()) | |
2434 { | |
2435 retval = false; | |
2436 goto out; | |
2437 } | |
2438 | |
0 | 2439 memset (&thread_stats, 0, sizeof (thread_stats)); |
2440 | |
111 | 2441 /* Remove any paths that referenced removed edges. */ |
2442 if (removed_edges) | |
2443 for (i = 0; i < paths.length (); ) | |
2444 { | |
2445 unsigned int j; | |
2446 vec<jump_thread_edge *> *path = paths[i]; | |
2447 | |
2448 for (j = 0; j < path->length (); j++) | |
2449 { | |
2450 edge e = (*path)[j]->e; | |
2451 if (removed_edges->find_slot (e, NO_INSERT)) | |
2452 break; | |
2453 } | |
2454 | |
2455 if (j != path->length ()) | |
2456 { | |
2457 delete_jump_thread_path (path); | |
2458 paths.unordered_remove (i); | |
2459 continue; | |
2460 } | |
2461 i++; | |
2462 } | |
2463 | |
2464 /* Jump-thread all FSM threads before other jump-threads. */ | |
2465 for (i = 0; i < paths.length ();) | |
2466 { | |
2467 vec<jump_thread_edge *> *path = paths[i]; | |
2468 edge entry = (*path)[0]->e; | |
2469 | |
2470 /* Only code-generate FSM jump-threads in this loop. */ | |
2471 if ((*path)[0]->type != EDGE_FSM_THREAD) | |
2472 { | |
2473 i++; | |
2474 continue; | |
2475 } | |
2476 | |
131 | 2477 /* Do not jump-thread twice from the same starting edge. |
2478 | |
2479 Previously we only checked that we weren't threading twice | |
2480 from the same BB, but that was too restrictive. Imagine a | |
2481 path that starts from GIMPLE_COND(x_123 == 0,...), where both | |
2482 edges out of this conditional yield paths that can be | |
2483 threaded (for example, both lead to an x_123==0 or x_123!=0 | |
2484 conditional further down the line. */ | |
2485 if (visited_starting_edges.contains (entry) | |
2486 /* We may not want to realize this jump thread path for | |
2487 various reasons. So check it first. */ | |
111 | 2488 || !valid_jump_thread_path (path)) |
2489 { | |
2490 /* Remove invalid FSM jump-thread paths. */ | |
2491 delete_jump_thread_path (path); | |
2492 paths.unordered_remove (i); | |
2493 continue; | |
2494 } | |
2495 | |
2496 unsigned len = path->length (); | |
2497 edge exit = (*path)[len - 1]->e; | |
2498 basic_block *region = XNEWVEC (basic_block, len - 1); | |
2499 | |
2500 for (unsigned int j = 0; j < len - 1; j++) | |
2501 region[j] = (*path)[j]->e->dest; | |
2502 | |
131 | 2503 if (duplicate_thread_path (entry, exit, region, len - 1, i)) |
111 | 2504 { |
2505 /* We do not update dominance info. */ | |
2506 free_dominance_info (CDI_DOMINATORS); | |
131 | 2507 visited_starting_edges.add (entry); |
111 | 2508 retval = true; |
2509 thread_stats.num_threaded_edges++; | |
2510 } | |
2511 | |
2512 delete_jump_thread_path (path); | |
2513 paths.unordered_remove (i); | |
2514 free (region); | |
2515 } | |
2516 | |
2517 /* Remove from PATHS all the jump-threads starting with an edge already | |
2518 jump-threaded. */ | |
2519 for (i = 0; i < paths.length ();) | |
2520 { | |
2521 vec<jump_thread_edge *> *path = paths[i]; | |
2522 edge entry = (*path)[0]->e; | |
2523 | |
2524 /* Do not jump-thread twice from the same block. */ | |
131 | 2525 if (visited_starting_edges.contains (entry)) |
111 | 2526 { |
2527 delete_jump_thread_path (path); | |
2528 paths.unordered_remove (i); | |
2529 } | |
2530 else | |
2531 i++; | |
2532 } | |
2533 | |
0 | 2534 mark_threaded_blocks (threaded_blocks); |
2535 | |
2536 initialize_original_copy_tables (); | |
2537 | |
131 | 2538 /* The order in which we process jump threads can be important. |
2539 | |
2540 Consider if we have two jump threading paths A and B. If the | |
2541 target edge of A is the starting edge of B and we thread path A | |
2542 first, then we create an additional incoming edge into B->dest that | |
2543 we can not discover as a jump threading path on this iteration. | |
2544 | |
2545 If we instead thread B first, then the edge into B->dest will have | |
2546 already been redirected before we process path A and path A will | |
2547 natually, with no further work, target the redirected path for B. | |
2548 | |
2549 An post-order is sufficient here. Compute the ordering first, then | |
2550 process the blocks. */ | |
2551 if (!bitmap_empty_p (threaded_blocks)) | |
0 | 2552 { |
131 | 2553 int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); |
2554 unsigned int postorder_num = post_order_compute (postorder, false, false); | |
2555 for (unsigned int i = 0; i < postorder_num; i++) | |
2556 { | |
2557 unsigned int indx = postorder[i]; | |
2558 if (bitmap_bit_p (threaded_blocks, indx)) | |
2559 { | |
2560 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx); | |
2561 retval |= thread_block (bb, true); | |
2562 } | |
2563 } | |
2564 free (postorder); | |
0 | 2565 } |
2566 | |
2567 /* Then perform the threading through loop headers. We start with the | |
2568 innermost loop, so that the changes in cfg we perform won't affect | |
2569 further threading. */ | |
111 | 2570 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
0 | 2571 { |
2572 if (!loop->header | |
2573 || !bitmap_bit_p (threaded_blocks, loop->header->index)) | |
2574 continue; | |
2575 | |
2576 retval |= thread_through_loop_header (loop, may_peel_loop_headers); | |
2577 } | |
2578 | |
111 | 2579 /* All jump threading paths should have been resolved at this |
2580 point. Verify that is the case. */ | |
2581 basic_block bb; | |
2582 FOR_EACH_BB_FN (bb, cfun) | |
2583 { | |
2584 edge_iterator ei; | |
2585 edge e; | |
2586 FOR_EACH_EDGE (e, ei, bb->preds) | |
2587 gcc_assert (e->aux == NULL); | |
2588 } | |
2589 | |
0 | 2590 statistics_counter_event (cfun, "Jumps threaded", |
2591 thread_stats.num_threaded_edges); | |
2592 | |
2593 free_original_copy_tables (); | |
2594 | |
111 | 2595 paths.release (); |
0 | 2596 |
2597 if (retval) | |
2598 loops_state_set (LOOPS_NEED_FIXUP); | |
2599 | |
111 | 2600 out: |
2601 delete removed_edges; | |
2602 removed_edges = NULL; | |
0 | 2603 return retval; |
2604 } | |
2605 | |
111 | 2606 /* Delete the jump threading path PATH. We have to explicitly delete |
2607 each entry in the vector, then the container. */ | |
2608 | |
2609 void | |
2610 delete_jump_thread_path (vec<jump_thread_edge *> *path) | |
2611 { | |
2612 for (unsigned int i = 0; i < path->length (); i++) | |
2613 delete (*path)[i]; | |
2614 path->release(); | |
2615 delete path; | |
2616 } | |
2617 | |
0 | 2618 /* Register a jump threading opportunity. We queue up all the jump |
2619 threading opportunities discovered by a pass and update the CFG | |
2620 and SSA form all at once. | |
2621 | |
2622 E is the edge we can thread, E2 is the new target edge, i.e., we | |
2623 are effectively recording that E->dest can be changed to E2->dest | |
2624 after fixing the SSA graph. */ | |
2625 | |
2626 void | |
111 | 2627 register_jump_thread (vec<jump_thread_edge *> *path) |
0 | 2628 { |
111 | 2629 if (!dbg_cnt (registered_jump_thread)) |
2630 { | |
2631 delete_jump_thread_path (path); | |
2632 return; | |
2633 } | |
2634 | |
2635 /* First make sure there are no NULL outgoing edges on the jump threading | |
2636 path. That can happen for jumping to a constant address. */ | |
2637 for (unsigned int i = 0; i < path->length (); i++) | |
2638 { | |
2639 if ((*path)[i]->e == NULL) | |
2640 { | |
2641 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2642 { | |
2643 fprintf (dump_file, | |
2644 "Found NULL edge in jump threading path. Cancelling jump thread:\n"); | |
2645 dump_jump_thread_path (dump_file, *path, false); | |
2646 } | |
2647 | |
2648 delete_jump_thread_path (path); | |
2649 return; | |
2650 } | |
2651 | |
2652 /* Only the FSM threader is allowed to thread across | |
2653 backedges in the CFG. */ | |
2654 if (flag_checking | |
2655 && (*path)[0]->type != EDGE_FSM_THREAD) | |
2656 gcc_assert (((*path)[i]->e->flags & EDGE_DFS_BACK) == 0); | |
2657 } | |
2658 | |
2659 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2660 dump_jump_thread_path (dump_file, *path, true); | |
2661 | |
2662 if (!paths.exists ()) | |
2663 paths.create (5); | |
2664 | |
2665 paths.safe_push (path); | |
0 | 2666 } |
131 | 2667 |
2668 /* Return how many uses of T there are within BB, as long as there | |
2669 aren't any uses outside BB. If there are any uses outside BB, | |
2670 return -1 if there's at most one use within BB, or -2 if there is | |
2671 more than one use within BB. */ | |
2672 | |
2673 static int | |
2674 uses_in_bb (tree t, basic_block bb) | |
2675 { | |
2676 int uses = 0; | |
2677 bool outside_bb = false; | |
2678 | |
2679 imm_use_iterator iter; | |
2680 use_operand_p use_p; | |
2681 FOR_EACH_IMM_USE_FAST (use_p, iter, t) | |
2682 { | |
2683 if (is_gimple_debug (USE_STMT (use_p))) | |
2684 continue; | |
2685 | |
2686 if (gimple_bb (USE_STMT (use_p)) != bb) | |
2687 outside_bb = true; | |
2688 else | |
2689 uses++; | |
2690 | |
2691 if (outside_bb && uses > 1) | |
2692 return -2; | |
2693 } | |
2694 | |
2695 if (outside_bb) | |
2696 return -1; | |
2697 | |
2698 return uses; | |
2699 } | |
2700 | |
2701 /* Starting from the final control flow stmt in BB, assuming it will | |
2702 be removed, follow uses in to-be-removed stmts back to their defs | |
2703 and count how many defs are to become dead and be removed as | |
2704 well. */ | |
2705 | |
2706 unsigned int | |
2707 estimate_threading_killed_stmts (basic_block bb) | |
2708 { | |
2709 int killed_stmts = 0; | |
2710 hash_map<tree, int> ssa_remaining_uses; | |
2711 auto_vec<gimple *, 4> dead_worklist; | |
2712 | |
2713 /* If the block has only two predecessors, threading will turn phi | |
2714 dsts into either src, so count them as dead stmts. */ | |
2715 bool drop_all_phis = EDGE_COUNT (bb->preds) == 2; | |
2716 | |
2717 if (drop_all_phis) | |
2718 for (gphi_iterator gsi = gsi_start_phis (bb); | |
2719 !gsi_end_p (gsi); gsi_next (&gsi)) | |
2720 { | |
2721 gphi *phi = gsi.phi (); | |
2722 tree dst = gimple_phi_result (phi); | |
2723 | |
2724 /* We don't count virtual PHIs as stmts in | |
2725 record_temporary_equivalences_from_phis. */ | |
2726 if (virtual_operand_p (dst)) | |
2727 continue; | |
2728 | |
2729 killed_stmts++; | |
2730 } | |
2731 | |
2732 if (gsi_end_p (gsi_last_bb (bb))) | |
2733 return killed_stmts; | |
2734 | |
2735 gimple *stmt = gsi_stmt (gsi_last_bb (bb)); | |
2736 if (gimple_code (stmt) != GIMPLE_COND | |
2737 && gimple_code (stmt) != GIMPLE_GOTO | |
2738 && gimple_code (stmt) != GIMPLE_SWITCH) | |
2739 return killed_stmts; | |
2740 | |
2741 /* The control statement is always dead. */ | |
2742 killed_stmts++; | |
2743 dead_worklist.quick_push (stmt); | |
2744 while (!dead_worklist.is_empty ()) | |
2745 { | |
2746 stmt = dead_worklist.pop (); | |
2747 | |
2748 ssa_op_iter iter; | |
2749 use_operand_p use_p; | |
2750 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) | |
2751 { | |
2752 tree t = USE_FROM_PTR (use_p); | |
2753 gimple *def = SSA_NAME_DEF_STMT (t); | |
2754 | |
2755 if (gimple_bb (def) == bb | |
2756 && (gimple_code (def) != GIMPLE_PHI | |
2757 || !drop_all_phis) | |
2758 && !gimple_has_side_effects (def)) | |
2759 { | |
2760 int *usesp = ssa_remaining_uses.get (t); | |
2761 int uses; | |
2762 | |
2763 if (usesp) | |
2764 uses = *usesp; | |
2765 else | |
2766 uses = uses_in_bb (t, bb); | |
2767 | |
2768 gcc_assert (uses); | |
2769 | |
2770 /* Don't bother recording the expected use count if we | |
2771 won't find any further uses within BB. */ | |
2772 if (!usesp && (uses < -1 || uses > 1)) | |
2773 { | |
2774 usesp = &ssa_remaining_uses.get_or_insert (t); | |
2775 *usesp = uses; | |
2776 } | |
2777 | |
2778 if (uses < 0) | |
2779 continue; | |
2780 | |
2781 --uses; | |
2782 if (usesp) | |
2783 *usesp = uses; | |
2784 | |
2785 if (!uses) | |
2786 { | |
2787 killed_stmts++; | |
2788 if (usesp) | |
2789 ssa_remaining_uses.remove (t); | |
2790 if (gimple_code (def) != GIMPLE_PHI) | |
2791 dead_worklist.safe_push (def); | |
2792 } | |
2793 } | |
2794 } | |
2795 } | |
2796 | |
2797 if (dump_file) | |
2798 fprintf (dump_file, "threading bb %i kills %i stmts\n", | |
2799 bb->index, killed_stmts); | |
2800 | |
2801 return killed_stmts; | |
2802 } |