Mercurial > hg > CbC > CbC_gcc
comparison gcc/cfgloopmanip.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 77e2b8dfacca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Loop manipulation code for GNU compiler. | |
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009 Free Software | |
3 Foundation, Inc. | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
24 #include "tm.h" | |
25 #include "rtl.h" | |
26 #include "hard-reg-set.h" | |
27 #include "obstack.h" | |
28 #include "basic-block.h" | |
29 #include "cfgloop.h" | |
30 #include "cfglayout.h" | |
31 #include "cfghooks.h" | |
32 #include "output.h" | |
33 #include "tree-flow.h" | |
34 | |
35 static void duplicate_subloops (struct loop *, struct loop *); | |
36 static void copy_loops_to (struct loop **, int, | |
37 struct loop *); | |
38 static void loop_redirect_edge (edge, basic_block); | |
39 static void remove_bbs (basic_block *, int); | |
40 static bool rpe_enum_p (const_basic_block, const void *); | |
41 static int find_path (edge, basic_block **); | |
42 static void fix_loop_placements (struct loop *, bool *); | |
43 static bool fix_bb_placement (basic_block); | |
44 static void fix_bb_placements (basic_block, bool *); | |
45 static void unloop (struct loop *, bool *); | |
46 | |
47 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) | |
48 | |
49 /* Checks whether basic block BB is dominated by DATA. */ | |
50 static bool | |
51 rpe_enum_p (const_basic_block bb, const void *data) | |
52 { | |
53 return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data); | |
54 } | |
55 | |
56 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */ | |
57 | |
58 static void | |
59 remove_bbs (basic_block *bbs, int nbbs) | |
60 { | |
61 int i; | |
62 | |
63 for (i = 0; i < nbbs; i++) | |
64 delete_basic_block (bbs[i]); | |
65 } | |
66 | |
67 /* Find path -- i.e. the basic blocks dominated by edge E and put them | |
68 into array BBS, that will be allocated large enough to contain them. | |
69 E->dest must have exactly one predecessor for this to work (it is | |
70 easy to achieve and we do not put it here because we do not want to | |
71 alter anything by this function). The number of basic blocks in the | |
72 path is returned. */ | |
73 static int | |
74 find_path (edge e, basic_block **bbs) | |
75 { | |
76 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1); | |
77 | |
78 /* Find bbs in the path. */ | |
79 *bbs = XCNEWVEC (basic_block, n_basic_blocks); | |
80 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs, | |
81 n_basic_blocks, e->dest); | |
82 } | |
83 | |
84 /* Fix placement of basic block BB inside loop hierarchy -- | |
85 Let L be a loop to that BB belongs. Then every successor of BB must either | |
86 1) belong to some superloop of loop L, or | |
87 2) be a header of loop K such that K->outer is superloop of L | |
88 Returns true if we had to move BB into other loop to enforce this condition, | |
89 false if the placement of BB was already correct (provided that placements | |
90 of its successors are correct). */ | |
91 static bool | |
92 fix_bb_placement (basic_block bb) | |
93 { | |
94 edge e; | |
95 edge_iterator ei; | |
96 struct loop *loop = current_loops->tree_root, *act; | |
97 | |
98 FOR_EACH_EDGE (e, ei, bb->succs) | |
99 { | |
100 if (e->dest == EXIT_BLOCK_PTR) | |
101 continue; | |
102 | |
103 act = e->dest->loop_father; | |
104 if (act->header == e->dest) | |
105 act = loop_outer (act); | |
106 | |
107 if (flow_loop_nested_p (loop, act)) | |
108 loop = act; | |
109 } | |
110 | |
111 if (loop == bb->loop_father) | |
112 return false; | |
113 | |
114 remove_bb_from_loops (bb); | |
115 add_bb_to_loop (bb, loop); | |
116 | |
117 return true; | |
118 } | |
119 | |
120 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop | |
121 of LOOP to that leads at least one exit edge of LOOP, and set it | |
122 as the immediate superloop of LOOP. Return true if the immediate superloop | |
123 of LOOP changed. */ | |
124 | |
125 static bool | |
126 fix_loop_placement (struct loop *loop) | |
127 { | |
128 unsigned i; | |
129 edge e; | |
130 VEC (edge, heap) *exits = get_loop_exit_edges (loop); | |
131 struct loop *father = current_loops->tree_root, *act; | |
132 bool ret = false; | |
133 | |
134 for (i = 0; VEC_iterate (edge, exits, i, e); i++) | |
135 { | |
136 act = find_common_loop (loop, e->dest->loop_father); | |
137 if (flow_loop_nested_p (father, act)) | |
138 father = act; | |
139 } | |
140 | |
141 if (father != loop_outer (loop)) | |
142 { | |
143 for (act = loop_outer (loop); act != father; act = loop_outer (act)) | |
144 act->num_nodes -= loop->num_nodes; | |
145 flow_loop_tree_node_remove (loop); | |
146 flow_loop_tree_node_add (father, loop); | |
147 | |
148 /* The exit edges of LOOP no longer exits its original immediate | |
149 superloops; remove them from the appropriate exit lists. */ | |
150 for (i = 0; VEC_iterate (edge, exits, i, e); i++) | |
151 rescan_loop_exit (e, false, false); | |
152 | |
153 ret = true; | |
154 } | |
155 | |
156 VEC_free (edge, heap, exits); | |
157 return ret; | |
158 } | |
159 | |
160 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e. | |
161 enforce condition condition stated in description of fix_bb_placement. We | |
162 start from basic block FROM that had some of its successors removed, so that | |
163 his placement no longer has to be correct, and iteratively fix placement of | |
164 its predecessors that may change if placement of FROM changed. Also fix | |
165 placement of subloops of FROM->loop_father, that might also be altered due | |
166 to this change; the condition for them is similar, except that instead of | |
167 successors we consider edges coming out of the loops. | |
168 | |
169 If the changes may invalidate the information about irreducible regions, | |
170 IRRED_INVALIDATED is set to true. */ | |
171 | |
172 static void | |
173 fix_bb_placements (basic_block from, | |
174 bool *irred_invalidated) | |
175 { | |
176 sbitmap in_queue; | |
177 basic_block *queue, *qtop, *qbeg, *qend; | |
178 struct loop *base_loop; | |
179 edge e; | |
180 | |
181 /* We pass through blocks back-reachable from FROM, testing whether some | |
182 of their successors moved to outer loop. It may be necessary to | |
183 iterate several times, but it is finite, as we stop unless we move | |
184 the basic block up the loop structure. The whole story is a bit | |
185 more complicated due to presence of subloops, those are moved using | |
186 fix_loop_placement. */ | |
187 | |
188 base_loop = from->loop_father; | |
189 if (base_loop == current_loops->tree_root) | |
190 return; | |
191 | |
192 in_queue = sbitmap_alloc (last_basic_block); | |
193 sbitmap_zero (in_queue); | |
194 SET_BIT (in_queue, from->index); | |
195 /* Prevent us from going out of the base_loop. */ | |
196 SET_BIT (in_queue, base_loop->header->index); | |
197 | |
198 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1); | |
199 qtop = queue + base_loop->num_nodes + 1; | |
200 qbeg = queue; | |
201 qend = queue + 1; | |
202 *qbeg = from; | |
203 | |
204 while (qbeg != qend) | |
205 { | |
206 edge_iterator ei; | |
207 from = *qbeg; | |
208 qbeg++; | |
209 if (qbeg == qtop) | |
210 qbeg = queue; | |
211 RESET_BIT (in_queue, from->index); | |
212 | |
213 if (from->loop_father->header == from) | |
214 { | |
215 /* Subloop header, maybe move the loop upward. */ | |
216 if (!fix_loop_placement (from->loop_father)) | |
217 continue; | |
218 } | |
219 else | |
220 { | |
221 /* Ordinary basic block. */ | |
222 if (!fix_bb_placement (from)) | |
223 continue; | |
224 } | |
225 | |
226 FOR_EACH_EDGE (e, ei, from->succs) | |
227 { | |
228 if (e->flags & EDGE_IRREDUCIBLE_LOOP) | |
229 *irred_invalidated = true; | |
230 } | |
231 | |
232 /* Something has changed, insert predecessors into queue. */ | |
233 FOR_EACH_EDGE (e, ei, from->preds) | |
234 { | |
235 basic_block pred = e->src; | |
236 struct loop *nca; | |
237 | |
238 if (e->flags & EDGE_IRREDUCIBLE_LOOP) | |
239 *irred_invalidated = true; | |
240 | |
241 if (TEST_BIT (in_queue, pred->index)) | |
242 continue; | |
243 | |
244 /* If it is subloop, then it either was not moved, or | |
245 the path up the loop tree from base_loop do not contain | |
246 it. */ | |
247 nca = find_common_loop (pred->loop_father, base_loop); | |
248 if (pred->loop_father != base_loop | |
249 && (nca == base_loop | |
250 || nca != pred->loop_father)) | |
251 pred = pred->loop_father->header; | |
252 else if (!flow_loop_nested_p (from->loop_father, pred->loop_father)) | |
253 { | |
254 /* No point in processing it. */ | |
255 continue; | |
256 } | |
257 | |
258 if (TEST_BIT (in_queue, pred->index)) | |
259 continue; | |
260 | |
261 /* Schedule the basic block. */ | |
262 *qend = pred; | |
263 qend++; | |
264 if (qend == qtop) | |
265 qend = queue; | |
266 SET_BIT (in_queue, pred->index); | |
267 } | |
268 } | |
269 free (in_queue); | |
270 free (queue); | |
271 } | |
272 | |
273 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E | |
274 and update loop structures and dominators. Return true if we were able | |
275 to remove the path, false otherwise (and nothing is affected then). */ | |
276 bool | |
277 remove_path (edge e) | |
278 { | |
279 edge ae; | |
280 basic_block *rem_bbs, *bord_bbs, from, bb; | |
281 VEC (basic_block, heap) *dom_bbs; | |
282 int i, nrem, n_bord_bbs, nreml; | |
283 sbitmap seen; | |
284 bool irred_invalidated = false; | |
285 struct loop **deleted_loop; | |
286 | |
287 if (!can_remove_branch_p (e)) | |
288 return false; | |
289 | |
290 /* Keep track of whether we need to update information about irreducible | |
291 regions. This is the case if the removed area is a part of the | |
292 irreducible region, or if the set of basic blocks that belong to a loop | |
293 that is inside an irreducible region is changed, or if such a loop is | |
294 removed. */ | |
295 if (e->flags & EDGE_IRREDUCIBLE_LOOP) | |
296 irred_invalidated = true; | |
297 | |
298 /* We need to check whether basic blocks are dominated by the edge | |
299 e, but we only have basic block dominators. This is easy to | |
300 fix -- when e->dest has exactly one predecessor, this corresponds | |
301 to blocks dominated by e->dest, if not, split the edge. */ | |
302 if (!single_pred_p (e->dest)) | |
303 e = single_pred_edge (split_edge (e)); | |
304 | |
305 /* It may happen that by removing path we remove one or more loops | |
306 we belong to. In this case first unloop the loops, then proceed | |
307 normally. We may assume that e->dest is not a header of any loop, | |
308 as it now has exactly one predecessor. */ | |
309 while (loop_outer (e->src->loop_father) | |
310 && dominated_by_p (CDI_DOMINATORS, | |
311 e->src->loop_father->latch, e->dest)) | |
312 unloop (e->src->loop_father, &irred_invalidated); | |
313 | |
314 /* Identify the path. */ | |
315 nrem = find_path (e, &rem_bbs); | |
316 | |
317 n_bord_bbs = 0; | |
318 bord_bbs = XCNEWVEC (basic_block, n_basic_blocks); | |
319 seen = sbitmap_alloc (last_basic_block); | |
320 sbitmap_zero (seen); | |
321 | |
322 /* Find "border" hexes -- i.e. those with predecessor in removed path. */ | |
323 for (i = 0; i < nrem; i++) | |
324 SET_BIT (seen, rem_bbs[i]->index); | |
325 for (i = 0; i < nrem; i++) | |
326 { | |
327 edge_iterator ei; | |
328 bb = rem_bbs[i]; | |
329 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs) | |
330 if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index)) | |
331 { | |
332 SET_BIT (seen, ae->dest->index); | |
333 bord_bbs[n_bord_bbs++] = ae->dest; | |
334 | |
335 if (ae->flags & EDGE_IRREDUCIBLE_LOOP) | |
336 irred_invalidated = true; | |
337 } | |
338 } | |
339 | |
340 /* Remove the path. */ | |
341 from = e->src; | |
342 remove_branch (e); | |
343 dom_bbs = NULL; | |
344 | |
345 /* Cancel loops contained in the path. */ | |
346 deleted_loop = XNEWVEC (struct loop *, nrem); | |
347 nreml = 0; | |
348 for (i = 0; i < nrem; i++) | |
349 if (rem_bbs[i]->loop_father->header == rem_bbs[i]) | |
350 deleted_loop[nreml++] = rem_bbs[i]->loop_father; | |
351 | |
352 for (i = 0; i < nreml; i++) | |
353 cancel_loop_tree (deleted_loop[i]); | |
354 free (deleted_loop); | |
355 | |
356 remove_bbs (rem_bbs, nrem); | |
357 free (rem_bbs); | |
358 | |
359 /* Find blocks whose dominators may be affected. */ | |
360 sbitmap_zero (seen); | |
361 for (i = 0; i < n_bord_bbs; i++) | |
362 { | |
363 basic_block ldom; | |
364 | |
365 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]); | |
366 if (TEST_BIT (seen, bb->index)) | |
367 continue; | |
368 SET_BIT (seen, bb->index); | |
369 | |
370 for (ldom = first_dom_son (CDI_DOMINATORS, bb); | |
371 ldom; | |
372 ldom = next_dom_son (CDI_DOMINATORS, ldom)) | |
373 if (!dominated_by_p (CDI_DOMINATORS, from, ldom)) | |
374 VEC_safe_push (basic_block, heap, dom_bbs, ldom); | |
375 } | |
376 | |
377 free (seen); | |
378 | |
379 /* Recount dominators. */ | |
380 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true); | |
381 VEC_free (basic_block, heap, dom_bbs); | |
382 free (bord_bbs); | |
383 | |
384 /* Fix placements of basic blocks inside loops and the placement of | |
385 loops in the loop tree. */ | |
386 fix_bb_placements (from, &irred_invalidated); | |
387 fix_loop_placements (from->loop_father, &irred_invalidated); | |
388 | |
389 if (irred_invalidated | |
390 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
391 mark_irreducible_loops (); | |
392 | |
393 return true; | |
394 } | |
395 | |
396 /* Creates place for a new LOOP in loops structure. */ | |
397 | |
398 static void | |
399 place_new_loop (struct loop *loop) | |
400 { | |
401 loop->num = number_of_loops (); | |
402 VEC_safe_push (loop_p, gc, current_loops->larray, loop); | |
403 } | |
404 | |
405 /* Given LOOP structure with filled header and latch, find the body of the | |
406 corresponding loop and add it to loops tree. Insert the LOOP as a son of | |
407 outer. */ | |
408 | |
409 void | |
410 add_loop (struct loop *loop, struct loop *outer) | |
411 { | |
412 basic_block *bbs; | |
413 int i, n; | |
414 struct loop *subloop; | |
415 edge e; | |
416 edge_iterator ei; | |
417 | |
418 /* Add it to loop structure. */ | |
419 place_new_loop (loop); | |
420 flow_loop_tree_node_add (outer, loop); | |
421 | |
422 /* Find its nodes. */ | |
423 bbs = XNEWVEC (basic_block, n_basic_blocks); | |
424 n = get_loop_body_with_size (loop, bbs, n_basic_blocks); | |
425 | |
426 for (i = 0; i < n; i++) | |
427 { | |
428 if (bbs[i]->loop_father == outer) | |
429 { | |
430 remove_bb_from_loops (bbs[i]); | |
431 add_bb_to_loop (bbs[i], loop); | |
432 continue; | |
433 } | |
434 | |
435 loop->num_nodes++; | |
436 | |
437 /* If we find a direct subloop of OUTER, move it to LOOP. */ | |
438 subloop = bbs[i]->loop_father; | |
439 if (loop_outer (subloop) == outer | |
440 && subloop->header == bbs[i]) | |
441 { | |
442 flow_loop_tree_node_remove (subloop); | |
443 flow_loop_tree_node_add (loop, subloop); | |
444 } | |
445 } | |
446 | |
447 /* Update the information about loop exit edges. */ | |
448 for (i = 0; i < n; i++) | |
449 { | |
450 FOR_EACH_EDGE (e, ei, bbs[i]->succs) | |
451 { | |
452 rescan_loop_exit (e, false, false); | |
453 } | |
454 } | |
455 | |
456 free (bbs); | |
457 } | |
458 | |
459 /* Multiply all frequencies in LOOP by NUM/DEN. */ | |
460 void | |
461 scale_loop_frequencies (struct loop *loop, int num, int den) | |
462 { | |
463 basic_block *bbs; | |
464 | |
465 bbs = get_loop_body (loop); | |
466 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den); | |
467 free (bbs); | |
468 } | |
469 | |
470 /* Recompute dominance information for basic blocks outside LOOP. */ | |
471 | |
472 static void | |
473 update_dominators_in_loop (struct loop *loop) | |
474 { | |
475 VEC (basic_block, heap) *dom_bbs = NULL; | |
476 sbitmap seen; | |
477 basic_block *body; | |
478 unsigned i; | |
479 | |
480 seen = sbitmap_alloc (last_basic_block); | |
481 sbitmap_zero (seen); | |
482 body = get_loop_body (loop); | |
483 | |
484 for (i = 0; i < loop->num_nodes; i++) | |
485 SET_BIT (seen, body[i]->index); | |
486 | |
487 for (i = 0; i < loop->num_nodes; i++) | |
488 { | |
489 basic_block ldom; | |
490 | |
491 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]); | |
492 ldom; | |
493 ldom = next_dom_son (CDI_DOMINATORS, ldom)) | |
494 if (!TEST_BIT (seen, ldom->index)) | |
495 { | |
496 SET_BIT (seen, ldom->index); | |
497 VEC_safe_push (basic_block, heap, dom_bbs, ldom); | |
498 } | |
499 } | |
500 | |
501 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); | |
502 free (body); | |
503 free (seen); | |
504 VEC_free (basic_block, heap, dom_bbs); | |
505 } | |
506 | |
507 /* Creates an if region as shown above. CONDITION is used to create | |
508 the test for the if. | |
509 | |
510 | | |
511 | ------------- ------------- | |
512 | | pred_bb | | pred_bb | | |
513 | ------------- ------------- | |
514 | | | | |
515 | | | ENTRY_EDGE | |
516 | | ENTRY_EDGE V | |
517 | | ====> ------------- | |
518 | | | cond_bb | | |
519 | | | CONDITION | | |
520 | | ------------- | |
521 | V / \ | |
522 | ------------- e_false / \ e_true | |
523 | | succ_bb | V V | |
524 | ------------- ----------- ----------- | |
525 | | false_bb | | true_bb | | |
526 | ----------- ----------- | |
527 | \ / | |
528 | \ / | |
529 | V V | |
530 | ------------- | |
531 | | join_bb | | |
532 | ------------- | |
533 | | exit_edge (result) | |
534 | V | |
535 | ----------- | |
536 | | succ_bb | | |
537 | ----------- | |
538 | | |
539 */ | |
540 | |
541 edge | |
542 create_empty_if_region_on_edge (edge entry_edge, tree condition) | |
543 { | |
544 | |
545 basic_block succ_bb, cond_bb, true_bb, false_bb, join_bb; | |
546 edge e_true, e_false, exit_edge; | |
547 gimple cond_stmt; | |
548 tree simple_cond; | |
549 gimple_stmt_iterator gsi; | |
550 | |
551 succ_bb = entry_edge->dest; | |
552 cond_bb = split_edge (entry_edge); | |
553 | |
554 /* Insert condition in cond_bb. */ | |
555 gsi = gsi_last_bb (cond_bb); | |
556 simple_cond = | |
557 force_gimple_operand_gsi (&gsi, condition, true, NULL, | |
558 false, GSI_NEW_STMT); | |
559 cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE); | |
560 gsi = gsi_last_bb (cond_bb); | |
561 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); | |
562 | |
563 join_bb = split_edge (single_succ_edge (cond_bb)); | |
564 | |
565 e_true = single_succ_edge (cond_bb); | |
566 true_bb = split_edge (e_true); | |
567 | |
568 e_false = make_edge (cond_bb, join_bb, 0); | |
569 false_bb = split_edge (e_false); | |
570 | |
571 e_true->flags &= ~EDGE_FALLTHRU; | |
572 e_true->flags |= EDGE_TRUE_VALUE; | |
573 e_false->flags &= ~EDGE_FALLTHRU; | |
574 e_false->flags |= EDGE_FALSE_VALUE; | |
575 | |
576 set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src); | |
577 set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb); | |
578 set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb); | |
579 set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb); | |
580 | |
581 exit_edge = single_succ_edge (join_bb); | |
582 | |
583 if (single_pred_p (exit_edge->dest)) | |
584 set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb); | |
585 | |
586 return exit_edge; | |
587 } | |
588 | |
589 /* create_empty_loop_on_edge | |
590 | | |
591 | ------------- ------------------------ | |
592 | | pred_bb | | pred_bb | | |
593 | ------------- | IV_0 = INITIAL_VALUE | | |
594 | | ------------------------ | |
595 | | ______ | ENTRY_EDGE | |
596 | | ENTRY_EDGE / V V | |
597 | | ====> | ----------------------------- | |
598 | | | | IV_BEFORE = phi (IV_0, IV) | | |
599 | | | | loop_header | | |
600 | V | | IV_BEFORE <= UPPER_BOUND | | |
601 | ------------- | -----------------------\----- | |
602 | | succ_bb | | | \ | |
603 | ------------- | | \ exit_e | |
604 | | V V--------- | |
605 | | -------------- | succ_bb | | |
606 | | | loop_latch | ---------- | |
607 | | |IV = IV_BEFORE + STRIDE | |
608 | | -------------- | |
609 | \ / | |
610 | \ ___ / | |
611 | |
612 Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME | |
613 that is used before the increment of IV. IV_BEFORE should be used for | |
614 adding code to the body that uses the IV. OUTER is the outer loop in | |
615 which the new loop should be inserted. */ | |
616 | |
617 struct loop * | |
618 create_empty_loop_on_edge (edge entry_edge, | |
619 tree initial_value, | |
620 tree stride, tree upper_bound, | |
621 tree iv, | |
622 tree *iv_before, | |
623 struct loop *outer) | |
624 { | |
625 basic_block loop_header, loop_latch, succ_bb, pred_bb; | |
626 struct loop *loop; | |
627 int freq; | |
628 gcov_type cnt; | |
629 gimple_stmt_iterator gsi; | |
630 bool insert_after; | |
631 gimple_seq stmts; | |
632 gimple cond_expr; | |
633 tree exit_test; | |
634 edge exit_e; | |
635 int prob; | |
636 tree upper_bound_gimplified; | |
637 | |
638 gcc_assert (entry_edge && initial_value && stride && upper_bound && iv); | |
639 | |
640 /* Create header, latch and wire up the loop. */ | |
641 pred_bb = entry_edge->src; | |
642 loop_header = split_edge (entry_edge); | |
643 loop_latch = split_edge (single_succ_edge (loop_header)); | |
644 succ_bb = single_succ (loop_latch); | |
645 make_edge (loop_header, succ_bb, 0); | |
646 redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header); | |
647 | |
648 /* Set immediate dominator information. */ | |
649 set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb); | |
650 set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header); | |
651 set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header); | |
652 | |
653 /* Initialize a loop structure and put it in a loop hierarchy. */ | |
654 loop = alloc_loop (); | |
655 loop->header = loop_header; | |
656 loop->latch = loop_latch; | |
657 add_loop (loop, outer); | |
658 | |
659 /* TODO: Fix frequencies and counts. */ | |
660 freq = EDGE_FREQUENCY (entry_edge); | |
661 cnt = entry_edge->count; | |
662 | |
663 prob = REG_BR_PROB_BASE / 2; | |
664 | |
665 scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE); | |
666 | |
667 /* Update dominators. */ | |
668 update_dominators_in_loop (loop); | |
669 | |
670 /* Construct IV code in loop. */ | |
671 initial_value = force_gimple_operand (initial_value, &stmts, true, iv); | |
672 if (stmts) | |
673 { | |
674 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts); | |
675 gsi_commit_edge_inserts (); | |
676 } | |
677 | |
678 standard_iv_increment_position (loop, &gsi, &insert_after); | |
679 create_iv (initial_value, stride, iv, loop, &gsi, insert_after, | |
680 iv_before, NULL); | |
681 | |
682 /* Modify edge flags. */ | |
683 exit_e = single_exit (loop); | |
684 exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE; | |
685 single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE; | |
686 | |
687 gsi = gsi_last_bb (exit_e->src); | |
688 | |
689 upper_bound_gimplified = | |
690 force_gimple_operand_gsi (&gsi, upper_bound, true, NULL, | |
691 false, GSI_NEW_STMT); | |
692 gsi = gsi_last_bb (exit_e->src); | |
693 | |
694 cond_expr = gimple_build_cond | |
695 (LE_EXPR, *iv_before, upper_bound_gimplified, NULL_TREE, NULL_TREE); | |
696 | |
697 exit_test = gimple_cond_lhs (cond_expr); | |
698 exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL, | |
699 false, GSI_NEW_STMT); | |
700 gimple_cond_set_lhs (cond_expr, exit_test); | |
701 gsi = gsi_last_bb (exit_e->src); | |
702 gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT); | |
703 | |
704 return loop; | |
705 } | |
706 | |
707 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting | |
708 latch to header and update loop tree and dominators | |
709 accordingly. Everything between them plus LATCH_EDGE destination must | |
710 be dominated by HEADER_EDGE destination, and back-reachable from | |
711 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB, | |
712 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and | |
713 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE. | |
714 Returns the newly created loop. Frequencies and counts in the new loop | |
715 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */ | |
716 | |
717 struct loop * | |
718 loopify (edge latch_edge, edge header_edge, | |
719 basic_block switch_bb, edge true_edge, edge false_edge, | |
720 bool redirect_all_edges, unsigned true_scale, unsigned false_scale) | |
721 { | |
722 basic_block succ_bb = latch_edge->dest; | |
723 basic_block pred_bb = header_edge->src; | |
724 struct loop *loop = alloc_loop (); | |
725 struct loop *outer = loop_outer (succ_bb->loop_father); | |
726 int freq; | |
727 gcov_type cnt; | |
728 edge e; | |
729 edge_iterator ei; | |
730 | |
731 loop->header = header_edge->dest; | |
732 loop->latch = latch_edge->src; | |
733 | |
734 freq = EDGE_FREQUENCY (header_edge); | |
735 cnt = header_edge->count; | |
736 | |
737 /* Redirect edges. */ | |
738 loop_redirect_edge (latch_edge, loop->header); | |
739 loop_redirect_edge (true_edge, succ_bb); | |
740 | |
741 /* During loop versioning, one of the switch_bb edge is already properly | |
742 set. Do not redirect it again unless redirect_all_edges is true. */ | |
743 if (redirect_all_edges) | |
744 { | |
745 loop_redirect_edge (header_edge, switch_bb); | |
746 loop_redirect_edge (false_edge, loop->header); | |
747 | |
748 /* Update dominators. */ | |
749 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb); | |
750 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb); | |
751 } | |
752 | |
753 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb); | |
754 | |
755 /* Compute new loop. */ | |
756 add_loop (loop, outer); | |
757 | |
758 /* Add switch_bb to appropriate loop. */ | |
759 if (switch_bb->loop_father) | |
760 remove_bb_from_loops (switch_bb); | |
761 add_bb_to_loop (switch_bb, outer); | |
762 | |
763 /* Fix frequencies. */ | |
764 if (redirect_all_edges) | |
765 { | |
766 switch_bb->frequency = freq; | |
767 switch_bb->count = cnt; | |
768 FOR_EACH_EDGE (e, ei, switch_bb->succs) | |
769 { | |
770 e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE; | |
771 } | |
772 } | |
773 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE); | |
774 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE); | |
775 update_dominators_in_loop (loop); | |
776 | |
777 return loop; | |
778 } | |
779 | |
780 /* Remove the latch edge of a LOOP and update loops to indicate that | |
781 the LOOP was removed. After this function, original loop latch will | |
782 have no successor, which caller is expected to fix somehow. | |
783 | |
784 If this may cause the information about irreducible regions to become | |
785 invalid, IRRED_INVALIDATED is set to true. */ | |
786 | |
787 static void | |
788 unloop (struct loop *loop, bool *irred_invalidated) | |
789 { | |
790 basic_block *body; | |
791 struct loop *ploop; | |
792 unsigned i, n; | |
793 basic_block latch = loop->latch; | |
794 bool dummy = false; | |
795 | |
796 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP) | |
797 *irred_invalidated = true; | |
798 | |
799 /* This is relatively straightforward. The dominators are unchanged, as | |
800 loop header dominates loop latch, so the only thing we have to care of | |
801 is the placement of loops and basic blocks inside the loop tree. We | |
802 move them all to the loop->outer, and then let fix_bb_placements do | |
803 its work. */ | |
804 | |
805 body = get_loop_body (loop); | |
806 n = loop->num_nodes; | |
807 for (i = 0; i < n; i++) | |
808 if (body[i]->loop_father == loop) | |
809 { | |
810 remove_bb_from_loops (body[i]); | |
811 add_bb_to_loop (body[i], loop_outer (loop)); | |
812 } | |
813 free(body); | |
814 | |
815 while (loop->inner) | |
816 { | |
817 ploop = loop->inner; | |
818 flow_loop_tree_node_remove (ploop); | |
819 flow_loop_tree_node_add (loop_outer (loop), ploop); | |
820 } | |
821 | |
822 /* Remove the loop and free its data. */ | |
823 delete_loop (loop); | |
824 | |
825 remove_edge (single_succ_edge (latch)); | |
826 | |
827 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if | |
828 there is an irreducible region inside the cancelled loop, the flags will | |
829 be still correct. */ | |
830 fix_bb_placements (latch, &dummy); | |
831 } | |
832 | |
833 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that | |
834 condition stated in description of fix_loop_placement holds for them. | |
835 It is used in case when we removed some edges coming out of LOOP, which | |
836 may cause the right placement of LOOP inside loop tree to change. | |
837 | |
838 IRRED_INVALIDATED is set to true if a change in the loop structures might | |
839 invalidate the information about irreducible regions. */ | |
840 | |
841 static void | |
842 fix_loop_placements (struct loop *loop, bool *irred_invalidated) | |
843 { | |
844 struct loop *outer; | |
845 | |
846 while (loop_outer (loop)) | |
847 { | |
848 outer = loop_outer (loop); | |
849 if (!fix_loop_placement (loop)) | |
850 break; | |
851 | |
852 /* Changing the placement of a loop in the loop tree may alter the | |
853 validity of condition 2) of the description of fix_bb_placement | |
854 for its preheader, because the successor is the header and belongs | |
855 to the loop. So call fix_bb_placements to fix up the placement | |
856 of the preheader and (possibly) of its predecessors. */ | |
857 fix_bb_placements (loop_preheader_edge (loop)->src, | |
858 irred_invalidated); | |
859 loop = outer; | |
860 } | |
861 } | |
862 | |
/* Create a copy of LOOP as a subloop of TARGET, placing the newly created
   loop into the loops structure.  Only the loop structure itself is
   allocated and registered here.  Returns the new loop.  */
struct loop *
duplicate_loop (struct loop *loop, struct loop *target)
{
  struct loop *copy = alloc_loop ();

  place_new_loop (copy);

  /* Record that COPY is the copy of LOOP.  */
  set_loop_copy (loop, copy);

  /* Hang it under TARGET.  */
  flow_loop_tree_node_add (target, copy);

  return copy;
}
880 | |
881 /* Copies structure of subloops of LOOP into TARGET loop, placing | |
882 newly created loops into loop tree. */ | |
883 static void | |
884 duplicate_subloops (struct loop *loop, struct loop *target) | |
885 { | |
886 struct loop *aloop, *cloop; | |
887 | |
888 for (aloop = loop->inner; aloop; aloop = aloop->next) | |
889 { | |
890 cloop = duplicate_loop (aloop, target); | |
891 duplicate_subloops (aloop, cloop); | |
892 } | |
893 } | |
894 | |
/* Copy each of the N loops stored in the array COPIED_LOOPS, together with
   the structure of its subloops, into TARGET, placing the newly created
   loops into the loop tree.  */
static void
copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
{
  int idx;

  for (idx = 0; idx < n; idx++)
    {
      struct loop *src = copied_loops[idx];
      struct loop *src_copy = duplicate_loop (src, target);

      duplicate_subloops (src, src_copy);
    }
}
909 | |
910 /* Redirects edge E to basic block DEST. */ | |
911 static void | |
912 loop_redirect_edge (edge e, basic_block dest) | |
913 { | |
914 if (e->dest == dest) | |
915 return; | |
916 | |
917 redirect_edge_and_branch_force (e, dest); | |
918 } | |
919 | |
920 /* Check whether LOOP's body can be duplicated. */ | |
921 bool | |
922 can_duplicate_loop_p (const struct loop *loop) | |
923 { | |
924 int ret; | |
925 basic_block *bbs = get_loop_body (loop); | |
926 | |
927 ret = can_copy_bbs_p (bbs, loop->num_nodes); | |
928 free (bbs); | |
929 | |
930 return ret; | |
931 } | |
932 | |
933 /* Sets probability and count of edge E to zero. The probability and count | |
934 is redistributed evenly to the remaining edges coming from E->src. */ | |
935 | |
936 static void | |
937 set_zero_probability (edge e) | |
938 { | |
939 basic_block bb = e->src; | |
940 edge_iterator ei; | |
941 edge ae, last = NULL; | |
942 unsigned n = EDGE_COUNT (bb->succs); | |
943 gcov_type cnt = e->count, cnt1; | |
944 unsigned prob = e->probability, prob1; | |
945 | |
946 gcc_assert (n > 1); | |
947 cnt1 = cnt / (n - 1); | |
948 prob1 = prob / (n - 1); | |
949 | |
950 FOR_EACH_EDGE (ae, ei, bb->succs) | |
951 { | |
952 if (ae == e) | |
953 continue; | |
954 | |
955 ae->probability += prob1; | |
956 ae->count += cnt1; | |
957 last = ae; | |
958 } | |
959 | |
960 /* Move the rest to one of the edges. */ | |
961 last->probability += prob % (n - 1); | |
962 last->count += cnt % (n - 1); | |
963 | |
964 e->probability = 0; | |
965 e->count = 0; | |
966 } | |
967 | |
968 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating | |
969 loop structure and dominators. E's destination must be LOOP header for | |
970 this to work, i.e. it must be entry or latch edge of this loop; these are | |
971 unique, as the loops must have preheaders for this function to work | |
972 correctly (in case E is latch, the function unrolls the loop, if E is entry | |
973 edge, it peels the loop). Store edges created by copying ORIG edge from | |
974 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to | |
975 original LOOP body, the other copies are numbered in order given by control | |
976 flow through them) into TO_REMOVE array. Returns false if duplication is | |
977 impossible. */ | |
/* FLAGS is a mask of DLTHE_* bits.  As used in the body below:
     DLTHE_FLAG_UPDATE_FREQ     rescale the frequencies of the original
                                body and of every copy;
     DLTHE_FLAG_COMPLETTE_PEEL  compute the scaling for complete peeling
                                (only tested when DLTHE_FLAG_UPDATE_FREQ is
                                set; asserts that E is not the latch edge);
     DLTHE_RECORD_COPY_NUMBER   store the 1-based copy number in the aux
                                field of every copied block.
   ORIG and TO_REMOVE may be NULL.  The only failure path is can_copy_bbs_p
   rejecting the loop body, which is checked before any block is copied.  */
978 | |
979 bool | |
980 duplicate_loop_to_header_edge (struct loop *loop, edge e, | |
981 unsigned int ndupl, sbitmap wont_exit, | |
982 edge orig, VEC (edge, heap) **to_remove, | |
983 int flags) | |
984 { | |
985 struct loop *target, *aloop; | |
986 struct loop **orig_loops; | |
987 unsigned n_orig_loops; | |
988 basic_block header = loop->header, latch = loop->latch; | |
989 basic_block *new_bbs, *bbs, *first_active; | |
990 basic_block new_bb, bb, first_active_latch = NULL; | |
991 edge ae, latch_edge; | |
/* Indices into SPEC_EDGES / NEW_SPEC_EDGES: the latch edge and the ORIG
   exit edge, whose copies copy_bbs hands back for each duplicate.  */
992 edge spec_edges[2], new_spec_edges[2]; | |
993 #define SE_LATCH 0 | |
994 #define SE_ORIG 1 | |
995 unsigned i, j, n; | |
/* Nonzero when E leaves the latch, i.e. we are unrolling rather than
   peeling.  */
996 int is_latch = (latch == e->src); | |
997 int scale_act = 0, *scale_step = NULL, scale_main = 0; | |
998 int scale_after_exit = 0; | |
999 int p, freq_in, freq_le, freq_out_orig; | |
1000 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main; | |
1001 int add_irreducible_flag; | |
1002 basic_block place_after; | |
1003 bitmap bbs_to_scale = NULL; | |
1004 bitmap_iterator bi; | |
1005 | |
1006 gcc_assert (e->dest == loop->header); | |
1007 gcc_assert (ndupl > 0); | |
1008 | |
1009 if (orig) | |
1010 { | |
1011 /* Orig must be edge out of the loop. */ | |
1012 gcc_assert (flow_bb_inside_loop_p (loop, orig->src)); | |
1013 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest)); | |
1014 } | |
1015 | |
1016 n = loop->num_nodes; | |
1017 bbs = get_loop_body_in_dom_order (loop); | |
1018 gcc_assert (bbs[0] == loop->header); | |
1019 gcc_assert (bbs[n - 1] == loop->latch); | |
1020 | |
1021 /* Check whether duplication is possible. */ | |
1022 if (!can_copy_bbs_p (bbs, loop->num_nodes)) | |
1023 { | |
/* BBS is the only allocation made so far, so it is all we need to free.  */
1024 free (bbs); | |
1025 return false; | |
1026 } | |
1027 new_bbs = XNEWVEC (basic_block, loop->num_nodes); | |
1028 | |
1029 /* In case we are doing loop peeling and the loop is in the middle of | |
1030 irreducible region, the peeled copies will be inside it too. */ | |
1031 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP; | |
1032 gcc_assert (!is_latch || !add_irreducible_flag); | |
1033 | |
1034 /* Find edge from latch. */ | |
1035 latch_edge = loop_latch_edge (loop); | |
1036 | |
1037 if (flags & DLTHE_FLAG_UPDATE_FREQ) | |
1038 { | |
1039 /* Calculate coefficients by that we have to scale frequencies | |
1040 of duplicated loop bodies. */ | |
1041 freq_in = header->frequency; | |
1042 freq_le = EDGE_FREQUENCY (latch_edge); | |
/* Clamp the inputs so that FREQ_IN is nonzero (it is used as a divisor
   below) and FREQ_LE <= FREQ_IN.  */
1043 if (freq_in == 0) | |
1044 freq_in = 1; | |
1045 if (freq_in < freq_le) | |
1046 freq_in = freq_le; | |
1047 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le; | |
1048 if (freq_out_orig > freq_in - freq_le) | |
1049 freq_out_orig = freq_in - freq_le; | |
1050 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in); | |
1051 prob_pass_wont_exit = | |
1052 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in); | |
1053 | |
1054 if (orig | |
1055 && REG_BR_PROB_BASE - orig->probability != 0) | |
1056 { | |
1057 /* The blocks that are dominated by a removed exit edge ORIG have | |
1058 frequencies scaled by this. */ | |
1059 scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, | |
1060 REG_BR_PROB_BASE - orig->probability); | |
/* BBS_TO_SCALE holds indices into BBS (and thus into NEW_BBS), not basic
   block indices.  */
1061 bbs_to_scale = BITMAP_ALLOC (NULL); | |
1062 for (i = 0; i < n; i++) | |
1063 { | |
1064 if (bbs[i] != orig->src | |
1065 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src)) | |
1066 bitmap_set_bit (bbs_to_scale, i); | |
1067 } | |
1068 } | |
1069 | |
/* scale_step[I - 1] is the factor by which SCALE_ACT is multiplied after
   copy number I has been created (see the end of the copy loop below).  */
1070 scale_step = XNEWVEC (int, ndupl); | |
1071 | |
1072 for (i = 1; i <= ndupl; i++) | |
1073 scale_step[i - 1] = TEST_BIT (wont_exit, i) | |
1074 ? prob_pass_wont_exit | |
1075 : prob_pass_thru; | |
1076 | |
1077 /* Complete peeling is special as the probability of exit in last | |
1078 copy becomes 1. */ | |
1079 if (flags & DLTHE_FLAG_COMPLETTE_PEEL) | |
1080 { | |
1081 int wanted_freq = EDGE_FREQUENCY (e); | |
1082 | |
1083 if (wanted_freq > freq_in) | |
1084 wanted_freq = freq_in; | |
1085 | |
1086 gcc_assert (!is_latch); | |
1087 /* First copy has frequency of incoming edge. Each subsequent | |
1088 frequency should be reduced by prob_pass_wont_exit. Caller | |
1089 should've managed the flags so all except for original loop | |
1090 has won't exit set. */ | |
1091 scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in); | |
1092 /* Now simulate the duplication adjustments and compute header | |
1093 frequency of the last copy. */ | |
1094 for (i = 0; i < ndupl; i++) | |
1095 wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE); | |
1096 scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in); | |
1097 } | |
1098 else if (is_latch) | |
1099 { | |
/* Unrolling: SCALE_MAIN scales the original body, SCALE_ACT the first
   copy.  */
1100 prob_pass_main = TEST_BIT (wont_exit, 0) | |
1101 ? prob_pass_wont_exit | |
1102 : prob_pass_thru; | |
1103 p = prob_pass_main; | |
1104 scale_main = REG_BR_PROB_BASE; | |
1105 for (i = 0; i < ndupl; i++) | |
1106 { | |
1107 scale_main += p; | |
1108 p = RDIV (p * scale_step[i], REG_BR_PROB_BASE); | |
1109 } | |
1110 scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main); | |
1111 scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE); | |
1112 } | |
1113 else | |
1114 { | |
/* Ordinary (partial) peeling.  */
1115 scale_main = REG_BR_PROB_BASE; | |
1116 for (i = 0; i < ndupl; i++) | |
1117 scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE); | |
1118 scale_act = REG_BR_PROB_BASE - prob_pass_thru; | |
1119 } | |
/* Sanity check: every factor must lie in [0, REG_BR_PROB_BASE].  */
1120 for (i = 0; i < ndupl; i++) | |
1121 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE); | |
1122 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE | |
1123 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE); | |
1124 } | |
1125 | |
1126 /* Loop the new bbs will belong to. */ | |
1127 target = e->src->loop_father; | |
1128 | |
1129 /* Original loops. */ | |
1130 n_orig_loops = 0; | |
1131 for (aloop = loop->inner; aloop; aloop = aloop->next) | |
1132 n_orig_loops++; | |
1133 orig_loops = XCNEWVEC (struct loop *, n_orig_loops); | |
1134 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++) | |
1135 orig_loops[i] = aloop; | |
1136 | |
/* NOTE(review): presumably this makes copy_bbs place copies of blocks that
   belong directly to LOOP into TARGET -- confirm against copy_bbs.  */
1137 set_loop_copy (loop, target); | |
1138 | |
/* FIRST_ACTIVE and FIRST_ACTIVE_LATCH describe the first copy in control
   flow order.  When unrolling that is the original body itself; when
   peeling they are filled in inside the copy loop below.  */
1139 first_active = XNEWVEC (basic_block, n); | |
1140 if (is_latch) | |
1141 { | |
1142 memcpy (first_active, bbs, n * sizeof (basic_block)); | |
1143 first_active_latch = latch; | |
1144 } | |
1145 | |
1146 spec_edges[SE_ORIG] = orig; | |
1147 spec_edges[SE_LATCH] = latch_edge; | |
1148 | |
1149 place_after = e->src; | |
1150 for (j = 0; j < ndupl; j++) | |
1151 { | |
1152 /* Copy loops. */ | |
1153 copy_loops_to (orig_loops, n_orig_loops, target); | |
1154 | |
1155 /* Copy bbs. */ | |
1156 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop, | |
1157 place_after); | |
/* The next copy is laid out after the latch of the one just created.  */
1158 place_after = new_spec_edges[SE_LATCH]->src; | |
1159 | |
1160 if (flags & DLTHE_RECORD_COPY_NUMBER) | |
1161 for (i = 0; i < n; i++) | |
1162 { | |
/* Copy numbers are 1-based; J counts from zero.  */
1163 gcc_assert (!new_bbs[i]->aux); | |
1164 new_bbs[i]->aux = (void *)(size_t)(j + 1); | |
1165 } | |
1166 | |
1167 /* Note whether the blocks and edges belong to an irreducible loop. */ | |
1168 if (add_irreducible_flag) | |
1169 { | |
/* Temporarily tag the new blocks so that edges whose destination lies in
   this copy can be recognized below; the tag is cleared again afterwards.  */
1170 for (i = 0; i < n; i++) | |
1171 new_bbs[i]->flags |= BB_DUPLICATED; | |
1172 for (i = 0; i < n; i++) | |
1173 { | |
1174 edge_iterator ei; | |
1175 new_bb = new_bbs[i]; | |
1176 if (new_bb->loop_father == target) | |
1177 new_bb->flags |= BB_IRREDUCIBLE_LOOP; | |
1178 | |
1179 FOR_EACH_EDGE (ae, ei, new_bb->succs) | |
1180 if ((ae->dest->flags & BB_DUPLICATED) | |
1181 && (ae->src->loop_father == target | |
1182 || ae->dest->loop_father == target)) | |
1183 ae->flags |= EDGE_IRREDUCIBLE_LOOP; | |
1184 } | |
1185 for (i = 0; i < n; i++) | |
1186 new_bbs[i]->flags &= ~BB_DUPLICATED; | |
1187 } | |
1188 | |
1189 /* Redirect the special edges. */ | |
1190 if (is_latch) | |
1191 { | |
/* Unrolling: the copy is chained after the current latch and its own
   latch becomes the new latch of LOOP.  */
1192 redirect_edge_and_branch_force (latch_edge, new_bbs[0]); | |
1193 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH], | |
1194 loop->header); | |
1195 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch); | |
1196 latch = loop->latch = new_bbs[n - 1]; | |
1197 e = latch_edge = new_spec_edges[SE_LATCH]; | |
1198 } | |
1199 else | |
1200 { | |
/* Peeling: the copy is inserted on the entry edge E; afterwards E is the
   edge from the copy into the header of LOOP.  */
1201 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH], | |
1202 loop->header); | |
1203 redirect_edge_and_branch_force (e, new_bbs[0]); | |
1204 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src); | |
1205 e = new_spec_edges[SE_LATCH]; | |
1206 } | |
1207 | |
1208 /* Record exit edge in this copy. */ | |
1209 if (orig && TEST_BIT (wont_exit, j + 1)) | |
1210 { | |
1211 if (to_remove) | |
1212 VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]); | |
1213 set_zero_probability (new_spec_edges[SE_ORIG]); | |
1214 | |
1215 /* Scale the frequencies of the blocks dominated by the exit. */ | |
1216 if (bbs_to_scale) | |
1217 { | |
1218 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) | |
1219 { | |
1220 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit, | |
1221 REG_BR_PROB_BASE); | |
1222 } | |
1223 } | |
1224 } | |
1225 | |
1226 /* Record the first copy in the control flow order if it is not | |
1227 the original loop (i.e. in case of peeling). */ | |
1228 if (!first_active_latch) | |
1229 { | |
1230 memcpy (first_active, new_bbs, n * sizeof (basic_block)); | |
1231 first_active_latch = new_bbs[n - 1]; | |
1232 } | |
1233 | |
1234 /* Set counts and frequencies. */ | |
1235 if (flags & DLTHE_FLAG_UPDATE_FREQ) | |
1236 { | |
1237 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE); | |
1238 scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE); | |
1239 } | |
1240 } | |
1241 free (new_bbs); | |
1242 free (orig_loops); | |
1243 | |
1244 /* Record the exit edge in the original loop body, and update the frequencies. */ | |
1245 if (orig && TEST_BIT (wont_exit, 0)) | |
1246 { | |
1247 if (to_remove) | |
1248 VEC_safe_push (edge, heap, *to_remove, orig); | |
1249 set_zero_probability (orig); | |
1250 | |
1251 /* Scale the frequencies of the blocks dominated by the exit. */ | |
1252 if (bbs_to_scale) | |
1253 { | |
1254 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) | |
1255 { | |
1256 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit, | |
1257 REG_BR_PROB_BASE); | |
1258 } | |
1259 } | |
1260 } | |
1261 | |
1262 /* Update the original loop. */ | |
1263 if (!is_latch) | |
1264 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); | |
1265 if (flags & DLTHE_FLAG_UPDATE_FREQ) | |
1266 { | |
1267 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE); | |
1268 free (scale_step); | |
1269 } | |
1270 | |
1271 /* Update dominators of outer blocks if affected. */ | |
1272 for (i = 0; i < n; i++) | |
1273 { | |
1274 basic_block dominated, dom_bb; | |
1275 VEC (basic_block, heap) *dom_bbs; | |
/* Note: this J shadows the function-level J used by the copy loop.  */
1276 unsigned j; | |
1277 | |
1278 bb = bbs[i]; | |
/* Reset the aux field of the original block.  */
1279 bb->aux = 0; | |
1280 | |
1281 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb); | |
1282 for (j = 0; VEC_iterate (basic_block, dom_bbs, j, dominated); j++) | |
1283 { | |
/* Only blocks outside LOOP that were immediately dominated by BB need a
   new immediate dominator.  */
1284 if (flow_bb_inside_loop_p (loop, dominated)) | |
1285 continue; | |
1286 dom_bb = nearest_common_dominator ( | |
1287 CDI_DOMINATORS, first_active[i], first_active_latch); | |
1288 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb); | |
1289 } | |
1290 VEC_free (basic_block, heap, dom_bbs); | |
1291 } | |
1292 free (first_active); | |
1293 | |
1294 free (bbs); | |
1295 BITMAP_FREE (bbs_to_scale); | |
1296 | |
1297 return true; | |
1298 } | |
1299 | |
1300 /* A callback for make_forwarder block, to redirect all edges except for | |
1301 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide | |
1302 whether to redirect it. */ | |
1303 | |
1304 edge mfb_kj_edge; | |
1305 bool | |
1306 mfb_keep_just (edge e) | |
1307 { | |
1308 return e != mfb_kj_edge; | |
1309 } | |
1310 | |
1311 /* True when a candidate preheader BLOCK has predecessors from LOOP. */ | |
1312 | |
1313 static bool | |
1314 has_preds_from_loop (basic_block block, struct loop *loop) | |
1315 { | |
1316 edge e; | |
1317 edge_iterator ei; | |
1318 | |
1319 FOR_EACH_EDGE (e, ei, block->preds) | |
1320 if (e->src->loop_father == loop) | |
1321 return true; | |
1322 return false; | |
1323 } | |
1324 | |
1325 /* Creates a pre-header for a LOOP. Returns newly created block. Unless | |
1326 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single | |
1327 entry; otherwise we also force preheader block to have only one successor. | |
1328 When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block | |
1329 to be a fallthru predecessor to the loop header and to have only | |
1330 predecessors from outside of the loop. | |
1331 The function also updates dominators. */ | |
1332 | |
1333 basic_block | |
1334 create_preheader (struct loop *loop, int flags) | |
1335 { | |
1336 edge e, fallthru; | |
1337 basic_block dummy; | |
1338 int nentry = 0; | |
1339 bool irred = false; | |
1340 bool latch_edge_was_fallthru; | |
1341 edge one_succ_pred = NULL, single_entry = NULL; | |
1342 edge_iterator ei; | |
1343 | |
1344 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
1345 { | |
1346 if (e->src == loop->latch) | |
1347 continue; | |
1348 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0; | |
1349 nentry++; | |
1350 single_entry = e; | |
1351 if (single_succ_p (e->src)) | |
1352 one_succ_pred = e; | |
1353 } | |
1354 gcc_assert (nentry); | |
1355 if (nentry == 1) | |
1356 { | |
1357 bool need_forwarder_block = false; | |
1358 | |
1359 /* We do not allow entry block to be the loop preheader, since we | |
1360 cannot emit code there. */ | |
1361 if (single_entry->src == ENTRY_BLOCK_PTR) | |
1362 need_forwarder_block = true; | |
1363 else | |
1364 { | |
1365 /* If we want simple preheaders, also force the preheader to have | |
1366 just a single successor. */ | |
1367 if ((flags & CP_SIMPLE_PREHEADERS) | |
1368 && !single_succ_p (single_entry->src)) | |
1369 need_forwarder_block = true; | |
1370 /* If we want fallthru preheaders, also create forwarder block when | |
1371 preheader ends with a jump or has predecessors from loop. */ | |
1372 else if ((flags & CP_FALLTHRU_PREHEADERS) | |
1373 && (JUMP_P (BB_END (single_entry->src)) | |
1374 || has_preds_from_loop (single_entry->src, loop))) | |
1375 need_forwarder_block = true; | |
1376 } | |
1377 if (! need_forwarder_block) | |
1378 return NULL; | |
1379 } | |
1380 | |
1381 mfb_kj_edge = loop_latch_edge (loop); | |
1382 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0; | |
1383 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL); | |
1384 dummy = fallthru->src; | |
1385 loop->header = fallthru->dest; | |
1386 | |
1387 /* Try to be clever in placing the newly created preheader. The idea is to | |
1388 avoid breaking any "fallthruness" relationship between blocks. | |
1389 | |
1390 The preheader was created just before the header and all incoming edges | |
1391 to the header were redirected to the preheader, except the latch edge. | |
1392 So the only problematic case is when this latch edge was a fallthru | |
1393 edge: it is not anymore after the preheader creation so we have broken | |
1394 the fallthruness. We're therefore going to look for a better place. */ | |
1395 if (latch_edge_was_fallthru) | |
1396 { | |
1397 if (one_succ_pred) | |
1398 e = one_succ_pred; | |
1399 else | |
1400 e = EDGE_PRED (dummy, 0); | |
1401 | |
1402 move_block_after (dummy, e->src); | |
1403 } | |
1404 | |
1405 if (irred) | |
1406 { | |
1407 dummy->flags |= BB_IRREDUCIBLE_LOOP; | |
1408 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP; | |
1409 } | |
1410 | |
1411 if (dump_file) | |
1412 fprintf (dump_file, "Created preheader block for loop %i\n", | |
1413 loop->num); | |
1414 | |
1415 if (flags & CP_FALLTHRU_PREHEADERS) | |
1416 gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU) | |
1417 && !JUMP_P (BB_END (dummy))); | |
1418 | |
1419 return dummy; | |
1420 } | |
1421 | |
1422 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */ | |
1423 | |
1424 void | |
1425 create_preheaders (int flags) | |
1426 { | |
1427 loop_iterator li; | |
1428 struct loop *loop; | |
1429 | |
1430 if (!current_loops) | |
1431 return; | |
1432 | |
1433 FOR_EACH_LOOP (li, loop, 0) | |
1434 create_preheader (loop, flags); | |
1435 loops_state_set (LOOPS_HAVE_PREHEADERS); | |
1436 } | |
1437 | |
1438 /* Forces all loop latches to have only single successor. */ | |
1439 | |
1440 void | |
1441 force_single_succ_latches (void) | |
1442 { | |
1443 loop_iterator li; | |
1444 struct loop *loop; | |
1445 edge e; | |
1446 | |
1447 FOR_EACH_LOOP (li, loop, 0) | |
1448 { | |
1449 if (loop->latch != loop->header && single_succ_p (loop->latch)) | |
1450 continue; | |
1451 | |
1452 e = find_edge (loop->latch, loop->header); | |
1453 | |
1454 split_edge (e); | |
1455 } | |
1456 loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES); | |
1457 } | |
1458 | |
1459 /* This function is called from loop_version. It splits the entry edge | |
1460 of the loop we want to version, adds the versioning condition, and | |
1461 adjust the edges to the two versions of the loop appropriately. | |
1462 e is an incoming edge. Returns the basic block containing the | |
1463 condition. | |
1464 | |
1465 --- edge e ---- > [second_head] | |
1466 | |
1467 Split it and insert new conditional expression and adjust edges. | |
1468 | |
1469 --- edge e ---> [cond expr] ---> [first_head] | |
1470 | | |
1471 +---------> [second_head] | |
1472 | |
1473 THEN_PROB is the probability of then branch of the condition. */ | |
1474 | |
1475 static basic_block | |
1476 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head, | |
1477 edge e, void *cond_expr, unsigned then_prob) | |
1478 { | |
1479 basic_block new_head = NULL; | |
1480 edge e1; | |
1481 | |
1482 gcc_assert (e->dest == second_head); | |
1483 | |
1484 /* Split edge 'e'. This will create a new basic block, where we can | |
1485 insert conditional expr. */ | |
1486 new_head = split_edge (e); | |
1487 | |
1488 lv_add_condition_to_bb (first_head, second_head, new_head, | |
1489 cond_expr); | |
1490 | |
1491 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */ | |
1492 e = single_succ_edge (new_head); | |
1493 e1 = make_edge (new_head, first_head, | |
1494 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0); | |
1495 e1->probability = then_prob; | |
1496 e->probability = REG_BR_PROB_BASE - then_prob; | |
1497 e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE); | |
1498 e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE); | |
1499 | |
1500 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head); | |
1501 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head); | |
1502 | |
1503 /* Adjust loop header phi nodes. */ | |
1504 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1); | |
1505 | |
1506 return new_head; | |
1507 } | |
1508 | |
1509 /* Main entry point for Loop Versioning transformation. | |
1510 | |
1511 This transformation given a condition and a loop, creates | |
1512 -if (condition) { loop_copy1 } else { loop_copy2 }, | |
1513 where loop_copy1 is the loop transformed in one way, and loop_copy2 | |
1514 is the loop transformed in another way (or unchanged). 'condition' | |
1515 may be a run time test for things that were not resolved by static | |
1516 analysis (overlapping ranges (anti-aliasing), alignment, etc.). | |
1517 | |
1518 THEN_PROB is the probability of the then edge of the if. THEN_SCALE | |
1519 is the ratio by that the frequencies in the original loop should | |
1520 be scaled. ELSE_SCALE is the ratio by that the frequencies in the | |
1521 new loop should be scaled. | |
1522 | |
1523 If PLACE_AFTER is true, we place the new loop after LOOP in the | |
1524 instruction stream, otherwise it is placed before LOOP. */ | |
1525 | |
1526 struct loop * | |
1527 loop_version (struct loop *loop, | |
1528 void *cond_expr, basic_block *condition_bb, | |
1529 unsigned then_prob, unsigned then_scale, unsigned else_scale, | |
1530 bool place_after) | |
1531 { | |
1532 basic_block first_head, second_head; | |
1533 edge entry, latch_edge, true_edge, false_edge; | |
1534 int irred_flag; | |
1535 struct loop *nloop; | |
1536 basic_block cond_bb; | |
1537 | |
1538 /* Record entry and latch edges for the loop */ | |
1539 entry = loop_preheader_edge (loop); | |
1540 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP; | |
1541 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP; | |
1542 | |
1543 /* Note down head of loop as first_head. */ | |
1544 first_head = entry->dest; | |
1545 | |
1546 /* Duplicate loop. */ | |
1547 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1, | |
1548 NULL, NULL, NULL, 0)) | |
1549 return NULL; | |
1550 | |
1551 /* After duplication entry edge now points to new loop head block. | |
1552 Note down new head as second_head. */ | |
1553 second_head = entry->dest; | |
1554 | |
1555 /* Split loop entry edge and insert new block with cond expr. */ | |
1556 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head, | |
1557 entry, cond_expr, then_prob); | |
1558 if (condition_bb) | |
1559 *condition_bb = cond_bb; | |
1560 | |
1561 if (!cond_bb) | |
1562 { | |
1563 entry->flags |= irred_flag; | |
1564 return NULL; | |
1565 } | |
1566 | |
1567 latch_edge = single_succ_edge (get_bb_copy (loop->latch)); | |
1568 | |
1569 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge); | |
1570 nloop = loopify (latch_edge, | |
1571 single_pred_edge (get_bb_copy (loop->header)), | |
1572 cond_bb, true_edge, false_edge, | |
1573 false /* Do not redirect all edges. */, | |
1574 then_scale, else_scale); | |
1575 | |
1576 /* loopify redirected latch_edge. Update its PENDING_STMTS. */ | |
1577 lv_flush_pending_stmts (latch_edge); | |
1578 | |
1579 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */ | |
1580 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge); | |
1581 lv_flush_pending_stmts (false_edge); | |
1582 /* Adjust irreducible flag. */ | |
1583 if (irred_flag) | |
1584 { | |
1585 cond_bb->flags |= BB_IRREDUCIBLE_LOOP; | |
1586 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP; | |
1587 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP; | |
1588 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP; | |
1589 } | |
1590 | |
1591 if (place_after) | |
1592 { | |
1593 basic_block *bbs = get_loop_body_in_dom_order (nloop), after; | |
1594 unsigned i; | |
1595 | |
1596 after = loop->latch; | |
1597 | |
1598 for (i = 0; i < nloop->num_nodes; i++) | |
1599 { | |
1600 move_block_after (bbs[i], after); | |
1601 after = bbs[i]; | |
1602 } | |
1603 free (bbs); | |
1604 } | |
1605 | |
1606 /* At this point condition_bb is loop preheader with two successors, | |
1607 first_head and second_head. Make sure that loop preheader has only | |
1608 one successor. */ | |
1609 split_edge (loop_preheader_edge (loop)); | |
1610 split_edge (loop_preheader_edge (nloop)); | |
1611 | |
1612 return nloop; | |
1613 } | |
1614 | |
1615 /* The structure of loops might have changed. Some loops might get removed | |
1616 (and their headers and latches were set to NULL), loop exists might get | |
1617 removed (thus the loop nesting may be wrong), and some blocks and edges | |
1618 were changed (so the information about bb --> loop mapping does not have | |
1619 to be correct). But still for the remaining loops the header dominates | |
1620 the latch, and loops did not get new subloops (new loops might possibly | |
1621 get created, but we are not interested in them). Fix up the mess. | |
1622 | |
1623 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are | |
1624 marked in it. */ | |
1625 | |
1626 void | |
1627 fix_loop_structure (bitmap changed_bbs) | |
1628 { | |
1629 basic_block bb; | |
1630 struct loop *loop, *ploop; | |
1631 loop_iterator li; | |
1632 bool record_exits = false; | |
1633 struct loop **superloop = XNEWVEC (struct loop *, number_of_loops ()); | |
1634 | |
1635 /* Remove the old bb -> loop mapping. Remember the depth of the blocks in | |
1636 the loop hierarchy, so that we can recognize blocks whose loop nesting | |
1637 relationship has changed. */ | |
1638 FOR_EACH_BB (bb) | |
1639 { | |
1640 if (changed_bbs) | |
1641 bb->aux = (void *) (size_t) loop_depth (bb->loop_father); | |
1642 bb->loop_father = current_loops->tree_root; | |
1643 } | |
1644 | |
1645 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS)) | |
1646 { | |
1647 release_recorded_exits (); | |
1648 record_exits = true; | |
1649 } | |
1650 | |
1651 /* Remove the dead loops from structures. We start from the innermost | |
1652 loops, so that when we remove the loops, we know that the loops inside | |
1653 are preserved, and do not waste time relinking loops that will be | |
1654 removed later. */ | |
1655 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
1656 { | |
1657 if (loop->header) | |
1658 continue; | |
1659 | |
1660 while (loop->inner) | |
1661 { | |
1662 ploop = loop->inner; | |
1663 flow_loop_tree_node_remove (ploop); | |
1664 flow_loop_tree_node_add (loop_outer (loop), ploop); | |
1665 } | |
1666 | |
1667 /* Remove the loop and free its data. */ | |
1668 delete_loop (loop); | |
1669 } | |
1670 | |
1671 /* Rescan the bodies of loops, starting from the outermost ones. We assume | |
1672 that no optimization interchanges the order of the loops, i.e., it cannot | |
1673 happen that L1 was superloop of L2 before and it is subloop of L2 now | |
1674 (without explicitly updating loop information). At the same time, we also | |
1675 determine the new loop structure. */ | |
1676 current_loops->tree_root->num_nodes = n_basic_blocks; | |
1677 FOR_EACH_LOOP (li, loop, 0) | |
1678 { | |
1679 superloop[loop->num] = loop->header->loop_father; | |
1680 loop->num_nodes = flow_loop_nodes_find (loop->header, loop); | |
1681 } | |
1682 | |
1683 /* Now fix the loop nesting. */ | |
1684 FOR_EACH_LOOP (li, loop, 0) | |
1685 { | |
1686 ploop = superloop[loop->num]; | |
1687 if (ploop != loop_outer (loop)) | |
1688 { | |
1689 flow_loop_tree_node_remove (loop); | |
1690 flow_loop_tree_node_add (ploop, loop); | |
1691 } | |
1692 } | |
1693 free (superloop); | |
1694 | |
1695 /* Mark the blocks whose loop has changed. */ | |
1696 if (changed_bbs) | |
1697 { | |
1698 FOR_EACH_BB (bb) | |
1699 { | |
1700 if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux) | |
1701 bitmap_set_bit (changed_bbs, bb->index); | |
1702 | |
1703 bb->aux = NULL; | |
1704 } | |
1705 } | |
1706 | |
1707 if (loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS)) | |
1708 create_preheaders (CP_SIMPLE_PREHEADERS); | |
1709 | |
1710 if (loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES)) | |
1711 force_single_succ_latches (); | |
1712 | |
1713 if (loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
1714 mark_irreducible_loops (); | |
1715 | |
1716 if (record_exits) | |
1717 record_loop_exits (); | |
1718 | |
1719 #ifdef ENABLE_CHECKING | |
1720 verify_loop_structure (); | |
1721 #endif | |
1722 } |