Mercurial > hg > CbC > CbC_gcc
comparison gcc/shrink-wrap.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Shrink-wrapping related optimizations. | |
2 Copyright (C) 1987-2017 Free Software Foundation, Inc. | |
3 | |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify it under | |
7 the terms of the GNU General Public License as published by the Free | |
8 Software Foundation; either version 3, or (at your option) any later | |
9 version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 /* This file handles shrink-wrapping related optimizations. */ | |
21 | |
22 #include "config.h" | |
23 #include "system.h" | |
24 #include "coretypes.h" | |
25 #include "backend.h" | |
26 #include "target.h" | |
27 #include "rtl.h" | |
28 #include "tree.h" | |
29 #include "cfghooks.h" | |
30 #include "df.h" | |
31 #include "memmodel.h" | |
32 #include "tm_p.h" | |
33 #include "regs.h" | |
34 #include "insn-config.h" | |
35 #include "emit-rtl.h" | |
36 #include "output.h" | |
37 #include "tree-pass.h" | |
38 #include "cfgrtl.h" | |
39 #include "cfgbuild.h" | |
40 #include "params.h" | |
41 #include "bb-reorder.h" | |
42 #include "shrink-wrap.h" | |
43 #include "regcprop.h" | |
44 #include "rtl-iter.h" | |
45 #include "valtrack.h" | |
46 | |
47 | |
48 /* Return true if INSN requires the stack frame to be set up. | |
49 PROLOGUE_USED contains the hard registers used in the function | |
50 prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the | |
51 prologue to set up for the function. */ | |
52 bool | |
53 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used, | |
54 HARD_REG_SET set_up_by_prologue) | |
55 { | |
56 df_ref def, use; | |
57 HARD_REG_SET hardregs; | |
58 unsigned regno; | |
59 | |
60 if (CALL_P (insn)) | |
61 return !SIBLING_CALL_P (insn); | |
62 | |
63 /* We need a frame to get the unique CFA expected by the unwinder. */ | |
64 if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn)) | |
65 return true; | |
66 | |
67 CLEAR_HARD_REG_SET (hardregs); | |
68 FOR_EACH_INSN_DEF (def, insn) | |
69 { | |
70 rtx dreg = DF_REF_REG (def); | |
71 | |
72 if (!REG_P (dreg)) | |
73 continue; | |
74 | |
75 add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg)); | |
76 } | |
77 if (hard_reg_set_intersect_p (hardregs, prologue_used)) | |
78 return true; | |
79 AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set); | |
80 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) | |
81 if (TEST_HARD_REG_BIT (hardregs, regno) | |
82 && df_regs_ever_live_p (regno)) | |
83 return true; | |
84 | |
85 FOR_EACH_INSN_USE (use, insn) | |
86 { | |
87 rtx reg = DF_REF_REG (use); | |
88 | |
89 if (!REG_P (reg)) | |
90 continue; | |
91 | |
92 add_to_hard_reg_set (&hardregs, GET_MODE (reg), | |
93 REGNO (reg)); | |
94 } | |
95 if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue)) | |
96 return true; | |
97 | |
98 return false; | |
99 } | |
100 | |
101 /* See whether there has a single live edge from BB, which dest uses | |
102 [REGNO, END_REGNO). Return the live edge if its dest bb has | |
103 one or two predecessors. Otherwise return NULL. */ | |
104 | |
105 static edge | |
106 live_edge_for_reg (basic_block bb, int regno, int end_regno) | |
107 { | |
108 edge e, live_edge; | |
109 edge_iterator ei; | |
110 bitmap live; | |
111 int i; | |
112 | |
113 live_edge = NULL; | |
114 FOR_EACH_EDGE (e, ei, bb->succs) | |
115 { | |
116 live = df_get_live_in (e->dest); | |
117 for (i = regno; i < end_regno; i++) | |
118 if (REGNO_REG_SET_P (live, i)) | |
119 { | |
120 if (live_edge && live_edge != e) | |
121 return NULL; | |
122 live_edge = e; | |
123 } | |
124 } | |
125 | |
126 /* We can sometimes encounter dead code. Don't try to move it | |
127 into the exit block. */ | |
128 if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
129 return NULL; | |
130 | |
131 /* Reject targets of abnormal edges. This is needed for correctness | |
132 on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on | |
133 exception edges even though it is generally treated as call-saved | |
134 for the majority of the compilation. Moving across abnormal edges | |
135 isn't going to be interesting for shrink-wrap usage anyway. */ | |
136 if (live_edge->flags & EDGE_ABNORMAL) | |
137 return NULL; | |
138 | |
139 /* When live_edge->dest->preds == 2, we can create a new block on | |
140 the edge to make it meet the requirement. */ | |
141 if (EDGE_COUNT (live_edge->dest->preds) > 2) | |
142 return NULL; | |
143 | |
144 return live_edge; | |
145 } | |
146 | |
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   has already been split (at most one split is allowed per function).
   DEBUG tracks registers that become dead but are still referenced by
   later debug insns, so debug info can be preserved.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  /* Source register range; stays at FIRST_PSEUDO_REGISTER (empty range)
     when SRC is not a register.  */
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
     (1) A (non-overlapping) register
     (2) A constant,
     (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      /* Moving a trapping expression would change where the exception
	 is raised.  */
      if (can_throw_internal (insn))
	return false;

      /* Walk SRC and verify it contains at most one register and only
	 rtx codes we know are safe to move.  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      /* From here on, treat the single inner register as the source for
	 all overlap and liveness checks.  */
      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* Seed the new block's live-in from what flows along the split
	 edge: live out of BB and live into the old destination.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      /* Record debug insns between INSN and the end of BB that still
	 reference DEST, so their value can be preserved.  */
      if (MAY_HAVE_DEBUG_INSNS)
	{
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the new created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  /* Finally move the insn: re-emit its pattern at the head of BB and
     delete the original.  */
  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
418 | |
419 /* Look for register copies in the first block of the function, and move | |
420 them down into successor blocks if the register is used only on one | |
421 path. This exposes more opportunities for shrink-wrapping. These | |
422 kinds of sets often occur when incoming argument registers are moved | |
423 to call-saved registers because their values are live across one or | |
424 more calls during the function. */ | |
425 | |
426 static void | |
427 prepare_shrink_wrap (basic_block entry_block) | |
428 { | |
429 rtx_insn *insn, *curr; | |
430 rtx x; | |
431 HARD_REG_SET uses, defs; | |
432 df_ref def, use; | |
433 bool split_p = false; | |
434 unsigned int i; | |
435 struct dead_debug_local debug; | |
436 | |
437 if (JUMP_P (BB_END (entry_block))) | |
438 { | |
439 /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries | |
440 to sink the copies from parameter to callee saved register out of | |
441 entry block. copyprop_hardreg_forward_bb_without_debug_insn is called | |
442 to release some dependences. */ | |
443 copyprop_hardreg_forward_bb_without_debug_insn (entry_block); | |
444 } | |
445 | |
446 dead_debug_local_init (&debug, NULL, NULL); | |
447 CLEAR_HARD_REG_SET (uses); | |
448 CLEAR_HARD_REG_SET (defs); | |
449 | |
450 FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr) | |
451 if (NONDEBUG_INSN_P (insn) | |
452 && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs, | |
453 &split_p, &debug)) | |
454 { | |
455 /* Add all defined registers to DEFs. */ | |
456 FOR_EACH_INSN_DEF (def, insn) | |
457 { | |
458 x = DF_REF_REG (def); | |
459 if (REG_P (x) && HARD_REGISTER_P (x)) | |
460 for (i = REGNO (x); i < END_REGNO (x); i++) | |
461 SET_HARD_REG_BIT (defs, i); | |
462 } | |
463 | |
464 /* Add all used registers to USESs. */ | |
465 FOR_EACH_INSN_USE (use, insn) | |
466 { | |
467 x = DF_REF_REG (use); | |
468 if (REG_P (x) && HARD_REGISTER_P (x)) | |
469 for (i = REGNO (x); i < END_REGNO (x); i++) | |
470 SET_HARD_REG_BIT (uses, i); | |
471 } | |
472 } | |
473 | |
474 dead_debug_local_finish (&debug, NULL); | |
475 } | |
476 | |
477 /* Return whether basic block PRO can get the prologue. It can not if it | |
478 has incoming complex edges that need a prologue inserted (we make a new | |
479 block for the prologue, so those edges would need to be redirected, which | |
480 does not work). It also can not if there exist registers live on entry | |
481 to PRO that are clobbered by the prologue. */ | |
482 | |
483 static bool | |
484 can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered) | |
485 { | |
486 edge e; | |
487 edge_iterator ei; | |
488 FOR_EACH_EDGE (e, ei, pro->preds) | |
489 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING) | |
490 && !dominated_by_p (CDI_DOMINATORS, e->src, pro)) | |
491 return false; | |
492 | |
493 HARD_REG_SET live; | |
494 REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro)); | |
495 if (hard_reg_set_intersect_p (live, prologue_clobbered)) | |
496 return false; | |
497 | |
498 return true; | |
499 } | |
500 | |
501 /* Return whether we can duplicate basic block BB for shrink wrapping. We | |
502 cannot if the block cannot be duplicated at all, or if any of its incoming | |
503 edges are complex and come from a block that does not require a prologue | |
504 (we cannot redirect such edges), or if the block is too big to copy. | |
505 PRO is the basic block before which we would put the prologue, MAX_SIZE is | |
506 the maximum size block we allow to be copied. */ | |
507 | |
508 static bool | |
509 can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size) | |
510 { | |
511 if (!can_duplicate_block_p (bb)) | |
512 return false; | |
513 | |
514 edge e; | |
515 edge_iterator ei; | |
516 FOR_EACH_EDGE (e, ei, bb->preds) | |
517 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING) | |
518 && !dominated_by_p (CDI_DOMINATORS, e->src, pro)) | |
519 return false; | |
520 | |
521 unsigned size = 0; | |
522 | |
523 rtx_insn *insn; | |
524 FOR_BB_INSNS (bb, insn) | |
525 if (NONDEBUG_INSN_P (insn)) | |
526 { | |
527 size += get_attr_min_length (insn); | |
528 if (size > max_size) | |
529 return false; | |
530 } | |
531 | |
532 return true; | |
533 } | |
534 | |
535 /* Do whatever needs to be done for exits that run without prologue. | |
536 Sibcalls need nothing done. Normal exits get a simple_return inserted. */ | |
537 | |
538 static void | |
539 handle_simple_exit (edge e) | |
540 { | |
541 | |
542 if (e->flags & EDGE_SIBCALL) | |
543 { | |
544 /* Tell function.c to take no further action on this edge. */ | |
545 e->flags |= EDGE_IGNORE; | |
546 | |
547 e->flags &= ~EDGE_FALLTHRU; | |
548 emit_barrier_after_bb (e->src); | |
549 return; | |
550 } | |
551 | |
552 /* If the basic block the edge comes from has multiple successors, | |
553 split the edge. */ | |
554 if (EDGE_COUNT (e->src->succs) > 1) | |
555 { | |
556 basic_block old_bb = e->src; | |
557 rtx_insn *end = BB_END (old_bb); | |
558 rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end); | |
559 basic_block new_bb = create_basic_block (note, note, old_bb); | |
560 BB_COPY_PARTITION (new_bb, old_bb); | |
561 BB_END (old_bb) = end; | |
562 | |
563 redirect_edge_succ (e, new_bb); | |
564 new_bb->frequency = EDGE_FREQUENCY (e); | |
565 e->flags |= EDGE_FALLTHRU; | |
566 | |
567 e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); | |
568 } | |
569 | |
570 e->flags &= ~EDGE_FALLTHRU; | |
571 rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (), | |
572 BB_END (e->src)); | |
573 JUMP_LABEL (ret) = simple_return_rtx; | |
574 emit_barrier_after_bb (e->src); | |
575 | |
576 if (dump_file) | |
577 fprintf (dump_file, "Made simple_return with UID %d in bb %d\n", | |
578 INSN_UID (ret), e->src->index); | |
579 } | |
580 | |
581 /* Try to perform a kind of shrink-wrapping, making sure the | |
582 prologue/epilogue is emitted only around those parts of the | |
583 function that require it. | |
584 | |
585 There will be exactly one prologue, and it will be executed either | |
586 zero or one time, on any path. Depending on where the prologue is | |
587 placed, some of the basic blocks can be reached via both paths with | |
588 and without a prologue. Such blocks will be duplicated here, and the | |
589 edges changed to match. | |
590 | |
591 Paths that go to the exit without going through the prologue will use | |
592 a simple_return instead of the epilogue. We maximize the number of | |
593 those, making sure to only duplicate blocks that can be duplicated. | |
594 If the prologue can then still be placed in multiple locations, we | |
595 place it as early as possible. | |
596 | |
597 An example, where we duplicate blocks with control flow (legend: | |
598 _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should | |
599 be taken to point down or to the right, to simplify the diagram; here, | |
600 block 3 needs a prologue, the rest does not): | |
601 | |
602 | |
603 B B | |
604 | | | |
605 2 2 | |
606 |\ |\ | |
607 | 3 becomes | 3 | |
608 |/ | \ | |
609 4 7 4 | |
610 |\ |\ |\ | |
611 | 5 | 8 | 5 | |
612 |/ |/ |/ | |
613 6 9 6 | |
614 | | | | |
615 R S R | |
616 | |
617 | |
618 (bb 4 is duplicated to 7, and so on; the prologue is inserted on the | |
619 edge 2->3). | |
620 | |
621 Another example, where part of a loop is duplicated (again, bb 3 is | |
622 the only block that needs a prologue): | |
623 | |
624 | |
625 B 3<-- B ->3<-- | |
626 | | | | | | | | |
627 | v | becomes | | v | | |
628 2---4--- 2---5-- 4--- | |
629 | | | | |
630 R S R | |
631 | |
632 | |
633 (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3). | |
634 | |
635 ENTRY_EDGE is the edge where the prologue will be placed, possibly | |
636 changed by this function. PROLOGUE_SEQ is the prologue we will insert. */ | |
637 | |
638 void | |
639 try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq) | |
640 { | |
641 /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes | |
642 no sense to shrink-wrap: then do not shrink-wrap! */ | |
643 | |
644 if (!SHRINK_WRAPPING_ENABLED) | |
645 return; | |
646 | |
647 if (crtl->profile && !targetm.profile_before_prologue ()) | |
648 return; | |
649 | |
650 if (crtl->calls_eh_return) | |
651 return; | |
652 | |
653 bool empty_prologue = true; | |
654 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn)) | |
655 if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)) | |
656 { | |
657 empty_prologue = false; | |
658 break; | |
659 } | |
660 if (empty_prologue) | |
661 return; | |
662 | |
663 /* Move some code down to expose more shrink-wrapping opportunities. */ | |
664 | |
665 basic_block entry = (*entry_edge)->dest; | |
666 prepare_shrink_wrap (entry); | |
667 | |
668 if (dump_file) | |
669 fprintf (dump_file, "Attempting shrink-wrapping optimization.\n"); | |
670 | |
671 /* Compute the registers set and used in the prologue. */ | |
672 | |
673 HARD_REG_SET prologue_clobbered, prologue_used; | |
674 CLEAR_HARD_REG_SET (prologue_clobbered); | |
675 CLEAR_HARD_REG_SET (prologue_used); | |
676 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn)) | |
677 if (NONDEBUG_INSN_P (insn)) | |
678 { | |
679 HARD_REG_SET this_used; | |
680 CLEAR_HARD_REG_SET (this_used); | |
681 note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used); | |
682 AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered); | |
683 IOR_HARD_REG_SET (prologue_used, this_used); | |
684 note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered); | |
685 } | |
686 CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); | |
687 if (frame_pointer_needed) | |
688 CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM); | |
689 | |
690 /* Find out what registers are set up by the prologue; any use of these | |
691 cannot happen before the prologue. */ | |
692 | |
693 struct hard_reg_set_container set_up_by_prologue; | |
694 CLEAR_HARD_REG_SET (set_up_by_prologue.set); | |
695 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM); | |
696 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM); | |
697 if (frame_pointer_needed) | |
698 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, | |
699 HARD_FRAME_POINTER_REGNUM); | |
700 if (pic_offset_table_rtx | |
701 && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) | |
702 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, | |
703 PIC_OFFSET_TABLE_REGNUM); | |
704 if (crtl->drap_reg) | |
705 add_to_hard_reg_set (&set_up_by_prologue.set, | |
706 GET_MODE (crtl->drap_reg), | |
707 REGNO (crtl->drap_reg)); | |
708 if (targetm.set_up_by_prologue) | |
709 targetm.set_up_by_prologue (&set_up_by_prologue); | |
710 | |
711 /* We will insert the prologue before the basic block PRO. PRO should | |
712 dominate all basic blocks that need the prologue to be executed | |
713 before them. First, make PRO the "tightest wrap" possible. */ | |
714 | |
715 calculate_dominance_info (CDI_DOMINATORS); | |
716 | |
717 basic_block pro = 0; | |
718 | |
719 basic_block bb; | |
720 edge e; | |
721 edge_iterator ei; | |
722 FOR_EACH_BB_FN (bb, cfun) | |
723 { | |
724 rtx_insn *insn; | |
725 FOR_BB_INSNS (bb, insn) | |
726 if (NONDEBUG_INSN_P (insn) | |
727 && requires_stack_frame_p (insn, prologue_used, | |
728 set_up_by_prologue.set)) | |
729 { | |
730 if (dump_file) | |
731 fprintf (dump_file, "Block %d needs the prologue.\n", bb->index); | |
732 pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb); | |
733 break; | |
734 } | |
735 } | |
736 | |
737 /* If nothing needs a prologue, just put it at the start. This really | |
738 shouldn't happen, but we cannot fix it here. */ | |
739 | |
740 if (pro == 0) | |
741 { | |
742 if (dump_file) | |
743 fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; " | |
744 "putting it at the start.\n"); | |
745 pro = entry; | |
746 } | |
747 | |
748 if (dump_file) | |
749 fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n", | |
750 pro->index); | |
751 | |
752 /* Now see if we can put the prologue at the start of PRO. Putting it | |
753 there might require duplicating a block that cannot be duplicated, | |
754 or in some cases we cannot insert the prologue there at all. If PRO | |
755 won't do, try again with the immediate dominator of PRO, and so on. | |
756 | |
757 The blocks that need duplicating are those reachable from PRO but | |
758 not dominated by it. We keep in BB_WITH a bitmap of the blocks | |
759 reachable from PRO that we already found, and in VEC a stack of | |
760 those we still need to consider (to find successors). */ | |
761 | |
762 auto_bitmap bb_with; | |
763 bitmap_set_bit (bb_with, pro->index); | |
764 | |
765 vec<basic_block> vec; | |
766 vec.create (n_basic_blocks_for_fn (cfun)); | |
767 vec.quick_push (pro); | |
768 | |
769 unsigned max_grow_size = get_uncond_jump_length (); | |
770 max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS); | |
771 | |
772 while (!vec.is_empty () && pro != entry) | |
773 { | |
774 while (pro != entry && !can_get_prologue (pro, prologue_clobbered)) | |
775 { | |
776 pro = get_immediate_dominator (CDI_DOMINATORS, pro); | |
777 | |
778 if (bitmap_set_bit (bb_with, pro->index)) | |
779 vec.quick_push (pro); | |
780 } | |
781 | |
782 basic_block bb = vec.pop (); | |
783 if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size)) | |
784 while (!dominated_by_p (CDI_DOMINATORS, bb, pro)) | |
785 { | |
786 gcc_assert (pro != entry); | |
787 | |
788 pro = get_immediate_dominator (CDI_DOMINATORS, pro); | |
789 | |
790 if (bitmap_set_bit (bb_with, pro->index)) | |
791 vec.quick_push (pro); | |
792 } | |
793 | |
794 FOR_EACH_EDGE (e, ei, bb->succs) | |
795 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) | |
796 && bitmap_set_bit (bb_with, e->dest->index)) | |
797 vec.quick_push (e->dest); | |
798 } | |
799 | |
800 if (dump_file) | |
801 fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n", | |
802 pro->index); | |
803 | |
804 /* If we can move PRO back without having to duplicate more blocks, do so. | |
805 We do this because putting the prologue earlier is better for scheduling. | |
806 | |
807 We can move back to a block PRE if every path from PRE will eventually | |
808 need a prologue, that is, PRO is a post-dominator of PRE. PRE needs | |
809 to dominate every block reachable from itself. We keep in BB_TMP a | |
810 bitmap of the blocks reachable from PRE that we already found, and in | |
811 VEC a stack of those we still need to consider. | |
812 | |
813 Any block reachable from PRE is also reachable from all predecessors | |
814 of PRE, so if we find we need to move PRE back further we can leave | |
815 everything not considered so far on the stack. Any block dominated | |
816 by PRE is also dominated by all other dominators of PRE, so anything | |
817 found good for some PRE does not need to be reconsidered later. | |
818 | |
819 We don't need to update BB_WITH because none of the new blocks found | |
820 can jump to a block that does not need the prologue. */ | |
821 | |
822 if (pro != entry) | |
823 { | |
824 calculate_dominance_info (CDI_POST_DOMINATORS); | |
825 | |
826 auto_bitmap bb_tmp; | |
827 bitmap_copy (bb_tmp, bb_with); | |
828 basic_block last_ok = pro; | |
829 vec.truncate (0); | |
830 | |
831 while (pro != entry) | |
832 { | |
833 basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro); | |
834 if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro)) | |
835 break; | |
836 | |
837 if (bitmap_set_bit (bb_tmp, pre->index)) | |
838 vec.quick_push (pre); | |
839 | |
840 bool ok = true; | |
841 while (!vec.is_empty ()) | |
842 { | |
843 if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre)) | |
844 { | |
845 ok = false; | |
846 break; | |
847 } | |
848 | |
849 basic_block bb = vec.pop (); | |
850 FOR_EACH_EDGE (e, ei, bb->succs) | |
851 if (bitmap_set_bit (bb_tmp, e->dest->index)) | |
852 vec.quick_push (e->dest); | |
853 } | |
854 | |
855 if (ok && can_get_prologue (pre, prologue_clobbered)) | |
856 last_ok = pre; | |
857 | |
858 pro = pre; | |
859 } | |
860 | |
861 pro = last_ok; | |
862 | |
863 free_dominance_info (CDI_POST_DOMINATORS); | |
864 } | |
865 | |
866 vec.release (); | |
867 | |
868 if (dump_file) | |
869 fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n", | |
870 pro->index); | |
871 | |
872 if (pro == entry) | |
873 { | |
874 free_dominance_info (CDI_DOMINATORS); | |
875 return; | |
876 } | |
877 | |
878 /* Compute what fraction of the frequency and count of the blocks that run | |
879 both with and without prologue are for running with prologue. This gives | |
880 the correct answer for reducible flow graphs; for irreducible flow graphs | |
881 our profile is messed up beyond repair anyway. */ | |
882 | |
883 gcov_type num = 0; | |
884 gcov_type den = 0; | |
885 | |
886 FOR_EACH_EDGE (e, ei, pro->preds) | |
887 if (!dominated_by_p (CDI_DOMINATORS, e->src, pro)) | |
888 { | |
889 num += EDGE_FREQUENCY (e); | |
890 den += e->src->frequency; | |
891 } | |
892 | |
893 if (den == 0) | |
894 den = 1; | |
895 | |
896 /* All is okay, so do it. */ | |
897 | |
898 crtl->shrink_wrapped = true; | |
899 if (dump_file) | |
900 fprintf (dump_file, "Performing shrink-wrapping.\n"); | |
901 | |
902 /* Copy the blocks that can run both with and without prologue. The | |
903 originals run with prologue, the copies without. Store a pointer to | |
904 the copy in the ->aux field of the original. */ | |
905 | |
906 FOR_EACH_BB_FN (bb, cfun) | |
907 if (bitmap_bit_p (bb_with, bb->index) | |
908 && !dominated_by_p (CDI_DOMINATORS, bb, pro)) | |
909 { | |
910 basic_block dup = duplicate_block (bb, 0, 0); | |
911 | |
912 bb->aux = dup; | |
913 | |
914 if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup))) | |
915 emit_barrier_after_bb (dup); | |
916 | |
917 if (EDGE_COUNT (dup->succs) == 0) | |
918 emit_barrier_after_bb (dup); | |
919 | |
920 if (dump_file) | |
921 fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index); | |
922 | |
923 bb->frequency = RDIV (num * bb->frequency, den); | |
924 dup->frequency -= bb->frequency; | |
925 bb->count = bb->count.apply_scale (num, den); | |
926 dup->count -= bb->count; | |
927 } | |
928 | |
929 /* Now change the edges to point to the copies, where appropriate. */ | |
930 | |
931 FOR_EACH_BB_FN (bb, cfun) | |
932 if (!dominated_by_p (CDI_DOMINATORS, bb, pro)) | |
933 { | |
934 basic_block src = bb; | |
935 if (bitmap_bit_p (bb_with, bb->index)) | |
936 src = (basic_block) bb->aux; | |
937 | |
938 FOR_EACH_EDGE (e, ei, src->succs) | |
939 { | |
940 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
941 continue; | |
942 | |
943 if (bitmap_bit_p (bb_with, e->dest->index) | |
944 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro)) | |
945 { | |
946 if (dump_file) | |
947 fprintf (dump_file, "Redirecting edge %d->%d to %d\n", | |
948 e->src->index, e->dest->index, | |
949 ((basic_block) e->dest->aux)->index); | |
950 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); | |
951 } | |
952 else if (e->flags & EDGE_FALLTHRU | |
953 && bitmap_bit_p (bb_with, bb->index)) | |
954 force_nonfallthru (e); | |
955 } | |
956 } | |
957 | |
958 /* Also redirect the function entry edge if necessary. */ | |
959 | |
960 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) | |
961 if (bitmap_bit_p (bb_with, e->dest->index) | |
962 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro)) | |
963 { | |
964 basic_block split_bb = split_edge (e); | |
965 e = single_succ_edge (split_bb); | |
966 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); | |
967 } | |
968 | |
969 /* Make a simple_return for those exits that run without prologue. */ | |
970 | |
971 FOR_EACH_BB_REVERSE_FN (bb, cfun) | |
972 if (!bitmap_bit_p (bb_with, bb->index)) | |
973 FOR_EACH_EDGE (e, ei, bb->succs) | |
974 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
975 handle_simple_exit (e); | |
976 | |
977 /* Finally, we want a single edge to put the prologue on. Make a new | |
   block before the PRO block; the edge between them is the edge we want.
979 Then redirect those edges into PRO that come from blocks without the | |
980 prologue, to point to the new block instead. The new prologue block | |
981 is put at the end of the insn chain. */ | |
982 | |
983 basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); | |
984 BB_COPY_PARTITION (new_bb, pro); | |
985 new_bb->count = profile_count::zero (); | |
986 if (dump_file) | |
987 fprintf (dump_file, "Made prologue block %d\n", new_bb->index); | |
988 | |
989 for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); ) | |
990 { | |
991 if (bitmap_bit_p (bb_with, e->src->index) | |
992 || dominated_by_p (CDI_DOMINATORS, e->src, pro)) | |
993 { | |
994 ei_next (&ei); | |
995 continue; | |
996 } | |
997 | |
998 new_bb->count += e->src->count.apply_probability (e->probability); | |
999 new_bb->frequency += EDGE_FREQUENCY (e); | |
1000 | |
1001 redirect_edge_and_branch_force (e, new_bb); | |
1002 if (dump_file) | |
1003 fprintf (dump_file, "Redirected edge from %d\n", e->src->index); | |
1004 } | |
1005 | |
1006 *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU); | |
1007 force_nonfallthru (*entry_edge); | |
1008 | |
1009 free_dominance_info (CDI_DOMINATORS); | |
1010 } | |
1011 | |
1012 /* Separate shrink-wrapping | |
1013 | |
1014 Instead of putting all of the prologue and epilogue in one spot, we | |
1015 can put parts of it in places where those components are executed less | |
1016 frequently. The following code does this, for prologue and epilogue | |
1017 components that can be put in more than one location, and where those | |
1018 components can be executed more than once (the epilogue component will | |
1019 always be executed before the prologue component is executed a second | |
1020 time). | |
1021 | |
1022 What exactly is a component is target-dependent. The more usual | |
1023 components are simple saves/restores to/from the frame of callee-saved | |
1024 registers. This code treats components abstractly (as an sbitmap), | |
1025 letting the target handle all details. | |
1026 | |
1027 Prologue components are placed in such a way that for every component | |
1028 the prologue is executed as infrequently as possible. We do this by | |
1029 walking the dominator tree, comparing the cost of placing a prologue | |
1030 component before a block to the sum of costs determined for all subtrees | |
1031 of that block. | |
1032 | |
1033 From this placement, we then determine for each component all blocks | |
1034 where at least one of this block's dominators (including itself) will | |
1035 get a prologue inserted. That then is how the components are placed. | |
1036 We could place the epilogue components a bit smarter (we can save a | |
1037 bit of code size sometimes); this is a possible future improvement. | |
1038 | |
1039 Prologues and epilogues are preferably placed into a block, either at | |
1040 the beginning or end of it, if it is needed for all predecessor resp. | |
1041 successor edges; or placed on the edge otherwise. | |
1042 | |
1043 If the placement of any prologue/epilogue leads to a situation we cannot | |
1044 handle (for example, an abnormal edge would need to be split, or some | |
1045 targets want to use some specific registers that may not be available | |
1046 where we want to put them), separate shrink-wrapping for the components | |
1047 in that prologue/epilogue is aborted. */ | |
1048 | |
1049 | |
1050 /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the | |
1051 label LABEL. */ | |
1052 static void | |
1053 dump_components (const char *label, sbitmap components) | |
1054 { | |
1055 if (bitmap_empty_p (components)) | |
1056 return; | |
1057 | |
1058 fprintf (dump_file, " [%s", label); | |
1059 | |
1060 for (unsigned int j = 0; j < components->n_bits; j++) | |
1061 if (bitmap_bit_p (components, j)) | |
1062 fprintf (dump_file, " %u", j); | |
1063 | |
1064 fprintf (dump_file, "]"); | |
1065 } | |
1066 | |
/* The data we collect for each bb.  An instance is allocated per basic
   block by init_separate_shrink_wrap and hangs off bb->aux; access it
   via the SW accessor.  */
struct sw {
  /* What components does this BB need?  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  */
  gcov_type total_cost;
};
1094 | |
1095 /* A helper function for accessing the pass-specific info. */ | |
1096 static inline struct sw * | |
1097 SW (basic_block bb) | |
1098 { | |
1099 gcc_assert (bb->aux); | |
1100 return (struct sw *) bb->aux; | |
1101 } | |
1102 | |
1103 /* Create the pass-specific data structures for separately shrink-wrapping | |
1104 with components COMPONENTS. */ | |
1105 static void | |
1106 init_separate_shrink_wrap (sbitmap components) | |
1107 { | |
1108 basic_block bb; | |
1109 FOR_ALL_BB_FN (bb, cfun) | |
1110 { | |
1111 bb->aux = xcalloc (1, sizeof (struct sw)); | |
1112 | |
1113 SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb); | |
1114 | |
1115 /* Mark all basic blocks without successor as needing all components. | |
1116 This avoids problems in at least cfgcleanup, sel-sched, and | |
1117 regrename (largely to do with all paths to such a block still | |
1118 needing the same dwarf CFI info). */ | |
1119 if (EDGE_COUNT (bb->succs) == 0) | |
1120 bitmap_copy (SW (bb)->needs_components, components); | |
1121 | |
1122 if (dump_file) | |
1123 { | |
1124 fprintf (dump_file, "bb %d components:", bb->index); | |
1125 dump_components ("has", SW (bb)->needs_components); | |
1126 fprintf (dump_file, "\n"); | |
1127 } | |
1128 | |
1129 SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components)); | |
1130 SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components)); | |
1131 SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components)); | |
1132 bitmap_clear (SW (bb)->has_components); | |
1133 } | |
1134 } | |
1135 | |
1136 /* Destroy the pass-specific data. */ | |
1137 static void | |
1138 fini_separate_shrink_wrap (void) | |
1139 { | |
1140 basic_block bb; | |
1141 FOR_ALL_BB_FN (bb, cfun) | |
1142 if (bb->aux) | |
1143 { | |
1144 sbitmap_free (SW (bb)->needs_components); | |
1145 sbitmap_free (SW (bb)->has_components); | |
1146 sbitmap_free (SW (bb)->head_components); | |
1147 sbitmap_free (SW (bb)->tail_components); | |
1148 free (bb->aux); | |
1149 bb->aux = 0; | |
1150 } | |
1151 } | |
1152 | |
/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
	 if the block does not have the component itself, walk down.  */
      if (first_visit)
	{
	  /* Initialize the cost.  The cost is the block execution frequency
	     that does not come from backedges.  Calculating this by simply
	     adding the cost of all edges that aren't backedges does not
	     work: this does not always add up to the block frequency at
	     all, and even if it does, rounding error makes for bad
	     decisions.  */
	  SW (bb)->own_cost = bb->frequency;

	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
	      {
		/* An edge whose source BB dominates is a backedge; subtract
		   its frequency, saturating at zero to guard against the
		   rounding errors mentioned above.  */
		if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
		  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
		else
		  SW (bb)->own_cost = 0;
	      }

	  SW (bb)->total_cost = 0;

	  /* Descend into the first child unless this block itself needs the
	     component (in which case a prologue goes here regardless).  */
	  if (!bitmap_bit_p (SW (bb)->needs_components, which)
	      && first_dom_son (CDI_DOMINATORS, bb))
	    {
	      bb = first_dom_son (CDI_DOMINATORS, bb);
	      continue;
	    }
	}

      /* If this block does need the component itself, or it is cheaper to
	 put the prologue here than in all the descendants that need it,
	 mark it so.  If this block's immediate post-dominator is dominated
	 by this block, and that needs the prologue, we can put it on this
	 block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
	  || SW (bb)->total_cost > SW (bb)->own_cost)
	{
	  SW (bb)->total_cost = SW (bb)->own_cost;
	  bitmap_set_bit (SW (bb)->has_components, which);
	}
      else
	{
	  basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	  if (dominated_by_p (CDI_DOMINATORS, kid, bb)
	      && bitmap_bit_p (SW (kid)->has_components, which))
	    {
	      SW (bb)->total_cost = SW (bb)->own_cost;
	      bitmap_set_bit (SW (bb)->has_components, which);
	    }
	}

      /* We are back where we started, so we are done now.  */
      if (bb == head)
	return;

      /* We now know the cost of the subtree rooted at the current block.
	 Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary: if the subtrees already
	 cost at least as much as placing the prologue at the parent, the
	 remaining siblings cannot change the decision.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
	  && SW (parent)->total_cost <= SW (parent)->own_cost)
	{
	  bb = next_dom_son (CDI_DOMINATORS, bb);
	  first_visit = true;
	}
      else
	{
	  bb = parent;
	  first_visit = false;
	}
    }
}
1252 | |
1253 /* Set HAS_COMPONENTS in every block to the maximum it can be set to without | |
1254 setting it on any path from entry to exit where it was not already set | |
1255 somewhere (or, for blocks that have no path to the exit, consider only | |
1256 paths from the entry to the block itself). */ | |
1257 static void | |
1258 spread_components (sbitmap components) | |
1259 { | |
1260 basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
1261 basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun); | |
1262 | |
1263 /* A stack of all blocks left to consider, and a bitmap of all blocks | |
1264 on that stack. */ | |
1265 vec<basic_block> todo; | |
1266 todo.create (n_basic_blocks_for_fn (cfun)); | |
1267 auto_bitmap seen; | |
1268 | |
1269 auto_sbitmap old (SBITMAP_SIZE (components)); | |
1270 | |
1271 /* Find for every block the components that are *not* needed on some path | |
1272 from the entry to that block. Do this with a flood fill from the entry | |
1273 block. Every block can be visited at most as often as the number of | |
1274 components (plus one), and usually much less often. */ | |
1275 | |
1276 if (dump_file) | |
1277 fprintf (dump_file, "Spreading down...\n"); | |
1278 | |
1279 basic_block bb; | |
1280 FOR_ALL_BB_FN (bb, cfun) | |
1281 bitmap_clear (SW (bb)->head_components); | |
1282 | |
1283 bitmap_copy (SW (entry_block)->head_components, components); | |
1284 | |
1285 edge e; | |
1286 edge_iterator ei; | |
1287 | |
1288 todo.quick_push (single_succ (entry_block)); | |
1289 bitmap_set_bit (seen, single_succ (entry_block)->index); | |
1290 while (!todo.is_empty ()) | |
1291 { | |
1292 bb = todo.pop (); | |
1293 | |
1294 bitmap_copy (old, SW (bb)->head_components); | |
1295 | |
1296 FOR_EACH_EDGE (e, ei, bb->preds) | |
1297 bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, | |
1298 SW (e->src)->head_components); | |
1299 | |
1300 bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components, | |
1301 SW (bb)->has_components); | |
1302 | |
1303 if (!bitmap_equal_p (old, SW (bb)->head_components)) | |
1304 FOR_EACH_EDGE (e, ei, bb->succs) | |
1305 if (bitmap_set_bit (seen, e->dest->index)) | |
1306 todo.quick_push (e->dest); | |
1307 | |
1308 bitmap_clear_bit (seen, bb->index); | |
1309 } | |
1310 | |
1311 /* Find for every block the components that are *not* needed on some reverse | |
1312 path from the exit to that block. */ | |
1313 | |
1314 if (dump_file) | |
1315 fprintf (dump_file, "Spreading up...\n"); | |
1316 | |
1317 /* First, mark all blocks not reachable from the exit block as not needing | |
1318 any component on any path to the exit. Mark everything, and then clear | |
1319 again by a flood fill. */ | |
1320 | |
1321 FOR_ALL_BB_FN (bb, cfun) | |
1322 bitmap_copy (SW (bb)->tail_components, components); | |
1323 | |
1324 FOR_EACH_EDGE (e, ei, exit_block->preds) | |
1325 { | |
1326 todo.quick_push (e->src); | |
1327 bitmap_set_bit (seen, e->src->index); | |
1328 } | |
1329 | |
1330 while (!todo.is_empty ()) | |
1331 { | |
1332 bb = todo.pop (); | |
1333 | |
1334 if (!bitmap_empty_p (SW (bb)->tail_components)) | |
1335 FOR_EACH_EDGE (e, ei, bb->preds) | |
1336 if (bitmap_set_bit (seen, e->src->index)) | |
1337 todo.quick_push (e->src); | |
1338 | |
1339 bitmap_clear (SW (bb)->tail_components); | |
1340 | |
1341 bitmap_clear_bit (seen, bb->index); | |
1342 } | |
1343 | |
1344 /* And then, flood fill backwards to find for every block the components | |
1345 not needed on some path to the exit. */ | |
1346 | |
1347 bitmap_copy (SW (exit_block)->tail_components, components); | |
1348 | |
1349 FOR_EACH_EDGE (e, ei, exit_block->preds) | |
1350 { | |
1351 todo.quick_push (e->src); | |
1352 bitmap_set_bit (seen, e->src->index); | |
1353 } | |
1354 | |
1355 while (!todo.is_empty ()) | |
1356 { | |
1357 bb = todo.pop (); | |
1358 | |
1359 bitmap_copy (old, SW (bb)->tail_components); | |
1360 | |
1361 FOR_EACH_EDGE (e, ei, bb->succs) | |
1362 bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, | |
1363 SW (e->dest)->tail_components); | |
1364 | |
1365 bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components, | |
1366 SW (bb)->has_components); | |
1367 | |
1368 if (!bitmap_equal_p (old, SW (bb)->tail_components)) | |
1369 FOR_EACH_EDGE (e, ei, bb->preds) | |
1370 if (bitmap_set_bit (seen, e->src->index)) | |
1371 todo.quick_push (e->src); | |
1372 | |
1373 bitmap_clear_bit (seen, bb->index); | |
1374 } | |
1375 | |
1376 /* Finally, mark everything not not needed both forwards and backwards. */ | |
1377 | |
1378 FOR_EACH_BB_FN (bb, cfun) | |
1379 { | |
1380 bitmap_and (SW (bb)->head_components, SW (bb)->head_components, | |
1381 SW (bb)->tail_components); | |
1382 bitmap_and_compl (SW (bb)->has_components, components, | |
1383 SW (bb)->head_components); | |
1384 } | |
1385 | |
1386 FOR_ALL_BB_FN (bb, cfun) | |
1387 { | |
1388 if (dump_file) | |
1389 { | |
1390 fprintf (dump_file, "bb %d components:", bb->index); | |
1391 dump_components ("has", SW (bb)->has_components); | |
1392 fprintf (dump_file, "\n"); | |
1393 } | |
1394 } | |
1395 } | |
1396 | |
/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.  */
static void
disqualify_problematic_components (sbitmap components)
{
  /* Scratch sets: the components that would need a prologue (resp.
     epilogue) on the edge currently under consideration.  */
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which components we want pro/epilogues for here.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);

	  /* Ask the target what it thinks about things.  */
	  if (!bitmap_empty_p (epi))
	    targetm.shrink_wrap.disqualify_components (components, e, epi,
						       false);
	  if (!bitmap_empty_p (pro))
	    targetm.shrink_wrap.disqualify_components (components, e, pro,
						       true);

	  /* If this edge doesn't need splitting, we're fine.  */
	  if (single_pred_p (e->dest)
	      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  /* If the edge can be split, that is fine too.  */
	  if ((e->flags & EDGE_ABNORMAL) == 0)
	    continue;

	  /* We also can handle sibcalls.  */
	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      gcc_assert (e->flags & EDGE_SIBCALL);
	      continue;
	    }

	  /* Remove from consideration those components we would need
	     pro/epilogues for on edges where we cannot insert them.  */
	  bitmap_and_compl (components, components, epi);
	  bitmap_and_compl (components, components, pro);

	  /* Note: COMPONENTS was pruned just above, so these subset tests
	     detect (among others) the components removed on this edge.  */
	  if (dump_file && !bitmap_subset_p (epi, components))
	    {
	      fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("epi", epi);
	      fprintf (dump_file, "\n");
	    }

	  if (dump_file && !bitmap_subset_p (pro, components))
	    {
	      fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("pro", pro);
	      fprintf (dump_file, "\n");
	    }
	}
    }
}
1470 | |
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  /* Scratch sets: the epilogue/prologue components wanted on *all*
     predecessor edges of the current block, plus a temporary.  */
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  /* Any abnormal incoming edge means nothing can be hoisted to
	     this block's head (we could not split that edge either).  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similar, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (pro, pro, tmp);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  /* Remember what was put at the head, so the tail/edge passes do
	     not emit it a second time.  */
	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
	}

      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
	}
    }
}
1555 | |
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  /* Scratch sets: the epilogue/prologue components wanted on *all*
     successor edges of the current block, plus a temporary.  */
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
	continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* An abnormal outgoing edge means nothing can be sunk to this
	     block's tail.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge, and also those that are already put at the head
	     of the successor block.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similarly, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (pro, pro, tmp);
	}

      /* If the last insn of this block is a control flow insn we cannot
	 put anything after it.  We can put our code before it instead,
	 but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
	{
	  bitmap_clear (epi);
	  bitmap_clear (pro);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.  */
      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  /* Remember what was put at the tail, so the edge pass does not
	     emit it a second time.  */
	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
	}

      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
	}
    }
}
1661 | |
/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly.  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  /* Scratch sets: the prologue/epilogue components still needed on the
     edge currently under consideration.  */
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Blocks without pass-specific data (bb->aux) are skipped; see the
	 SW accessor.  */
      if (!bb->aux)
	continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which pro/epilogue components are needed on this edge.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (epi, epi, components);
	  bitmap_and (pro, pro, components);

	  /* Deselect those we already have put at the head or tail of the
	     edge's dest resp. src.  */
	  bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
	  bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
	  bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
	  bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

	  if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "  %d->%d", e->src->index,
			   e->dest->index);
		  dump_components ("epi", epi);
		  dump_components ("pro", pro);
		  if (e->flags & EDGE_SIBCALL)
		    fprintf (dump_file, "  (SIBCALL)");
		  else if (e->flags & EDGE_ABNORMAL)
		    fprintf (dump_file, "  (ABNORMAL)");
		  fprintf (dump_file, "\n");
		}

	      /* Put the epilogue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_epilogue_components (epi);
	      rtx_insn *seq = get_insns ();
	      end_sequence ();
	      record_epilogue_seq (seq);

	      if (e->flags & EDGE_SIBCALL)
		{
		  /* For a sibcall the epilogue must run before the call
		     insn itself; the edge cannot be split.  */
		  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

		  rtx_insn *insn = BB_END (e->src);
		  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
		  emit_insn_before (seq, insn);
		}
	      else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
		{
		  /* Edges to the exit block cannot take insns directly;
		     split the edge and emit into the new block.  */
		  gcc_assert (e->flags & EDGE_FALLTHRU);
		  basic_block new_bb = split_edge (e);
		  emit_insn_after (seq, BB_END (new_bb));
		}
	      else
		insert_insn_on_edge (seq, e);

	      /* Put the prologue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_prologue_components (pro);
	      seq = get_insns ();
	      end_sequence ();
	      record_prologue_seq (seq);

	      insert_insn_on_edge (seq, e);
	    }
	}
    }

  /* Materialize all sequences queued with insert_insn_on_edge.  */
  commit_edge_insertions ();
}
1748 | |
/* The main entry point to this subpass.  FIRST_BB is where the prologue
   would be normally put.

   Separate shrink-wrapping places the prologue/epilogue for each target
   component (typically a callee-saved register save/restore) individually,
   as deep in the CFG as possible, instead of emitting one monolithic
   prologue at FIRST_BB.  Nothing is emitted here for components the target
   does not report, or that end up disqualified below.  */
void
try_shrink_wrapping_separate (basic_block first_bb)
{
  /* cc0 targets are not supported by this subpass; bail out early.  */
  if (HAVE_cc0)
    return;

  /* The optimization must be enabled, profitable for speed, and the
     target must implement the separate-components hooks.  */
  if (!(SHRINK_WRAPPING_ENABLED
	&& flag_shrink_wrap_separate
	&& optimize_function_for_speed_p (cfun)
	&& targetm.shrink_wrap.get_separate_components))
    return;

  /* We don't handle "strange" functions.  */
  if (cfun->calls_alloca
      || cfun->calls_setjmp
      || cfun->can_throw_non_call_exceptions
      || crtl->calls_eh_return
      || crtl->has_nonlocal_goto
      || crtl->saves_all_registers)
    return;

  /* Ask the target what components there are.  If it returns NULL, don't
     do anything.  */
  sbitmap components = targetm.shrink_wrap.get_separate_components ();
  if (!components)
    return;

  /* We need LIVE info, not defining anything in the entry block and not
     using anything in the exit block.  A block then needs a component if
     the register for that component is in the IN or GEN or KILL set for
     that block.  */
  df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_analyze ();

  /* Dominator and post-dominator trees are needed by the placement
     helpers below.  */
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  init_separate_shrink_wrap (components);

  /* Decide, independently for every component, the set of blocks that
     should have it active (recorded in the per-BB SW state).  */
  sbitmap_iterator sbi;
  unsigned int j;
  EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    place_prologue_for_one_component (j, first_bb);

  /* Try to reduce the number of prologue/epilogue instances emitted by
     widening where components are considered active.  */
  spread_components (components);

  /* Drop any component whose placement would be incorrect or not
     worthwhile (details are in the helper).  */
  disqualify_problematic_components (components);

  /* Don't separately shrink-wrap anything where the "main" prologue will
     go; the target code can often optimize things if it is presented with
     all components together (say, if it generates store-multiple insns).  */
  bitmap_and_compl (components, components, SW (first_bb)->has_components);

  if (bitmap_empty_p (components))
    {
      if (dump_file)
	fprintf (dump_file, "Not wrapping anything separately.\n");
    }
  else
    {
      if (dump_file)
	{
	  fprintf (dump_file, "The components we wrap separately are");
	  dump_components ("sep", components);
	  fprintf (dump_file, "\n");

	  fprintf (dump_file, "... Inserting common heads...\n");
	}

      /* Emission proceeds in three passes: first the prologue pieces that
	 can be hoisted to a common head, then the epilogue pieces sunk to
	 a common tail, and finally the remaining edge-by-edge cases.  */
      emit_common_heads_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Inserting common tails...\n");

      emit_common_tails_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Inserting the more difficult ones...\n");

      insert_prologue_epilogue_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Done.\n");

      /* Tell the target which components were handled here, so the normal
	 prologue/epilogue does not emit them again.  */
      targetm.shrink_wrap.set_handled_components (components);

      crtl->shrink_wrapped_separate = true;
    }

  fini_separate_shrink_wrap ();

  sbitmap_free (components);
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  /* All done.  Restore the normal df entry/exit handling (undoing the
     DF_SCAN_EMPTY_ENTRY_EXIT mode set above) and recompute liveness so
     later passes see consistent dataflow information.  */
  df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_set_all_dirty ();
  df_analyze ();
}