111
|
1 /* Shrink-wrapping related optimizations.
|
145
|
2 Copyright (C) 1987-2020 Free Software Foundation, Inc.
|
111
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 /* This file handles shrink-wrapping related optimizations. */
|
|
21
|
|
22 #include "config.h"
|
|
23 #include "system.h"
|
|
24 #include "coretypes.h"
|
|
25 #include "backend.h"
|
|
26 #include "target.h"
|
|
27 #include "rtl.h"
|
|
28 #include "tree.h"
|
|
29 #include "cfghooks.h"
|
|
30 #include "df.h"
|
|
31 #include "memmodel.h"
|
|
32 #include "tm_p.h"
|
|
33 #include "regs.h"
|
|
34 #include "insn-config.h"
|
|
35 #include "emit-rtl.h"
|
|
36 #include "output.h"
|
|
37 #include "tree-pass.h"
|
|
38 #include "cfgrtl.h"
|
|
39 #include "cfgbuild.h"
|
|
40 #include "bb-reorder.h"
|
|
41 #include "shrink-wrap.h"
|
|
42 #include "regcprop.h"
|
|
43 #include "rtl-iter.h"
|
|
44 #include "valtrack.h"
|
145
|
45 #include "function-abi.h"
|
111
|
46
|
|
47 /* Return true if INSN requires the stack frame to be set up.
|
|
48 PROLOGUE_USED contains the hard registers used in the function
|
|
49 prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the
|
|
50 prologue to set up for the function. */
|
|
51 bool
|
|
52 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
|
|
53 HARD_REG_SET set_up_by_prologue)
|
|
54 {
|
|
55 df_ref def, use;
|
|
56 HARD_REG_SET hardregs;
|
|
57 unsigned regno;
|
|
58
|
|
59 if (CALL_P (insn))
|
|
60 return !SIBLING_CALL_P (insn);
|
|
61
|
|
62 /* We need a frame to get the unique CFA expected by the unwinder. */
|
|
63 if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
|
|
64 return true;
|
|
65
|
|
66 CLEAR_HARD_REG_SET (hardregs);
|
|
67 FOR_EACH_INSN_DEF (def, insn)
|
|
68 {
|
|
69 rtx dreg = DF_REF_REG (def);
|
|
70
|
|
71 if (!REG_P (dreg))
|
|
72 continue;
|
|
73
|
|
74 add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
|
|
75 }
|
|
76 if (hard_reg_set_intersect_p (hardregs, prologue_used))
|
|
77 return true;
|
145
|
78 hardregs &= ~crtl->abi->full_reg_clobbers ();
|
111
|
79 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
|
|
80 if (TEST_HARD_REG_BIT (hardregs, regno)
|
|
81 && df_regs_ever_live_p (regno))
|
|
82 return true;
|
|
83
|
|
84 FOR_EACH_INSN_USE (use, insn)
|
|
85 {
|
|
86 rtx reg = DF_REF_REG (use);
|
|
87
|
|
88 if (!REG_P (reg))
|
|
89 continue;
|
|
90
|
|
91 add_to_hard_reg_set (&hardregs, GET_MODE (reg),
|
|
92 REGNO (reg));
|
|
93 }
|
|
94 if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
|
|
95 return true;
|
|
96
|
|
97 return false;
|
|
98 }
|
|
99
|
|
100 /* See whether there has a single live edge from BB, which dest uses
|
|
101 [REGNO, END_REGNO). Return the live edge if its dest bb has
|
|
102 one or two predecessors. Otherwise return NULL. */
|
|
103
|
|
104 static edge
|
|
105 live_edge_for_reg (basic_block bb, int regno, int end_regno)
|
|
106 {
|
|
107 edge e, live_edge;
|
|
108 edge_iterator ei;
|
|
109 bitmap live;
|
|
110 int i;
|
|
111
|
|
112 live_edge = NULL;
|
|
113 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
114 {
|
|
115 live = df_get_live_in (e->dest);
|
|
116 for (i = regno; i < end_regno; i++)
|
|
117 if (REGNO_REG_SET_P (live, i))
|
|
118 {
|
|
119 if (live_edge && live_edge != e)
|
|
120 return NULL;
|
|
121 live_edge = e;
|
|
122 }
|
|
123 }
|
|
124
|
|
125 /* We can sometimes encounter dead code. Don't try to move it
|
|
126 into the exit block. */
|
|
127 if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
128 return NULL;
|
|
129
|
|
130 /* Reject targets of abnormal edges. This is needed for correctness
|
|
131 on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
|
|
132 exception edges even though it is generally treated as call-saved
|
|
133 for the majority of the compilation. Moving across abnormal edges
|
|
134 isn't going to be interesting for shrink-wrap usage anyway. */
|
|
135 if (live_edge->flags & EDGE_ABNORMAL)
|
|
136 return NULL;
|
|
137
|
|
138 /* When live_edge->dest->preds == 2, we can create a new block on
|
|
139 the edge to make it meet the requirement. */
|
|
140 if (EDGE_COUNT (live_edge->dest->preds) > 2)
|
|
141 return NULL;
|
|
142
|
|
143 return live_edge;
|
|
144 }
|
|
145
|
|
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is splitted or not.  DEBUG accumulates uses of registers that become
   dead so that debug insns referring to them can be patched up.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const_hard_reg_set uses,
			   const_hard_reg_set defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  /* bb_uses/bb_defs point into the DF_LR info of the block currently
     being considered; they stay NULL until the first !*split_p pass.  */
  bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
  unsigned int i, dregno, end_dregno;
  /* Source register range; left at FIRST_PSEUDO_REGISTER (an empty
     range) when SRC is not a register, so the sregno loops are no-ops.  */
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      /* Walk all sub-rtxes of SRC, accepting only constant/arithmetic
	 forms plus at most one inner REG (remembered in SRC_INNER).  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      /* From here on, treat the single inner register as the source
	 for the overlap checks below.  */
      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* The new block's live-in is what is live out of BB and live
	 into the original destination.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      /* Record debug-bind uses of DEST that occur after INSN in BB,
	 so we can emit debug temps for them once INSN moves away.  */
      if (MAY_HAVE_DEBUG_BIND_INSNS)
	{
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_BIND_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      if (!*split_p)
	{
	  bb_uses = &DF_LR_BB_INFO (bb)->use;
	  bb_defs = &DF_LR_BB_INFO (bb)->def;
	}
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      /* NEXT_BLOCK = NULL means INSN will be emitted in BB
		 rather than pushed further down.  */
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the new created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  /* "Move" INSN by emitting a copy of its pattern at the head of the
     target block and deleting the original.  */
  rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
  /* Update the LABEL_NUSES count on any referenced labels.  The ideal
     solution here would be to actually move the instruction instead
     of copying/deleting it as this loses some notations on the
     insn.  */
  mark_jump_label (PATTERN (insn), insn_copy, 0);
  delete_insn (insn);
  return true;
}
|
|
425
|
|
426 /* Look for register copies in the first block of the function, and move
|
|
427 them down into successor blocks if the register is used only on one
|
|
428 path. This exposes more opportunities for shrink-wrapping. These
|
|
429 kinds of sets often occur when incoming argument registers are moved
|
|
430 to call-saved registers because their values are live across one or
|
|
431 more calls during the function. */
|
|
432
|
|
433 static void
|
|
434 prepare_shrink_wrap (basic_block entry_block)
|
|
435 {
|
|
436 rtx_insn *insn, *curr;
|
|
437 rtx x;
|
|
438 HARD_REG_SET uses, defs;
|
|
439 df_ref def, use;
|
|
440 bool split_p = false;
|
|
441 unsigned int i;
|
|
442 struct dead_debug_local debug;
|
|
443
|
|
444 if (JUMP_P (BB_END (entry_block)))
|
|
445 {
|
|
446 /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
|
|
447 to sink the copies from parameter to callee saved register out of
|
|
448 entry block. copyprop_hardreg_forward_bb_without_debug_insn is called
|
|
449 to release some dependences. */
|
|
450 copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
|
|
451 }
|
|
452
|
|
453 dead_debug_local_init (&debug, NULL, NULL);
|
|
454 CLEAR_HARD_REG_SET (uses);
|
|
455 CLEAR_HARD_REG_SET (defs);
|
|
456
|
|
457 FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
|
|
458 if (NONDEBUG_INSN_P (insn)
|
|
459 && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
|
|
460 &split_p, &debug))
|
|
461 {
|
|
462 /* Add all defined registers to DEFs. */
|
|
463 FOR_EACH_INSN_DEF (def, insn)
|
|
464 {
|
|
465 x = DF_REF_REG (def);
|
|
466 if (REG_P (x) && HARD_REGISTER_P (x))
|
|
467 for (i = REGNO (x); i < END_REGNO (x); i++)
|
|
468 SET_HARD_REG_BIT (defs, i);
|
|
469 }
|
|
470
|
|
471 /* Add all used registers to USESs. */
|
|
472 FOR_EACH_INSN_USE (use, insn)
|
|
473 {
|
|
474 x = DF_REF_REG (use);
|
|
475 if (REG_P (x) && HARD_REGISTER_P (x))
|
|
476 for (i = REGNO (x); i < END_REGNO (x); i++)
|
|
477 SET_HARD_REG_BIT (uses, i);
|
|
478 }
|
|
479 }
|
|
480
|
|
481 dead_debug_local_finish (&debug, NULL);
|
|
482 }
|
|
483
|
145
|
484 /* Return whether basic block PRO can get the prologue. It cannot if it
|
111
|
485 has incoming complex edges that need a prologue inserted (we make a new
|
|
486 block for the prologue, so those edges would need to be redirected, which
|
145
|
487 does not work). It also cannot if there exist registers live on entry
|
111
|
488 to PRO that are clobbered by the prologue. */
|
|
489
|
|
490 static bool
|
|
491 can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
|
|
492 {
|
|
493 edge e;
|
|
494 edge_iterator ei;
|
|
495 FOR_EACH_EDGE (e, ei, pro->preds)
|
|
496 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
|
|
497 && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
498 return false;
|
|
499
|
|
500 HARD_REG_SET live;
|
|
501 REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
|
|
502 if (hard_reg_set_intersect_p (live, prologue_clobbered))
|
|
503 return false;
|
|
504
|
|
505 return true;
|
|
506 }
|
|
507
|
|
508 /* Return whether we can duplicate basic block BB for shrink wrapping. We
|
|
509 cannot if the block cannot be duplicated at all, or if any of its incoming
|
|
510 edges are complex and come from a block that does not require a prologue
|
|
511 (we cannot redirect such edges), or if the block is too big to copy.
|
|
512 PRO is the basic block before which we would put the prologue, MAX_SIZE is
|
|
513 the maximum size block we allow to be copied. */
|
|
514
|
|
515 static bool
|
|
516 can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
|
|
517 {
|
|
518 if (!can_duplicate_block_p (bb))
|
|
519 return false;
|
|
520
|
|
521 edge e;
|
|
522 edge_iterator ei;
|
|
523 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
524 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
|
|
525 && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
526 return false;
|
|
527
|
|
528 unsigned size = 0;
|
|
529
|
|
530 rtx_insn *insn;
|
|
531 FOR_BB_INSNS (bb, insn)
|
|
532 if (NONDEBUG_INSN_P (insn))
|
|
533 {
|
|
534 size += get_attr_min_length (insn);
|
|
535 if (size > max_size)
|
|
536 return false;
|
|
537 }
|
|
538
|
|
539 return true;
|
|
540 }
|
|
541
|
|
542 /* Do whatever needs to be done for exits that run without prologue.
|
|
543 Sibcalls need nothing done. Normal exits get a simple_return inserted. */
|
|
544
|
|
545 static void
|
|
546 handle_simple_exit (edge e)
|
|
547 {
|
|
548
|
|
549 if (e->flags & EDGE_SIBCALL)
|
|
550 {
|
|
551 /* Tell function.c to take no further action on this edge. */
|
|
552 e->flags |= EDGE_IGNORE;
|
|
553
|
|
554 e->flags &= ~EDGE_FALLTHRU;
|
|
555 emit_barrier_after_bb (e->src);
|
|
556 return;
|
|
557 }
|
|
558
|
|
559 /* If the basic block the edge comes from has multiple successors,
|
|
560 split the edge. */
|
|
561 if (EDGE_COUNT (e->src->succs) > 1)
|
|
562 {
|
|
563 basic_block old_bb = e->src;
|
|
564 rtx_insn *end = BB_END (old_bb);
|
|
565 rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
|
|
566 basic_block new_bb = create_basic_block (note, note, old_bb);
|
|
567 BB_COPY_PARTITION (new_bb, old_bb);
|
|
568 BB_END (old_bb) = end;
|
|
569
|
|
570 redirect_edge_succ (e, new_bb);
|
131
|
571 new_bb->count = e->count ();
|
111
|
572 e->flags |= EDGE_FALLTHRU;
|
|
573
|
|
574 e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
|
|
575 }
|
|
576
|
|
577 e->flags &= ~EDGE_FALLTHRU;
|
|
578 rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
|
|
579 BB_END (e->src));
|
|
580 JUMP_LABEL (ret) = simple_return_rtx;
|
|
581 emit_barrier_after_bb (e->src);
|
|
582
|
|
583 if (dump_file)
|
|
584 fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
|
|
585 INSN_UID (ret), e->src->index);
|
|
586 }
|
|
587
|
|
588 /* Try to perform a kind of shrink-wrapping, making sure the
|
|
589 prologue/epilogue is emitted only around those parts of the
|
|
590 function that require it.
|
|
591
|
|
592 There will be exactly one prologue, and it will be executed either
|
|
593 zero or one time, on any path. Depending on where the prologue is
|
|
594 placed, some of the basic blocks can be reached via both paths with
|
|
595 and without a prologue. Such blocks will be duplicated here, and the
|
|
596 edges changed to match.
|
|
597
|
|
598 Paths that go to the exit without going through the prologue will use
|
|
599 a simple_return instead of the epilogue. We maximize the number of
|
|
600 those, making sure to only duplicate blocks that can be duplicated.
|
|
601 If the prologue can then still be placed in multiple locations, we
|
|
602 place it as early as possible.
|
|
603
|
|
604 An example, where we duplicate blocks with control flow (legend:
|
|
605 _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
|
|
606 be taken to point down or to the right, to simplify the diagram; here,
|
|
607 block 3 needs a prologue, the rest does not):
|
|
608
|
|
609
|
|
610 B B
|
|
611 | |
|
|
612 2 2
|
|
613 |\ |\
|
|
614 | 3 becomes | 3
|
|
615 |/ | \
|
|
616 4 7 4
|
|
617 |\ |\ |\
|
|
618 | 5 | 8 | 5
|
|
619 |/ |/ |/
|
|
620 6 9 6
|
|
621 | | |
|
|
622 R S R
|
|
623
|
|
624
|
|
625 (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
|
|
626 edge 2->3).
|
|
627
|
|
628 Another example, where part of a loop is duplicated (again, bb 3 is
|
|
629 the only block that needs a prologue):
|
|
630
|
|
631
|
|
632 B 3<-- B ->3<--
|
|
633 | | | | | | |
|
|
634 | v | becomes | | v |
|
|
635 2---4--- 2---5-- 4---
|
|
636 | | |
|
|
637 R S R
|
|
638
|
|
639
|
|
640 (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
|
|
641
|
|
642 ENTRY_EDGE is the edge where the prologue will be placed, possibly
|
|
643 changed by this function. PROLOGUE_SEQ is the prologue we will insert. */
|
|
644
|
|
645 void
|
|
646 try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
|
|
647 {
|
|
648 /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
|
|
649 no sense to shrink-wrap: then do not shrink-wrap! */
|
|
650
|
|
651 if (!SHRINK_WRAPPING_ENABLED)
|
|
652 return;
|
|
653
|
|
654 if (crtl->profile && !targetm.profile_before_prologue ())
|
|
655 return;
|
|
656
|
|
657 if (crtl->calls_eh_return)
|
|
658 return;
|
|
659
|
|
660 bool empty_prologue = true;
|
|
661 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
|
|
662 if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
|
|
663 {
|
|
664 empty_prologue = false;
|
|
665 break;
|
|
666 }
|
|
667 if (empty_prologue)
|
|
668 return;
|
|
669
|
|
670 /* Move some code down to expose more shrink-wrapping opportunities. */
|
|
671
|
|
672 basic_block entry = (*entry_edge)->dest;
|
|
673 prepare_shrink_wrap (entry);
|
|
674
|
|
675 if (dump_file)
|
|
676 fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
|
|
677
|
|
678 /* Compute the registers set and used in the prologue. */
|
|
679
|
|
680 HARD_REG_SET prologue_clobbered, prologue_used;
|
|
681 CLEAR_HARD_REG_SET (prologue_clobbered);
|
|
682 CLEAR_HARD_REG_SET (prologue_used);
|
|
683 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
|
|
684 if (NONDEBUG_INSN_P (insn))
|
|
685 {
|
|
686 HARD_REG_SET this_used;
|
|
687 CLEAR_HARD_REG_SET (this_used);
|
|
688 note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
|
145
|
689 this_used &= ~prologue_clobbered;
|
|
690 prologue_used |= this_used;
|
|
691 note_stores (insn, record_hard_reg_sets, &prologue_clobbered);
|
111
|
692 }
|
|
693 CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
|
|
694 if (frame_pointer_needed)
|
|
695 CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
|
|
696
|
|
697 /* Find out what registers are set up by the prologue; any use of these
|
|
698 cannot happen before the prologue. */
|
|
699
|
|
700 struct hard_reg_set_container set_up_by_prologue;
|
|
701 CLEAR_HARD_REG_SET (set_up_by_prologue.set);
|
|
702 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
|
|
703 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
|
|
704 if (frame_pointer_needed)
|
|
705 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
|
|
706 HARD_FRAME_POINTER_REGNUM);
|
|
707 if (pic_offset_table_rtx
|
|
708 && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
|
|
709 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
|
|
710 PIC_OFFSET_TABLE_REGNUM);
|
|
711 if (crtl->drap_reg)
|
|
712 add_to_hard_reg_set (&set_up_by_prologue.set,
|
|
713 GET_MODE (crtl->drap_reg),
|
|
714 REGNO (crtl->drap_reg));
|
|
715 if (targetm.set_up_by_prologue)
|
|
716 targetm.set_up_by_prologue (&set_up_by_prologue);
|
|
717
|
|
718 /* We will insert the prologue before the basic block PRO. PRO should
|
|
719 dominate all basic blocks that need the prologue to be executed
|
|
720 before them. First, make PRO the "tightest wrap" possible. */
|
|
721
|
|
722 calculate_dominance_info (CDI_DOMINATORS);
|
|
723
|
|
724 basic_block pro = 0;
|
|
725
|
|
726 basic_block bb;
|
|
727 edge e;
|
|
728 edge_iterator ei;
|
|
729 FOR_EACH_BB_FN (bb, cfun)
|
|
730 {
|
|
731 rtx_insn *insn;
|
|
732 FOR_BB_INSNS (bb, insn)
|
|
733 if (NONDEBUG_INSN_P (insn)
|
|
734 && requires_stack_frame_p (insn, prologue_used,
|
|
735 set_up_by_prologue.set))
|
|
736 {
|
|
737 if (dump_file)
|
|
738 fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
|
|
739 pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
|
|
740 break;
|
|
741 }
|
|
742 }
|
|
743
|
|
744 /* If nothing needs a prologue, just put it at the start. This really
|
|
745 shouldn't happen, but we cannot fix it here. */
|
|
746
|
|
747 if (pro == 0)
|
|
748 {
|
|
749 if (dump_file)
|
|
750 fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
|
|
751 "putting it at the start.\n");
|
|
752 pro = entry;
|
|
753 }
|
|
754
|
|
755 if (dump_file)
|
|
756 fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
|
|
757 pro->index);
|
|
758
|
|
759 /* Now see if we can put the prologue at the start of PRO. Putting it
|
|
760 there might require duplicating a block that cannot be duplicated,
|
|
761 or in some cases we cannot insert the prologue there at all. If PRO
|
|
762 wont't do, try again with the immediate dominator of PRO, and so on.
|
|
763
|
|
764 The blocks that need duplicating are those reachable from PRO but
|
|
765 not dominated by it. We keep in BB_WITH a bitmap of the blocks
|
|
766 reachable from PRO that we already found, and in VEC a stack of
|
|
767 those we still need to consider (to find successors). */
|
|
768
|
|
769 auto_bitmap bb_with;
|
|
770 bitmap_set_bit (bb_with, pro->index);
|
|
771
|
|
772 vec<basic_block> vec;
|
|
773 vec.create (n_basic_blocks_for_fn (cfun));
|
|
774 vec.quick_push (pro);
|
|
775
|
|
776 unsigned max_grow_size = get_uncond_jump_length ();
|
145
|
777 max_grow_size *= param_max_grow_copy_bb_insns;
|
111
|
778
|
|
779 while (!vec.is_empty () && pro != entry)
|
|
780 {
|
|
781 while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
|
|
782 {
|
|
783 pro = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
784
|
|
785 if (bitmap_set_bit (bb_with, pro->index))
|
|
786 vec.quick_push (pro);
|
|
787 }
|
|
788
|
|
789 basic_block bb = vec.pop ();
|
|
790 if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
|
|
791 while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
792 {
|
|
793 gcc_assert (pro != entry);
|
|
794
|
|
795 pro = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
796
|
|
797 if (bitmap_set_bit (bb_with, pro->index))
|
|
798 vec.quick_push (pro);
|
|
799 }
|
|
800
|
|
801 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
802 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
|
|
803 && bitmap_set_bit (bb_with, e->dest->index))
|
|
804 vec.quick_push (e->dest);
|
|
805 }
|
|
806
|
|
807 if (dump_file)
|
|
808 fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
|
|
809 pro->index);
|
|
810
|
|
811 /* If we can move PRO back without having to duplicate more blocks, do so.
|
|
812 We do this because putting the prologue earlier is better for scheduling.
|
|
813
|
|
814 We can move back to a block PRE if every path from PRE will eventually
|
|
815 need a prologue, that is, PRO is a post-dominator of PRE. PRE needs
|
|
816 to dominate every block reachable from itself. We keep in BB_TMP a
|
|
817 bitmap of the blocks reachable from PRE that we already found, and in
|
|
818 VEC a stack of those we still need to consider.
|
|
819
|
|
820 Any block reachable from PRE is also reachable from all predecessors
|
|
821 of PRE, so if we find we need to move PRE back further we can leave
|
|
822 everything not considered so far on the stack. Any block dominated
|
|
823 by PRE is also dominated by all other dominators of PRE, so anything
|
|
824 found good for some PRE does not need to be reconsidered later.
|
|
825
|
|
826 We don't need to update BB_WITH because none of the new blocks found
|
|
827 can jump to a block that does not need the prologue. */
|
|
828
|
|
829 if (pro != entry)
|
|
830 {
|
|
831 calculate_dominance_info (CDI_POST_DOMINATORS);
|
|
832
|
|
833 auto_bitmap bb_tmp;
|
|
834 bitmap_copy (bb_tmp, bb_with);
|
|
835 basic_block last_ok = pro;
|
|
836 vec.truncate (0);
|
|
837
|
|
838 while (pro != entry)
|
|
839 {
|
|
840 basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
841 if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
|
|
842 break;
|
|
843
|
|
844 if (bitmap_set_bit (bb_tmp, pre->index))
|
|
845 vec.quick_push (pre);
|
|
846
|
|
847 bool ok = true;
|
|
848 while (!vec.is_empty ())
|
|
849 {
|
|
850 if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
|
|
851 {
|
|
852 ok = false;
|
|
853 break;
|
|
854 }
|
|
855
|
|
856 basic_block bb = vec.pop ();
|
|
857 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
858 if (bitmap_set_bit (bb_tmp, e->dest->index))
|
|
859 vec.quick_push (e->dest);
|
|
860 }
|
|
861
|
|
862 if (ok && can_get_prologue (pre, prologue_clobbered))
|
|
863 last_ok = pre;
|
|
864
|
|
865 pro = pre;
|
|
866 }
|
|
867
|
|
868 pro = last_ok;
|
|
869
|
|
870 free_dominance_info (CDI_POST_DOMINATORS);
|
|
871 }
|
|
872
|
|
873 vec.release ();
|
|
874
|
|
875 if (dump_file)
|
|
876 fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
|
|
877 pro->index);
|
|
878
|
|
879 if (pro == entry)
|
|
880 {
|
|
881 free_dominance_info (CDI_DOMINATORS);
|
|
882 return;
|
|
883 }
|
|
884
|
|
885 /* Compute what fraction of the frequency and count of the blocks that run
|
|
886 both with and without prologue are for running with prologue. This gives
|
|
887 the correct answer for reducible flow graphs; for irreducible flow graphs
|
|
888 our profile is messed up beyond repair anyway. */
|
|
889
|
131
|
890 profile_count num = profile_count::zero ();
|
|
891 profile_count den = profile_count::zero ();
|
111
|
892
|
|
893 FOR_EACH_EDGE (e, ei, pro->preds)
|
|
894 if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
895 {
|
131
|
896 if (e->count ().initialized_p ())
|
|
897 num += e->count ();
|
|
898 if (e->src->count.initialized_p ())
|
|
899 den += e->src->count;
|
111
|
900 }
|
|
901
|
|
902 /* All is okay, so do it. */
|
|
903
|
|
904 crtl->shrink_wrapped = true;
|
|
905 if (dump_file)
|
|
906 fprintf (dump_file, "Performing shrink-wrapping.\n");
|
|
907
|
|
908 /* Copy the blocks that can run both with and without prologue. The
|
|
909 originals run with prologue, the copies without. Store a pointer to
|
|
910 the copy in the ->aux field of the original. */
|
|
911
|
|
912 FOR_EACH_BB_FN (bb, cfun)
|
|
913 if (bitmap_bit_p (bb_with, bb->index)
|
|
914 && !dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
915 {
|
|
916 basic_block dup = duplicate_block (bb, 0, 0);
|
|
917
|
|
918 bb->aux = dup;
|
|
919
|
|
920 if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
|
|
921 emit_barrier_after_bb (dup);
|
|
922
|
|
923 if (EDGE_COUNT (dup->succs) == 0)
|
|
924 emit_barrier_after_bb (dup);
|
|
925
|
|
926 if (dump_file)
|
|
927 fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
|
131
|
928
|
|
929 if (num == profile_count::zero () || den.nonzero_p ())
|
|
930 bb->count = bb->count.apply_scale (num, den);
|
111
|
931 dup->count -= bb->count;
|
|
932 }
|
|
933
|
|
934 /* Now change the edges to point to the copies, where appropriate. */
|
|
935
|
|
936 FOR_EACH_BB_FN (bb, cfun)
|
|
937 if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
938 {
|
|
939 basic_block src = bb;
|
|
940 if (bitmap_bit_p (bb_with, bb->index))
|
|
941 src = (basic_block) bb->aux;
|
|
942
|
|
943 FOR_EACH_EDGE (e, ei, src->succs)
|
|
944 {
|
|
945 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
946 continue;
|
|
947
|
|
948 if (bitmap_bit_p (bb_with, e->dest->index)
|
|
949 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
|
|
950 {
|
|
951 if (dump_file)
|
|
952 fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
|
|
953 e->src->index, e->dest->index,
|
|
954 ((basic_block) e->dest->aux)->index);
|
|
955 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
|
|
956 }
|
|
957 else if (e->flags & EDGE_FALLTHRU
|
|
958 && bitmap_bit_p (bb_with, bb->index))
|
|
959 force_nonfallthru (e);
|
|
960 }
|
|
961 }
|
|
962
|
|
963 /* Also redirect the function entry edge if necessary. */
|
|
964
|
|
965 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
|
|
966 if (bitmap_bit_p (bb_with, e->dest->index)
|
|
967 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
|
|
968 {
|
|
969 basic_block split_bb = split_edge (e);
|
|
970 e = single_succ_edge (split_bb);
|
|
971 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
|
|
972 }
|
|
973
|
|
974 /* Make a simple_return for those exits that run without prologue. */
|
|
975
|
|
976 FOR_EACH_BB_REVERSE_FN (bb, cfun)
|
|
977 if (!bitmap_bit_p (bb_with, bb->index))
|
|
978 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
979 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
980 handle_simple_exit (e);
|
|
981
|
|
982 /* Finally, we want a single edge to put the prologue on. Make a new
|
|
983      block before the PRO block; the edge between them is the edge we want.
|
|
984 Then redirect those edges into PRO that come from blocks without the
|
|
985 prologue, to point to the new block instead. The new prologue block
|
|
986 is put at the end of the insn chain. */
|
|
987
|
|
988 basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
|
|
989 BB_COPY_PARTITION (new_bb, pro);
|
|
990 new_bb->count = profile_count::zero ();
|
|
991 if (dump_file)
|
|
992 fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
|
|
993
|
|
994 for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
|
|
995 {
|
|
996 if (bitmap_bit_p (bb_with, e->src->index)
|
|
997 || dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
998 {
|
|
999 ei_next (&ei);
|
|
1000 continue;
|
|
1001 }
|
|
1002
|
131
|
1003 new_bb->count += e->count ();
|
111
|
1004
|
|
1005 redirect_edge_and_branch_force (e, new_bb);
|
|
1006 if (dump_file)
|
|
1007 fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
|
|
1008 }
|
|
1009
|
|
1010 *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
|
|
1011 force_nonfallthru (*entry_edge);
|
|
1012
|
|
1013 free_dominance_info (CDI_DOMINATORS);
|
|
1014 }
|
|
1015
|
|
1016 /* Separate shrink-wrapping
|
|
1017
|
|
1018 Instead of putting all of the prologue and epilogue in one spot, we
|
|
1019 can put parts of it in places where those components are executed less
|
|
1020 frequently. The following code does this, for prologue and epilogue
|
|
1021 components that can be put in more than one location, and where those
|
|
1022 components can be executed more than once (the epilogue component will
|
|
1023 always be executed before the prologue component is executed a second
|
|
1024 time).
|
|
1025
|
|
1026 What exactly is a component is target-dependent. The more usual
|
|
1027 components are simple saves/restores to/from the frame of callee-saved
|
|
1028 registers. This code treats components abstractly (as an sbitmap),
|
|
1029 letting the target handle all details.
|
|
1030
|
|
1031 Prologue components are placed in such a way that for every component
|
|
1032 the prologue is executed as infrequently as possible. We do this by
|
|
1033 walking the dominator tree, comparing the cost of placing a prologue
|
|
1034 component before a block to the sum of costs determined for all subtrees
|
|
1035 of that block.
|
|
1036
|
|
1037 From this placement, we then determine for each component all blocks
|
|
1038 where at least one of this block's dominators (including itself) will
|
|
1039 get a prologue inserted. That then is how the components are placed.
|
|
1040 We could place the epilogue components a bit smarter (we can save a
|
|
1041 bit of code size sometimes); this is a possible future improvement.
|
|
1042
|
|
1043 Prologues and epilogues are preferably placed into a block, either at
|
|
1044 the beginning or end of it, if it is needed for all predecessor resp.
|
|
1045 successor edges; or placed on the edge otherwise.
|
|
1046
|
|
1047 If the placement of any prologue/epilogue leads to a situation we cannot
|
|
1048 handle (for example, an abnormal edge would need to be split, or some
|
|
1049 targets want to use some specific registers that may not be available
|
|
1050 where we want to put them), separate shrink-wrapping for the components
|
|
1051 in that prologue/epilogue is aborted. */
|
|
1052
|
|
1053
|
|
1054 /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
|
|
1055 label LABEL. */
|
|
1056 static void
|
|
1057 dump_components (const char *label, sbitmap components)
|
|
1058 {
|
|
1059 if (bitmap_empty_p (components))
|
|
1060 return;
|
|
1061
|
|
1062 fprintf (dump_file, " [%s", label);
|
|
1063
|
|
1064 for (unsigned int j = 0; j < components->n_bits; j++)
|
|
1065 if (bitmap_bit_p (components, j))
|
|
1066 fprintf (dump_file, " %u", j);
|
|
1067
|
|
1068 fprintf (dump_file, "]");
|
|
1069 }
|
|
1070
|
|
/* The data we collect for each bb.  A pointer to this is stored in the
   bb->aux field for the duration of the separate shrink-wrapping subpass
   (see init_separate_shrink_wrap / fini_separate_shrink_wrap), and
   accessed via the SW() helper.  */
struct sw {
  /* What components does this BB need?  As reported by the target's
     components_for_bb hook.  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  Measured in block-frequency units (bb->count
     converted via to_frequency).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  Accumulated during the dominator-tree walk in
     place_prologue_for_one_component.  */
  gcov_type total_cost;
};
|
|
1098
|
|
1099 /* A helper function for accessing the pass-specific info. */
|
|
1100 static inline struct sw *
|
|
1101 SW (basic_block bb)
|
|
1102 {
|
|
1103 gcc_assert (bb->aux);
|
|
1104 return (struct sw *) bb->aux;
|
|
1105 }
|
|
1106
|
|
1107 /* Create the pass-specific data structures for separately shrink-wrapping
|
|
1108 with components COMPONENTS. */
|
|
1109 static void
|
|
1110 init_separate_shrink_wrap (sbitmap components)
|
|
1111 {
|
|
1112 basic_block bb;
|
|
1113 FOR_ALL_BB_FN (bb, cfun)
|
|
1114 {
|
|
1115 bb->aux = xcalloc (1, sizeof (struct sw));
|
|
1116
|
|
1117 SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
|
|
1118
|
|
1119 /* Mark all basic blocks without successor as needing all components.
|
|
1120 This avoids problems in at least cfgcleanup, sel-sched, and
|
|
1121 regrename (largely to do with all paths to such a block still
|
|
1122 needing the same dwarf CFI info). */
|
|
1123 if (EDGE_COUNT (bb->succs) == 0)
|
|
1124 bitmap_copy (SW (bb)->needs_components, components);
|
|
1125
|
|
1126 if (dump_file)
|
|
1127 {
|
|
1128 fprintf (dump_file, "bb %d components:", bb->index);
|
|
1129 dump_components ("has", SW (bb)->needs_components);
|
|
1130 fprintf (dump_file, "\n");
|
|
1131 }
|
|
1132
|
|
1133 SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1134 SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1135 SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1136 bitmap_clear (SW (bb)->has_components);
|
|
1137 }
|
|
1138 }
|
|
1139
|
|
1140 /* Destroy the pass-specific data. */
|
|
1141 static void
|
|
1142 fini_separate_shrink_wrap (void)
|
|
1143 {
|
|
1144 basic_block bb;
|
|
1145 FOR_ALL_BB_FN (bb, cfun)
|
|
1146 if (bb->aux)
|
|
1147 {
|
|
1148 sbitmap_free (SW (bb)->needs_components);
|
|
1149 sbitmap_free (SW (bb)->has_components);
|
|
1150 sbitmap_free (SW (bb)->head_components);
|
|
1151 sbitmap_free (SW (bb)->tail_components);
|
|
1152 free (bb->aux);
|
|
1153 bb->aux = 0;
|
|
1154 }
|
|
1155 }
|
|
1156
|
|
/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  The walk is
     iterative (no recursion); FIRST_VISIT distinguishes the pre-order
     visit from the post-order one.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
	 if the block does not have the component itself, walk down.  */
      if (first_visit)
	{
	  /* Initialize the cost.  The cost is the block execution frequency
	     that does not come from backedges.  Calculating this by simply
	     adding the cost of all edges that aren't backedges does not
	     work: this does not always add up to the block frequency at
	     all, and even if it does, rounding error makes for bad
	     decisions.  So instead start from the block frequency and
	     subtract the frequency of incoming backedges (edges whose
	     source is dominated by this block), clamping at zero.  */
	  SW (bb)->own_cost = bb->count.to_frequency (cfun);

	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
	      {
		if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
		  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
		else
		  SW (bb)->own_cost = 0;
	      }

	  SW (bb)->total_cost = 0;

	  if (!bitmap_bit_p (SW (bb)->needs_components, which)
	      && first_dom_son (CDI_DOMINATORS, bb))
	    {
	      bb = first_dom_son (CDI_DOMINATORS, bb);
	      continue;
	    }
	}

      /* If this block does need the component itself, or it is cheaper to
	 put the prologue here than in all the descendants that need it,
	 mark it so.  If this block's immediate post-dominator is dominated
	 by this block, and that needs the prologue, we can put it on this
	 block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
	  || SW (bb)->total_cost > SW (bb)->own_cost)
	{
	  SW (bb)->total_cost = SW (bb)->own_cost;
	  bitmap_set_bit (SW (bb)->has_components, which);
	}
      else
	{
	  basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	  if (dominated_by_p (CDI_DOMINATORS, kid, bb)
	      && bitmap_bit_p (SW (kid)->has_components, which))
	    {
	      SW (bb)->total_cost = SW (bb)->own_cost;
	      bitmap_set_bit (SW (bb)->has_components, which);
	    }
	}

      /* We are back where we started, so we are done now.  */
      if (bb == head)
	return;

      /* We now know the cost of the subtree rooted at the current block.
	 Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary: only visit the next
	 sibling if the parent's accumulated cost can still end up larger
	 than placing the prologue on the parent itself.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
	  && SW (parent)->total_cost <= SW (parent)->own_cost)
	{
	  bb = next_dom_son (CDI_DOMINATORS, bb);
	  first_visit = true;
	}
      else
	{
	  bb = parent;
	  first_visit = false;
	}
    }
}
|
|
1256
|
|
/* Set HAS_COMPONENTS in every block to the maximum it can be set to without
   setting it on any path from entry to exit where it was not already set
   somewhere (or, for blocks that have no path to the exit, consider only
   paths from the entry to the block itself).  Return whether any changes
   were made to some HAS_COMPONENTS.  */
static bool
spread_components (sbitmap components)
{
  basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);

  /* A stack of all blocks left to consider, and a bitmap of all blocks
     on that stack.  The bitmap prevents pushing a block twice.  */
  vec<basic_block> todo;
  todo.create (n_basic_blocks_for_fn (cfun));
  auto_bitmap seen;

  /* Scratch copy of a block's bitmap, to detect whether an iteration
     changed anything.  */
  auto_sbitmap old (SBITMAP_SIZE (components));

  /* Find for every block the components that are *not* needed on some path
     from the entry to that block.  Do this with a flood fill from the entry
     block.  Every block can be visited at most as often as the number of
     components (plus one), and usually much less often.  */

  if (dump_file)
    fprintf (dump_file, "Spreading down...\n");

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  bitmap_copy (SW (entry_block)->head_components, components);

  edge e;
  edge_iterator ei;

  todo.quick_push (single_succ (entry_block));
  bitmap_set_bit (seen, single_succ (entry_block)->index);
  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->head_components);

      /* A component is unneeded here if it is unneeded at the end of any
	 predecessor...  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
		    SW (e->src)->head_components);

      /* ... unless this block itself has the component.  */
      bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
			SW (bb)->has_components);

      /* Only propagate to the successors if something changed.  */
      if (!bitmap_equal_p (old, SW (bb)->head_components))
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (bitmap_set_bit (seen, e->dest->index))
	    todo.quick_push (e->dest);

      bitmap_clear_bit (seen, bb->index);
    }

  /* Find for every block the components that are *not* needed on some reverse
     path from the exit to that block.  */

  if (dump_file)
    fprintf (dump_file, "Spreading up...\n");

  /* First, mark all blocks not reachable from the exit block as not needing
     any component on any path to the exit.  Mark everything, and then clear
     again by a flood fill.  */

  FOR_ALL_BB_FN (bb, cfun)
    bitmap_copy (SW (bb)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      if (!bitmap_empty_p (SW (bb)->tail_components))
	FOR_EACH_EDGE (e, ei, bb->preds)
	  if (bitmap_set_bit (seen, e->src->index))
	    todo.quick_push (e->src);

      bitmap_clear (SW (bb)->tail_components);

      bitmap_clear_bit (seen, bb->index);
    }

  /* And then, flood fill backwards to find for every block the components
     not needed on some path to the exit.  */

  bitmap_copy (SW (exit_block)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->tail_components);

      FOR_EACH_EDGE (e, ei, bb->succs)
	bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
		    SW (e->dest)->tail_components);

      bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
			SW (bb)->has_components);

      if (!bitmap_equal_p (old, SW (bb)->tail_components))
	FOR_EACH_EDGE (e, ei, bb->preds)
	  if (bitmap_set_bit (seen, e->src->index))
	    todo.quick_push (e->src);

      bitmap_clear_bit (seen, bb->index);
    }

  todo.release ();

  /* Finally, mark everything not not needed both forwards and backwards;
     that is, a block gets a component in HAS_COMPONENTS unless the
     component is unneeded on some entry path AND unneeded on some exit
     path from that block.  */

  bool did_changes = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_copy (old, SW (bb)->has_components);

      bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
		  SW (bb)->tail_components);
      bitmap_and_compl (SW (bb)->has_components, components,
			SW (bb)->head_components);

      if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
	did_changes = true;
    }

  FOR_ALL_BB_FN (bb, cfun)
    {
      if (dump_file)
	{
	  fprintf (dump_file, "bb %d components:", bb->index);
	  dump_components ("has", SW (bb)->has_components);
	  fprintf (dump_file, "\n");
	}
    }

  return did_changes;
}
|
|
1412
|
|
/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.  This covers both components the
   target vetoes (via the disqualify_components hook) and components that
   would need code inserted on an abnormal edge we cannot split.  */
static void
disqualify_problematic_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which components we want pro/epilogues for here:
	     epilogues for components active in the source but not the
	     destination, prologues for the converse.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);

	  /* Ask the target what it thinks about things.  */
	  if (!bitmap_empty_p (epi))
	    targetm.shrink_wrap.disqualify_components (components, e, epi,
						       false);
	  if (!bitmap_empty_p (pro))
	    targetm.shrink_wrap.disqualify_components (components, e, pro,
						       true);

	  /* If this edge doesn't need splitting, we're fine.  */
	  if (single_pred_p (e->dest)
	      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  /* If the edge can be split, that is fine too.  */
	  if ((e->flags & EDGE_ABNORMAL) == 0)
	    continue;

	  /* We also can handle sibcalls.  */
	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      gcc_assert (e->flags & EDGE_SIBCALL);
	      continue;
	    }

	  /* Remove from consideration those components we would need
	     pro/epilogues for on edges where we cannot insert them.  */
	  bitmap_and_compl (components, components, epi);
	  bitmap_and_compl (components, components, pro);

	  /* Dump each component actually removed (those in epi/pro no
	     longer in COMPONENTS).  */
	  if (dump_file && !bitmap_subset_p (epi, components))
	    {
	      fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("epi", epi);
	      fprintf (dump_file, "\n");
	    }

	  if (dump_file && !bitmap_subset_p (pro, components))
	    {
	      fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("pro", pro);
	      fprintf (dump_file, "\n");
	    }
	}
    }
}
|
|
1486
|
|
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks: namely, where every incoming
   (non-abnormal) edge would need the same insertion.  Records what was
   placed in each block's HEAD_COMPONENTS so later stages do not insert
   the same code again on the edges.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  /* An abnormal edge cannot carry insertions, so nothing can be
	     hoisted to the head of this block at all.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similar, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (pro, pro, tmp);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
	}

      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
	}
    }
}
|
|
1571
|
|
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks: namely, where every outgoing
   (non-abnormal) edge would need the same insertion, after accounting for
   code already placed at successors' heads.  Records what was placed in
   each block's TAIL_COMPONENTS.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
	continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* An abnormal edge cannot carry insertions, so nothing can be
	     sunk to the tail of this block at all.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge, and also those that are already put at the head
	     of the successor block.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similarly, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (pro, pro, tmp);
	}

      /* If the last insn of this block is a control flow insn we cannot
	 put anything after it.  We can put our code before it instead,
	 but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
	{
	  bitmap_clear (epi);
	  bitmap_clear (pro);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.  */
      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
	}

      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
	}
    }
}
|
|
1677
|
|
/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly (that is recorded in the src block's
   TAIL_COMPONENTS and the dest block's HEAD_COMPONENTS).  Sibcall and
   exit-fallthru edges get special treatment; everything else goes through
   insert_insn_on_edge and a final commit_edge_insertions.  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Blocks without aux were created after init_separate_shrink_wrap
	 (e.g. by edge splitting) and carry no decisions of their own.  */
      if (!bb->aux)
	continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which pro/epilogue components are needed on this edge.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (epi, epi, components);
	  bitmap_and (pro, pro, components);

	  /* Deselect those we already have put at the head or tail of the
	     edge's dest resp. src.  */
	  bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
	  bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
	  bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
	  bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

	  if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "  %d->%d", e->src->index,
			   e->dest->index);
		  dump_components ("epi", epi);
		  dump_components ("pro", pro);
		  if (e->flags & EDGE_SIBCALL)
		    fprintf (dump_file, "  (SIBCALL)");
		  else if (e->flags & EDGE_ABNORMAL)
		    fprintf (dump_file, "  (ABNORMAL)");
		  fprintf (dump_file, "\n");
		}

	      /* Put the epilogue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_epilogue_components (epi);
	      rtx_insn *seq = get_insns ();
	      end_sequence ();
	      record_epilogue_seq (seq);

	      if (e->flags & EDGE_SIBCALL)
		{
		  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

		  /* The epilogue must run before the sibling call itself.  */
		  rtx_insn *insn = BB_END (e->src);
		  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
		  emit_insn_before (seq, insn);
		}
	      else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
		{
		  gcc_assert (e->flags & EDGE_FALLTHRU);
		  basic_block new_bb = split_edge (e);
		  emit_insn_after (seq, BB_END (new_bb));
		}
	      else
		insert_insn_on_edge (seq, e);

	      /* Put the prologue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_prologue_components (pro);
	      seq = get_insns ();
	      end_sequence ();
	      record_prologue_seq (seq);

	      insert_insn_on_edge (seq, e);
	    }
	}
    }

  commit_edge_insertions ();
}
|
|
1764
|
|
/* The main entry point to this subpass.  FIRST_BB is where the prologue
   would be normally put.  Decides whether separate shrink-wrapping is
   worthwhile for this function and, if so, places and emits per-component
   prologues and epilogues.  */
void
try_shrink_wrapping_separate (basic_block first_bb)
{
  /* cc0 targets cannot cope with the insn insertion this pass does.  */
  if (HAVE_cc0)
    return;

  /* Only run when shrink-wrapping is enabled, requested, profitable for
     speed, and the target implements the separate-components hooks.  */
  if (!(SHRINK_WRAPPING_ENABLED
	&& flag_shrink_wrap_separate
	&& optimize_function_for_speed_p (cfun)
	&& targetm.shrink_wrap.get_separate_components))
    return;

  /* We don't handle "strange" functions.  */
  if (cfun->calls_alloca
      || cfun->calls_setjmp
      || cfun->can_throw_non_call_exceptions
      || crtl->calls_eh_return
      || crtl->has_nonlocal_goto
      || crtl->saves_all_registers)
    return;

  /* Ask the target what components there are.  If it returns NULL, don't
     do anything.  */
  sbitmap components = targetm.shrink_wrap.get_separate_components ();
  if (!components)
    return;

  /* We need LIVE info, not defining anything in the entry block and not
     using anything in the exit block.  A block then needs a component if
     the register for that component is in the IN or GEN or KILL set for
     that block.  */
  df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_analyze ();

  /* Placement decisions below rely on both dominator directions.  */
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  init_separate_shrink_wrap (components);

  /* Choose an initial placement for the prologue of each component.  */
  sbitmap_iterator sbi;
  unsigned int j;
  EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    place_prologue_for_one_component (j, first_bb);

  /* Try to minimize the number of saves and restores.  Do this as long as
     it changes anything.  This does not iterate more than a few times.  */
  int spread_times = 0;
  while (spread_components (components))
    {
      spread_times++;

      if (dump_file)
	fprintf (dump_file, "Now spread %d times.\n", spread_times);
    }

  /* Drop components whose placement turned out to be unworkable.  */
  disqualify_problematic_components (components);

  /* Don't separately shrink-wrap anything where the "main" prologue will
     go; the target code can often optimize things if it is presented with
     all components together (say, if it generates store-multiple insns).  */
  bitmap_and_compl (components, components, SW (first_bb)->has_components);

  if (bitmap_empty_p (components))
    {
      if (dump_file)
	fprintf (dump_file, "Not wrapping anything separately.\n");
    }
  else
    {
      if (dump_file)
	{
	  fprintf (dump_file, "The components we wrap separately are");
	  dump_components ("sep", components);
	  fprintf (dump_file, "\n");

	  fprintf (dump_file, "... Inserting common heads...\n");
	}

      /* Emit the actual prologue/epilogue sequences: first the easy
	 shared heads and tails, then the remaining awkward cases.  */
      emit_common_heads_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Inserting common tails...\n");

      emit_common_tails_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Inserting the more difficult ones...\n");

      insert_prologue_epilogue_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Done.\n");

      /* Tell the target which components this pass took care of, so the
	 main prologue/epilogue expansion skips them.  */
      targetm.shrink_wrap.set_handled_components (components);

      crtl->shrink_wrapped_separate = true;
    }

  /* Release the per-pass state before freeing the components bitmap.  */
  fini_separate_shrink_wrap ();

  sbitmap_free (components);
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  /* All done.  Restore normal entry/exit defs and recompute liveness.  */
  df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_set_all_dirty ();
  df_analyze ();
}
|