111
|
1 /* Shrink-wrapping related optimizations.
|
|
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 /* This file handles shrink-wrapping related optimizations. */
|
|
21
|
|
22 #include "config.h"
|
|
23 #include "system.h"
|
|
24 #include "coretypes.h"
|
|
25 #include "backend.h"
|
|
26 #include "target.h"
|
|
27 #include "rtl.h"
|
|
28 #include "tree.h"
|
|
29 #include "cfghooks.h"
|
|
30 #include "df.h"
|
|
31 #include "memmodel.h"
|
|
32 #include "tm_p.h"
|
|
33 #include "regs.h"
|
|
34 #include "insn-config.h"
|
|
35 #include "emit-rtl.h"
|
|
36 #include "output.h"
|
|
37 #include "tree-pass.h"
|
|
38 #include "cfgrtl.h"
|
|
39 #include "cfgbuild.h"
|
|
40 #include "params.h"
|
|
41 #include "bb-reorder.h"
|
|
42 #include "shrink-wrap.h"
|
|
43 #include "regcprop.h"
|
|
44 #include "rtl-iter.h"
|
|
45 #include "valtrack.h"
|
|
46
|
|
47
|
|
48 /* Return true if INSN requires the stack frame to be set up.
|
|
49 PROLOGUE_USED contains the hard registers used in the function
|
|
50 prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the
|
|
51 prologue to set up for the function. */
|
|
52 bool
|
|
53 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
|
|
54 HARD_REG_SET set_up_by_prologue)
|
|
55 {
|
|
56 df_ref def, use;
|
|
57 HARD_REG_SET hardregs;
|
|
58 unsigned regno;
|
|
59
|
|
60 if (CALL_P (insn))
|
|
61 return !SIBLING_CALL_P (insn);
|
|
62
|
|
63 /* We need a frame to get the unique CFA expected by the unwinder. */
|
|
64 if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
|
|
65 return true;
|
|
66
|
|
67 CLEAR_HARD_REG_SET (hardregs);
|
|
68 FOR_EACH_INSN_DEF (def, insn)
|
|
69 {
|
|
70 rtx dreg = DF_REF_REG (def);
|
|
71
|
|
72 if (!REG_P (dreg))
|
|
73 continue;
|
|
74
|
|
75 add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
|
|
76 }
|
|
77 if (hard_reg_set_intersect_p (hardregs, prologue_used))
|
|
78 return true;
|
|
79 AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
|
|
80 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
|
|
81 if (TEST_HARD_REG_BIT (hardregs, regno)
|
|
82 && df_regs_ever_live_p (regno))
|
|
83 return true;
|
|
84
|
|
85 FOR_EACH_INSN_USE (use, insn)
|
|
86 {
|
|
87 rtx reg = DF_REF_REG (use);
|
|
88
|
|
89 if (!REG_P (reg))
|
|
90 continue;
|
|
91
|
|
92 add_to_hard_reg_set (&hardregs, GET_MODE (reg),
|
|
93 REGNO (reg));
|
|
94 }
|
|
95 if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
|
|
96 return true;
|
|
97
|
|
98 return false;
|
|
99 }
|
|
100
|
|
101 /* See whether there has a single live edge from BB, which dest uses
|
|
102 [REGNO, END_REGNO). Return the live edge if its dest bb has
|
|
103 one or two predecessors. Otherwise return NULL. */
|
|
104
|
|
105 static edge
|
|
106 live_edge_for_reg (basic_block bb, int regno, int end_regno)
|
|
107 {
|
|
108 edge e, live_edge;
|
|
109 edge_iterator ei;
|
|
110 bitmap live;
|
|
111 int i;
|
|
112
|
|
113 live_edge = NULL;
|
|
114 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
115 {
|
|
116 live = df_get_live_in (e->dest);
|
|
117 for (i = regno; i < end_regno; i++)
|
|
118 if (REGNO_REG_SET_P (live, i))
|
|
119 {
|
|
120 if (live_edge && live_edge != e)
|
|
121 return NULL;
|
|
122 live_edge = e;
|
|
123 }
|
|
124 }
|
|
125
|
|
126 /* We can sometimes encounter dead code. Don't try to move it
|
|
127 into the exit block. */
|
|
128 if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
129 return NULL;
|
|
130
|
|
131 /* Reject targets of abnormal edges. This is needed for correctness
|
|
132 on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
|
|
133 exception edges even though it is generally treated as call-saved
|
|
134 for the majority of the compilation. Moving across abnormal edges
|
|
135 isn't going to be interesting for shrink-wrap usage anyway. */
|
|
136 if (live_edge->flags & EDGE_ABNORMAL)
|
|
137 return NULL;
|
|
138
|
|
139 /* When live_edge->dest->preds == 2, we can create a new block on
|
|
140 the edge to make it meet the requirement. */
|
|
141 if (EDGE_COUNT (live_edge->dest->preds) > 2)
|
|
142 return NULL;
|
|
143
|
|
144 return live_edge;
|
|
145 }
|
|
146
|
|
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  *SPLIT_P indicates whether a live edge from BB
   has already been split during this pass; at most one such split is
   performed per function.  DEBUG tracks registers made dead by the
   move that are still referenced by later debug insns, so debug
   bind expressions can be preserved.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  /* Source register range; stays at FIRST_PSEUDO_REGISTER (i.e. an
     empty range) when the source is not a register.  */
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
     (1) A (non-overlapping) register
     (2) A constant,
     (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      /* An expression that can trap must stay put: moving it would
	 change where the exception is raised.  */
      if (can_throw_internal (insn))
	return false;

      /* Walk all sub-rtxes of SRC, accepting only side-effect-free
	 expression forms and at most one inner REG.  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      /* From here on, treat the single inner register (if any) as the
	 effective source for the overlap/liveness checks below.  */
      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* The new block's live-in is what flows from BB into OLD_DEST.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      if (MAY_HAVE_DEBUG_INSNS)
	{
	  /* Record debug uses of DEST between INSN and the end of BB,
	     so debug info can be patched up after the move.  */
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      /* NEXT_BLOCK = NULL stops the descent; the updates to
		 the liveness sets still happen unconditionally.  */
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the new created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  /* Re-emit the pattern in the final destination block and remove the
     original insn.  */
  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
|
|
418
|
|
419 /* Look for register copies in the first block of the function, and move
|
|
420 them down into successor blocks if the register is used only on one
|
|
421 path. This exposes more opportunities for shrink-wrapping. These
|
|
422 kinds of sets often occur when incoming argument registers are moved
|
|
423 to call-saved registers because their values are live across one or
|
|
424 more calls during the function. */
|
|
425
|
|
426 static void
|
|
427 prepare_shrink_wrap (basic_block entry_block)
|
|
428 {
|
|
429 rtx_insn *insn, *curr;
|
|
430 rtx x;
|
|
431 HARD_REG_SET uses, defs;
|
|
432 df_ref def, use;
|
|
433 bool split_p = false;
|
|
434 unsigned int i;
|
|
435 struct dead_debug_local debug;
|
|
436
|
|
437 if (JUMP_P (BB_END (entry_block)))
|
|
438 {
|
|
439 /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
|
|
440 to sink the copies from parameter to callee saved register out of
|
|
441 entry block. copyprop_hardreg_forward_bb_without_debug_insn is called
|
|
442 to release some dependences. */
|
|
443 copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
|
|
444 }
|
|
445
|
|
446 dead_debug_local_init (&debug, NULL, NULL);
|
|
447 CLEAR_HARD_REG_SET (uses);
|
|
448 CLEAR_HARD_REG_SET (defs);
|
|
449
|
|
450 FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
|
|
451 if (NONDEBUG_INSN_P (insn)
|
|
452 && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
|
|
453 &split_p, &debug))
|
|
454 {
|
|
455 /* Add all defined registers to DEFs. */
|
|
456 FOR_EACH_INSN_DEF (def, insn)
|
|
457 {
|
|
458 x = DF_REF_REG (def);
|
|
459 if (REG_P (x) && HARD_REGISTER_P (x))
|
|
460 for (i = REGNO (x); i < END_REGNO (x); i++)
|
|
461 SET_HARD_REG_BIT (defs, i);
|
|
462 }
|
|
463
|
|
464 /* Add all used registers to USESs. */
|
|
465 FOR_EACH_INSN_USE (use, insn)
|
|
466 {
|
|
467 x = DF_REF_REG (use);
|
|
468 if (REG_P (x) && HARD_REGISTER_P (x))
|
|
469 for (i = REGNO (x); i < END_REGNO (x); i++)
|
|
470 SET_HARD_REG_BIT (uses, i);
|
|
471 }
|
|
472 }
|
|
473
|
|
474 dead_debug_local_finish (&debug, NULL);
|
|
475 }
|
|
476
|
|
477 /* Return whether basic block PRO can get the prologue. It can not if it
|
|
478 has incoming complex edges that need a prologue inserted (we make a new
|
|
479 block for the prologue, so those edges would need to be redirected, which
|
|
480 does not work). It also can not if there exist registers live on entry
|
|
481 to PRO that are clobbered by the prologue. */
|
|
482
|
|
483 static bool
|
|
484 can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
|
|
485 {
|
|
486 edge e;
|
|
487 edge_iterator ei;
|
|
488 FOR_EACH_EDGE (e, ei, pro->preds)
|
|
489 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
|
|
490 && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
491 return false;
|
|
492
|
|
493 HARD_REG_SET live;
|
|
494 REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
|
|
495 if (hard_reg_set_intersect_p (live, prologue_clobbered))
|
|
496 return false;
|
|
497
|
|
498 return true;
|
|
499 }
|
|
500
|
|
501 /* Return whether we can duplicate basic block BB for shrink wrapping. We
|
|
502 cannot if the block cannot be duplicated at all, or if any of its incoming
|
|
503 edges are complex and come from a block that does not require a prologue
|
|
504 (we cannot redirect such edges), or if the block is too big to copy.
|
|
505 PRO is the basic block before which we would put the prologue, MAX_SIZE is
|
|
506 the maximum size block we allow to be copied. */
|
|
507
|
|
508 static bool
|
|
509 can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
|
|
510 {
|
|
511 if (!can_duplicate_block_p (bb))
|
|
512 return false;
|
|
513
|
|
514 edge e;
|
|
515 edge_iterator ei;
|
|
516 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
517 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
|
|
518 && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
519 return false;
|
|
520
|
|
521 unsigned size = 0;
|
|
522
|
|
523 rtx_insn *insn;
|
|
524 FOR_BB_INSNS (bb, insn)
|
|
525 if (NONDEBUG_INSN_P (insn))
|
|
526 {
|
|
527 size += get_attr_min_length (insn);
|
|
528 if (size > max_size)
|
|
529 return false;
|
|
530 }
|
|
531
|
|
532 return true;
|
|
533 }
|
|
534
|
|
535 /* Do whatever needs to be done for exits that run without prologue.
|
|
536 Sibcalls need nothing done. Normal exits get a simple_return inserted. */
|
|
537
|
|
538 static void
|
|
539 handle_simple_exit (edge e)
|
|
540 {
|
|
541
|
|
542 if (e->flags & EDGE_SIBCALL)
|
|
543 {
|
|
544 /* Tell function.c to take no further action on this edge. */
|
|
545 e->flags |= EDGE_IGNORE;
|
|
546
|
|
547 e->flags &= ~EDGE_FALLTHRU;
|
|
548 emit_barrier_after_bb (e->src);
|
|
549 return;
|
|
550 }
|
|
551
|
|
552 /* If the basic block the edge comes from has multiple successors,
|
|
553 split the edge. */
|
|
554 if (EDGE_COUNT (e->src->succs) > 1)
|
|
555 {
|
|
556 basic_block old_bb = e->src;
|
|
557 rtx_insn *end = BB_END (old_bb);
|
|
558 rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
|
|
559 basic_block new_bb = create_basic_block (note, note, old_bb);
|
|
560 BB_COPY_PARTITION (new_bb, old_bb);
|
|
561 BB_END (old_bb) = end;
|
|
562
|
|
563 redirect_edge_succ (e, new_bb);
|
|
564 new_bb->frequency = EDGE_FREQUENCY (e);
|
|
565 e->flags |= EDGE_FALLTHRU;
|
|
566
|
|
567 e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
|
|
568 }
|
|
569
|
|
570 e->flags &= ~EDGE_FALLTHRU;
|
|
571 rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
|
|
572 BB_END (e->src));
|
|
573 JUMP_LABEL (ret) = simple_return_rtx;
|
|
574 emit_barrier_after_bb (e->src);
|
|
575
|
|
576 if (dump_file)
|
|
577 fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
|
|
578 INSN_UID (ret), e->src->index);
|
|
579 }
|
|
580
|
|
581 /* Try to perform a kind of shrink-wrapping, making sure the
|
|
582 prologue/epilogue is emitted only around those parts of the
|
|
583 function that require it.
|
|
584
|
|
585 There will be exactly one prologue, and it will be executed either
|
|
586 zero or one time, on any path. Depending on where the prologue is
|
|
587 placed, some of the basic blocks can be reached via both paths with
|
|
588 and without a prologue. Such blocks will be duplicated here, and the
|
|
589 edges changed to match.
|
|
590
|
|
591 Paths that go to the exit without going through the prologue will use
|
|
592 a simple_return instead of the epilogue. We maximize the number of
|
|
593 those, making sure to only duplicate blocks that can be duplicated.
|
|
594 If the prologue can then still be placed in multiple locations, we
|
|
595 place it as early as possible.
|
|
596
|
|
597 An example, where we duplicate blocks with control flow (legend:
|
|
598 _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
|
|
599 be taken to point down or to the right, to simplify the diagram; here,
|
|
600 block 3 needs a prologue, the rest does not):
|
|
601
|
|
602
|
|
603 B B
|
|
604 | |
|
|
605 2 2
|
|
606 |\ |\
|
|
607 | 3 becomes | 3
|
|
608 |/ | \
|
|
609 4 7 4
|
|
610 |\ |\ |\
|
|
611 | 5 | 8 | 5
|
|
612 |/ |/ |/
|
|
613 6 9 6
|
|
614 | | |
|
|
615 R S R
|
|
616
|
|
617
|
|
618 (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
|
|
619 edge 2->3).
|
|
620
|
|
621 Another example, where part of a loop is duplicated (again, bb 3 is
|
|
622 the only block that needs a prologue):
|
|
623
|
|
624
|
|
625 B 3<-- B ->3<--
|
|
626 | | | | | | |
|
|
627 | v | becomes | | v |
|
|
628 2---4--- 2---5-- 4---
|
|
629 | | |
|
|
630 R S R
|
|
631
|
|
632
|
|
633 (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
|
|
634
|
|
635 ENTRY_EDGE is the edge where the prologue will be placed, possibly
|
|
636 changed by this function. PROLOGUE_SEQ is the prologue we will insert. */
|
|
637
|
|
638 void
|
|
639 try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
|
|
640 {
|
|
641 /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
|
|
642 no sense to shrink-wrap: then do not shrink-wrap! */
|
|
643
|
|
644 if (!SHRINK_WRAPPING_ENABLED)
|
|
645 return;
|
|
646
|
|
647 if (crtl->profile && !targetm.profile_before_prologue ())
|
|
648 return;
|
|
649
|
|
650 if (crtl->calls_eh_return)
|
|
651 return;
|
|
652
|
|
653 bool empty_prologue = true;
|
|
654 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
|
|
655 if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
|
|
656 {
|
|
657 empty_prologue = false;
|
|
658 break;
|
|
659 }
|
|
660 if (empty_prologue)
|
|
661 return;
|
|
662
|
|
663 /* Move some code down to expose more shrink-wrapping opportunities. */
|
|
664
|
|
665 basic_block entry = (*entry_edge)->dest;
|
|
666 prepare_shrink_wrap (entry);
|
|
667
|
|
668 if (dump_file)
|
|
669 fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
|
|
670
|
|
671 /* Compute the registers set and used in the prologue. */
|
|
672
|
|
673 HARD_REG_SET prologue_clobbered, prologue_used;
|
|
674 CLEAR_HARD_REG_SET (prologue_clobbered);
|
|
675 CLEAR_HARD_REG_SET (prologue_used);
|
|
676 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
|
|
677 if (NONDEBUG_INSN_P (insn))
|
|
678 {
|
|
679 HARD_REG_SET this_used;
|
|
680 CLEAR_HARD_REG_SET (this_used);
|
|
681 note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
|
|
682 AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
|
|
683 IOR_HARD_REG_SET (prologue_used, this_used);
|
|
684 note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered);
|
|
685 }
|
|
686 CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
|
|
687 if (frame_pointer_needed)
|
|
688 CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
|
|
689
|
|
690 /* Find out what registers are set up by the prologue; any use of these
|
|
691 cannot happen before the prologue. */
|
|
692
|
|
693 struct hard_reg_set_container set_up_by_prologue;
|
|
694 CLEAR_HARD_REG_SET (set_up_by_prologue.set);
|
|
695 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
|
|
696 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
|
|
697 if (frame_pointer_needed)
|
|
698 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
|
|
699 HARD_FRAME_POINTER_REGNUM);
|
|
700 if (pic_offset_table_rtx
|
|
701 && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
|
|
702 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
|
|
703 PIC_OFFSET_TABLE_REGNUM);
|
|
704 if (crtl->drap_reg)
|
|
705 add_to_hard_reg_set (&set_up_by_prologue.set,
|
|
706 GET_MODE (crtl->drap_reg),
|
|
707 REGNO (crtl->drap_reg));
|
|
708 if (targetm.set_up_by_prologue)
|
|
709 targetm.set_up_by_prologue (&set_up_by_prologue);
|
|
710
|
|
711 /* We will insert the prologue before the basic block PRO. PRO should
|
|
712 dominate all basic blocks that need the prologue to be executed
|
|
713 before them. First, make PRO the "tightest wrap" possible. */
|
|
714
|
|
715 calculate_dominance_info (CDI_DOMINATORS);
|
|
716
|
|
717 basic_block pro = 0;
|
|
718
|
|
719 basic_block bb;
|
|
720 edge e;
|
|
721 edge_iterator ei;
|
|
722 FOR_EACH_BB_FN (bb, cfun)
|
|
723 {
|
|
724 rtx_insn *insn;
|
|
725 FOR_BB_INSNS (bb, insn)
|
|
726 if (NONDEBUG_INSN_P (insn)
|
|
727 && requires_stack_frame_p (insn, prologue_used,
|
|
728 set_up_by_prologue.set))
|
|
729 {
|
|
730 if (dump_file)
|
|
731 fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
|
|
732 pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
|
|
733 break;
|
|
734 }
|
|
735 }
|
|
736
|
|
737 /* If nothing needs a prologue, just put it at the start. This really
|
|
738 shouldn't happen, but we cannot fix it here. */
|
|
739
|
|
740 if (pro == 0)
|
|
741 {
|
|
742 if (dump_file)
|
|
743 fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
|
|
744 "putting it at the start.\n");
|
|
745 pro = entry;
|
|
746 }
|
|
747
|
|
748 if (dump_file)
|
|
749 fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
|
|
750 pro->index);
|
|
751
|
|
752 /* Now see if we can put the prologue at the start of PRO. Putting it
|
|
753 there might require duplicating a block that cannot be duplicated,
|
|
754 or in some cases we cannot insert the prologue there at all. If PRO
|
|
755 wont't do, try again with the immediate dominator of PRO, and so on.
|
|
756
|
|
757 The blocks that need duplicating are those reachable from PRO but
|
|
758 not dominated by it. We keep in BB_WITH a bitmap of the blocks
|
|
759 reachable from PRO that we already found, and in VEC a stack of
|
|
760 those we still need to consider (to find successors). */
|
|
761
|
|
762 auto_bitmap bb_with;
|
|
763 bitmap_set_bit (bb_with, pro->index);
|
|
764
|
|
765 vec<basic_block> vec;
|
|
766 vec.create (n_basic_blocks_for_fn (cfun));
|
|
767 vec.quick_push (pro);
|
|
768
|
|
769 unsigned max_grow_size = get_uncond_jump_length ();
|
|
770 max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
|
|
771
|
|
772 while (!vec.is_empty () && pro != entry)
|
|
773 {
|
|
774 while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
|
|
775 {
|
|
776 pro = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
777
|
|
778 if (bitmap_set_bit (bb_with, pro->index))
|
|
779 vec.quick_push (pro);
|
|
780 }
|
|
781
|
|
782 basic_block bb = vec.pop ();
|
|
783 if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
|
|
784 while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
785 {
|
|
786 gcc_assert (pro != entry);
|
|
787
|
|
788 pro = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
789
|
|
790 if (bitmap_set_bit (bb_with, pro->index))
|
|
791 vec.quick_push (pro);
|
|
792 }
|
|
793
|
|
794 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
795 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
|
|
796 && bitmap_set_bit (bb_with, e->dest->index))
|
|
797 vec.quick_push (e->dest);
|
|
798 }
|
|
799
|
|
800 if (dump_file)
|
|
801 fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
|
|
802 pro->index);
|
|
803
|
|
804 /* If we can move PRO back without having to duplicate more blocks, do so.
|
|
805 We do this because putting the prologue earlier is better for scheduling.
|
|
806
|
|
807 We can move back to a block PRE if every path from PRE will eventually
|
|
808 need a prologue, that is, PRO is a post-dominator of PRE. PRE needs
|
|
809 to dominate every block reachable from itself. We keep in BB_TMP a
|
|
810 bitmap of the blocks reachable from PRE that we already found, and in
|
|
811 VEC a stack of those we still need to consider.
|
|
812
|
|
813 Any block reachable from PRE is also reachable from all predecessors
|
|
814 of PRE, so if we find we need to move PRE back further we can leave
|
|
815 everything not considered so far on the stack. Any block dominated
|
|
816 by PRE is also dominated by all other dominators of PRE, so anything
|
|
817 found good for some PRE does not need to be reconsidered later.
|
|
818
|
|
819 We don't need to update BB_WITH because none of the new blocks found
|
|
820 can jump to a block that does not need the prologue. */
|
|
821
|
|
822 if (pro != entry)
|
|
823 {
|
|
824 calculate_dominance_info (CDI_POST_DOMINATORS);
|
|
825
|
|
826 auto_bitmap bb_tmp;
|
|
827 bitmap_copy (bb_tmp, bb_with);
|
|
828 basic_block last_ok = pro;
|
|
829 vec.truncate (0);
|
|
830
|
|
831 while (pro != entry)
|
|
832 {
|
|
833 basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
834 if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
|
|
835 break;
|
|
836
|
|
837 if (bitmap_set_bit (bb_tmp, pre->index))
|
|
838 vec.quick_push (pre);
|
|
839
|
|
840 bool ok = true;
|
|
841 while (!vec.is_empty ())
|
|
842 {
|
|
843 if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
|
|
844 {
|
|
845 ok = false;
|
|
846 break;
|
|
847 }
|
|
848
|
|
849 basic_block bb = vec.pop ();
|
|
850 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
851 if (bitmap_set_bit (bb_tmp, e->dest->index))
|
|
852 vec.quick_push (e->dest);
|
|
853 }
|
|
854
|
|
855 if (ok && can_get_prologue (pre, prologue_clobbered))
|
|
856 last_ok = pre;
|
|
857
|
|
858 pro = pre;
|
|
859 }
|
|
860
|
|
861 pro = last_ok;
|
|
862
|
|
863 free_dominance_info (CDI_POST_DOMINATORS);
|
|
864 }
|
|
865
|
|
866 vec.release ();
|
|
867
|
|
868 if (dump_file)
|
|
869 fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
|
|
870 pro->index);
|
|
871
|
|
872 if (pro == entry)
|
|
873 {
|
|
874 free_dominance_info (CDI_DOMINATORS);
|
|
875 return;
|
|
876 }
|
|
877
|
|
878 /* Compute what fraction of the frequency and count of the blocks that run
|
|
879 both with and without prologue are for running with prologue. This gives
|
|
880 the correct answer for reducible flow graphs; for irreducible flow graphs
|
|
881 our profile is messed up beyond repair anyway. */
|
|
882
|
|
883 gcov_type num = 0;
|
|
884 gcov_type den = 0;
|
|
885
|
|
886 FOR_EACH_EDGE (e, ei, pro->preds)
|
|
887 if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
888 {
|
|
889 num += EDGE_FREQUENCY (e);
|
|
890 den += e->src->frequency;
|
|
891 }
|
|
892
|
|
893 if (den == 0)
|
|
894 den = 1;
|
|
895
|
|
896 /* All is okay, so do it. */
|
|
897
|
|
898 crtl->shrink_wrapped = true;
|
|
899 if (dump_file)
|
|
900 fprintf (dump_file, "Performing shrink-wrapping.\n");
|
|
901
|
|
902 /* Copy the blocks that can run both with and without prologue. The
|
|
903 originals run with prologue, the copies without. Store a pointer to
|
|
904 the copy in the ->aux field of the original. */
|
|
905
|
|
906 FOR_EACH_BB_FN (bb, cfun)
|
|
907 if (bitmap_bit_p (bb_with, bb->index)
|
|
908 && !dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
909 {
|
|
910 basic_block dup = duplicate_block (bb, 0, 0);
|
|
911
|
|
912 bb->aux = dup;
|
|
913
|
|
914 if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
|
|
915 emit_barrier_after_bb (dup);
|
|
916
|
|
917 if (EDGE_COUNT (dup->succs) == 0)
|
|
918 emit_barrier_after_bb (dup);
|
|
919
|
|
920 if (dump_file)
|
|
921 fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
|
|
922
|
|
923 bb->frequency = RDIV (num * bb->frequency, den);
|
|
924 dup->frequency -= bb->frequency;
|
|
925 bb->count = bb->count.apply_scale (num, den);
|
|
926 dup->count -= bb->count;
|
|
927 }
|
|
928
|
|
929 /* Now change the edges to point to the copies, where appropriate. */
|
|
930
|
|
931 FOR_EACH_BB_FN (bb, cfun)
|
|
932 if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
933 {
|
|
934 basic_block src = bb;
|
|
935 if (bitmap_bit_p (bb_with, bb->index))
|
|
936 src = (basic_block) bb->aux;
|
|
937
|
|
938 FOR_EACH_EDGE (e, ei, src->succs)
|
|
939 {
|
|
940 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
941 continue;
|
|
942
|
|
943 if (bitmap_bit_p (bb_with, e->dest->index)
|
|
944 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
|
|
945 {
|
|
946 if (dump_file)
|
|
947 fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
|
|
948 e->src->index, e->dest->index,
|
|
949 ((basic_block) e->dest->aux)->index);
|
|
950 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
|
|
951 }
|
|
952 else if (e->flags & EDGE_FALLTHRU
|
|
953 && bitmap_bit_p (bb_with, bb->index))
|
|
954 force_nonfallthru (e);
|
|
955 }
|
|
956 }
|
|
957
|
|
958 /* Also redirect the function entry edge if necessary. */
|
|
959
|
|
960 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
|
|
961 if (bitmap_bit_p (bb_with, e->dest->index)
|
|
962 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
|
|
963 {
|
|
964 basic_block split_bb = split_edge (e);
|
|
965 e = single_succ_edge (split_bb);
|
|
966 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
|
|
967 }
|
|
968
|
|
969 /* Make a simple_return for those exits that run without prologue. */
|
|
970
|
|
971 FOR_EACH_BB_REVERSE_FN (bb, cfun)
|
|
972 if (!bitmap_bit_p (bb_with, bb->index))
|
|
973 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
974 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
975 handle_simple_exit (e);
|
|
976
|
|
977 /* Finally, we want a single edge to put the prologue on. Make a new
|
|
978 block before the PRO block; the edge between them is the edge we want.
|
|
979 Then redirect those edges into PRO that come from blocks without the
|
|
980 prologue, to point to the new block instead. The new prologue block
|
|
981 is put at the end of the insn chain. */
|
|
982
|
|
983 basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
|
|
984 BB_COPY_PARTITION (new_bb, pro);
|
|
985 new_bb->count = profile_count::zero ();
|
|
986 if (dump_file)
|
|
987 fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
|
|
988
|
|
989 for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
|
|
990 {
|
|
991 if (bitmap_bit_p (bb_with, e->src->index)
|
|
992 || dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
993 {
|
|
994 ei_next (&ei);
|
|
995 continue;
|
|
996 }
|
|
997
|
|
998 new_bb->count += e->src->count.apply_probability (e->probability);
|
|
999 new_bb->frequency += EDGE_FREQUENCY (e);
|
|
1000
|
|
1001 redirect_edge_and_branch_force (e, new_bb);
|
|
1002 if (dump_file)
|
|
1003 fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
|
|
1004 }
|
|
1005
|
|
1006 *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
|
|
1007 force_nonfallthru (*entry_edge);
|
|
1008
|
|
1009 free_dominance_info (CDI_DOMINATORS);
|
|
1010 }
|
|
1011
|
|
1012 /* Separate shrink-wrapping
|
|
1013
|
|
1014 Instead of putting all of the prologue and epilogue in one spot, we
|
|
1015 can put parts of it in places where those components are executed less
|
|
1016 frequently. The following code does this, for prologue and epilogue
|
|
1017 components that can be put in more than one location, and where those
|
|
1018 components can be executed more than once (the epilogue component will
|
|
1019 always be executed before the prologue component is executed a second
|
|
1020 time).
|
|
1021
|
|
1022 What exactly is a component is target-dependent. The more usual
|
|
1023 components are simple saves/restores to/from the frame of callee-saved
|
|
1024 registers. This code treats components abstractly (as an sbitmap),
|
|
1025 letting the target handle all details.
|
|
1026
|
|
1027 Prologue components are placed in such a way that for every component
|
|
1028 the prologue is executed as infrequently as possible. We do this by
|
|
1029 walking the dominator tree, comparing the cost of placing a prologue
|
|
1030 component before a block to the sum of costs determined for all subtrees
|
|
1031 of that block.
|
|
1032
|
|
1033 From this placement, we then determine for each component all blocks
|
|
1034 where at least one of this block's dominators (including itself) will
|
|
1035 get a prologue inserted. That then is how the components are placed.
|
|
1036 We could place the epilogue components a bit smarter (we can save a
|
|
1037 bit of code size sometimes); this is a possible future improvement.
|
|
1038
|
|
1039 Prologues and epilogues are preferably placed into a block, either at
|
|
1040 the beginning or end of it, if it is needed for all predecessor resp.
|
|
1041 successor edges; or placed on the edge otherwise.
|
|
1042
|
|
1043 If the placement of any prologue/epilogue leads to a situation we cannot
|
|
1044 handle (for example, an abnormal edge would need to be split, or some
|
|
1045 targets want to use some specific registers that may not be available
|
|
1046 where we want to put them), separate shrink-wrapping for the components
|
|
1047 in that prologue/epilogue is aborted. */
|
|
1048
|
|
1049
|
|
1050 /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
|
|
1051 label LABEL. */
|
|
1052 static void
|
|
1053 dump_components (const char *label, sbitmap components)
|
|
1054 {
|
|
1055 if (bitmap_empty_p (components))
|
|
1056 return;
|
|
1057
|
|
1058 fprintf (dump_file, " [%s", label);
|
|
1059
|
|
1060 for (unsigned int j = 0; j < components->n_bits; j++)
|
|
1061 if (bitmap_bit_p (components, j))
|
|
1062 fprintf (dump_file, " %u", j);
|
|
1063
|
|
1064 fprintf (dump_file, "]");
|
|
1065 }
|
|
1066
|
|
/* The data we collect for each bb.  All four sbitmaps are allocated by
   init_separate_shrink_wrap and freed by fini_separate_shrink_wrap; access
   is via the SW() accessor on bb->aux.  */
struct sw {
  /* What components does this BB need?  Filled in from the target hook
     shrink_wrap.components_for_bb.  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  */
  gcov_type total_cost;
};
|
|
1094
|
|
1095 /* A helper function for accessing the pass-specific info. */
|
|
1096 static inline struct sw *
|
|
1097 SW (basic_block bb)
|
|
1098 {
|
|
1099 gcc_assert (bb->aux);
|
|
1100 return (struct sw *) bb->aux;
|
|
1101 }
|
|
1102
|
|
1103 /* Create the pass-specific data structures for separately shrink-wrapping
|
|
1104 with components COMPONENTS. */
|
|
1105 static void
|
|
1106 init_separate_shrink_wrap (sbitmap components)
|
|
1107 {
|
|
1108 basic_block bb;
|
|
1109 FOR_ALL_BB_FN (bb, cfun)
|
|
1110 {
|
|
1111 bb->aux = xcalloc (1, sizeof (struct sw));
|
|
1112
|
|
1113 SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
|
|
1114
|
|
1115 /* Mark all basic blocks without successor as needing all components.
|
|
1116 This avoids problems in at least cfgcleanup, sel-sched, and
|
|
1117 regrename (largely to do with all paths to such a block still
|
|
1118 needing the same dwarf CFI info). */
|
|
1119 if (EDGE_COUNT (bb->succs) == 0)
|
|
1120 bitmap_copy (SW (bb)->needs_components, components);
|
|
1121
|
|
1122 if (dump_file)
|
|
1123 {
|
|
1124 fprintf (dump_file, "bb %d components:", bb->index);
|
|
1125 dump_components ("has", SW (bb)->needs_components);
|
|
1126 fprintf (dump_file, "\n");
|
|
1127 }
|
|
1128
|
|
1129 SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1130 SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1131 SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1132 bitmap_clear (SW (bb)->has_components);
|
|
1133 }
|
|
1134 }
|
|
1135
|
|
1136 /* Destroy the pass-specific data. */
|
|
1137 static void
|
|
1138 fini_separate_shrink_wrap (void)
|
|
1139 {
|
|
1140 basic_block bb;
|
|
1141 FOR_ALL_BB_FN (bb, cfun)
|
|
1142 if (bb->aux)
|
|
1143 {
|
|
1144 sbitmap_free (SW (bb)->needs_components);
|
|
1145 sbitmap_free (SW (bb)->has_components);
|
|
1146 sbitmap_free (SW (bb)->head_components);
|
|
1147 sbitmap_free (SW (bb)->tail_components);
|
|
1148 free (bb->aux);
|
|
1149 bb->aux = 0;
|
|
1150 }
|
|
1151 }
|
|
1152
|
|
/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
	 if the block does not have the component itself, walk down.  */
      if (first_visit)
	{
	  /* Initialize the cost.  The cost is the block execution frequency
	     that does not come from backedges.  Calculating this by simply
	     adding the cost of all edges that aren't backedges does not
	     work: this does not always add up to the block frequency at
	     all, and even if it does, rounding error makes for bad
	     decisions.  */
	  SW (bb)->own_cost = bb->frequency;

	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
	      {
		/* Subtract each backedge's frequency, saturating at zero
		   so rounding error cannot make the cost go negative.  */
		if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
		  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
		else
		  SW (bb)->own_cost = 0;
	      }

	  SW (bb)->total_cost = 0;

	  /* Pre-order step: descend into the first dominator child before
	     deciding about this block, unless this block needs the
	     component itself (then the decision is forced anyway).  */
	  if (!bitmap_bit_p (SW (bb)->needs_components, which)
	      && first_dom_son (CDI_DOMINATORS, bb))
	    {
	      bb = first_dom_son (CDI_DOMINATORS, bb);
	      continue;
	    }
	}

      /* If this block does need the component itself, or it is cheaper to
	 put the prologue here than in all the descendants that need it,
	 mark it so.  If this block's immediate post-dominator is dominated
	 by this block, and that needs the prologue, we can put it on this
	 block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
	  || SW (bb)->total_cost > SW (bb)->own_cost)
	{
	  SW (bb)->total_cost = SW (bb)->own_cost;
	  bitmap_set_bit (SW (bb)->has_components, which);
	}
      else
	{
	  basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	  if (dominated_by_p (CDI_DOMINATORS, kid, bb)
	      && bitmap_bit_p (SW (kid)->has_components, which))
	    {
	      SW (bb)->total_cost = SW (bb)->own_cost;
	      bitmap_set_bit (SW (bb)->has_components, which);
	    }
	}

      /* We are back where we started, so we are done now.  */
      if (bb == head)
	return;

      /* We now know the cost of the subtree rooted at the current block.
	 Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary: once the children's
	 accumulated cost already exceeds the parent's own cost, the
	 remaining siblings cannot change the parent's decision.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
	  && SW (parent)->total_cost <= SW (parent)->own_cost)
	{
	  bb = next_dom_son (CDI_DOMINATORS, bb);
	  first_visit = true;
	}
      else
	{
	  bb = parent;
	  first_visit = false;
	}
    }
}
|
|
1252
|
|
1253 /* Set HAS_COMPONENTS in every block to the maximum it can be set to without
|
|
1254 setting it on any path from entry to exit where it was not already set
|
|
1255 somewhere (or, for blocks that have no path to the exit, consider only
|
|
1256 paths from the entry to the block itself). */
|
|
1257 static void
|
|
1258 spread_components (sbitmap components)
|
|
1259 {
|
|
1260 basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
|
|
1261 basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
|
|
1262
|
|
1263 /* A stack of all blocks left to consider, and a bitmap of all blocks
|
|
1264 on that stack. */
|
|
1265 vec<basic_block> todo;
|
|
1266 todo.create (n_basic_blocks_for_fn (cfun));
|
|
1267 auto_bitmap seen;
|
|
1268
|
|
1269 auto_sbitmap old (SBITMAP_SIZE (components));
|
|
1270
|
|
1271 /* Find for every block the components that are *not* needed on some path
|
|
1272 from the entry to that block. Do this with a flood fill from the entry
|
|
1273 block. Every block can be visited at most as often as the number of
|
|
1274 components (plus one), and usually much less often. */
|
|
1275
|
|
1276 if (dump_file)
|
|
1277 fprintf (dump_file, "Spreading down...\n");
|
|
1278
|
|
1279 basic_block bb;
|
|
1280 FOR_ALL_BB_FN (bb, cfun)
|
|
1281 bitmap_clear (SW (bb)->head_components);
|
|
1282
|
|
1283 bitmap_copy (SW (entry_block)->head_components, components);
|
|
1284
|
|
1285 edge e;
|
|
1286 edge_iterator ei;
|
|
1287
|
|
1288 todo.quick_push (single_succ (entry_block));
|
|
1289 bitmap_set_bit (seen, single_succ (entry_block)->index);
|
|
1290 while (!todo.is_empty ())
|
|
1291 {
|
|
1292 bb = todo.pop ();
|
|
1293
|
|
1294 bitmap_copy (old, SW (bb)->head_components);
|
|
1295
|
|
1296 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
1297 bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
|
|
1298 SW (e->src)->head_components);
|
|
1299
|
|
1300 bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
|
|
1301 SW (bb)->has_components);
|
|
1302
|
|
1303 if (!bitmap_equal_p (old, SW (bb)->head_components))
|
|
1304 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1305 if (bitmap_set_bit (seen, e->dest->index))
|
|
1306 todo.quick_push (e->dest);
|
|
1307
|
|
1308 bitmap_clear_bit (seen, bb->index);
|
|
1309 }
|
|
1310
|
|
1311 /* Find for every block the components that are *not* needed on some reverse
|
|
1312 path from the exit to that block. */
|
|
1313
|
|
1314 if (dump_file)
|
|
1315 fprintf (dump_file, "Spreading up...\n");
|
|
1316
|
|
1317 /* First, mark all blocks not reachable from the exit block as not needing
|
|
1318 any component on any path to the exit. Mark everything, and then clear
|
|
1319 again by a flood fill. */
|
|
1320
|
|
1321 FOR_ALL_BB_FN (bb, cfun)
|
|
1322 bitmap_copy (SW (bb)->tail_components, components);
|
|
1323
|
|
1324 FOR_EACH_EDGE (e, ei, exit_block->preds)
|
|
1325 {
|
|
1326 todo.quick_push (e->src);
|
|
1327 bitmap_set_bit (seen, e->src->index);
|
|
1328 }
|
|
1329
|
|
1330 while (!todo.is_empty ())
|
|
1331 {
|
|
1332 bb = todo.pop ();
|
|
1333
|
|
1334 if (!bitmap_empty_p (SW (bb)->tail_components))
|
|
1335 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
1336 if (bitmap_set_bit (seen, e->src->index))
|
|
1337 todo.quick_push (e->src);
|
|
1338
|
|
1339 bitmap_clear (SW (bb)->tail_components);
|
|
1340
|
|
1341 bitmap_clear_bit (seen, bb->index);
|
|
1342 }
|
|
1343
|
|
1344 /* And then, flood fill backwards to find for every block the components
|
|
1345 not needed on some path to the exit. */
|
|
1346
|
|
1347 bitmap_copy (SW (exit_block)->tail_components, components);
|
|
1348
|
|
1349 FOR_EACH_EDGE (e, ei, exit_block->preds)
|
|
1350 {
|
|
1351 todo.quick_push (e->src);
|
|
1352 bitmap_set_bit (seen, e->src->index);
|
|
1353 }
|
|
1354
|
|
1355 while (!todo.is_empty ())
|
|
1356 {
|
|
1357 bb = todo.pop ();
|
|
1358
|
|
1359 bitmap_copy (old, SW (bb)->tail_components);
|
|
1360
|
|
1361 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1362 bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
|
|
1363 SW (e->dest)->tail_components);
|
|
1364
|
|
1365 bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
|
|
1366 SW (bb)->has_components);
|
|
1367
|
|
1368 if (!bitmap_equal_p (old, SW (bb)->tail_components))
|
|
1369 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
1370 if (bitmap_set_bit (seen, e->src->index))
|
|
1371 todo.quick_push (e->src);
|
|
1372
|
|
1373 bitmap_clear_bit (seen, bb->index);
|
|
1374 }
|
|
1375
|
|
1376 /* Finally, mark everything not not needed both forwards and backwards. */
|
|
1377
|
|
1378 FOR_EACH_BB_FN (bb, cfun)
|
|
1379 {
|
|
1380 bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
|
|
1381 SW (bb)->tail_components);
|
|
1382 bitmap_and_compl (SW (bb)->has_components, components,
|
|
1383 SW (bb)->head_components);
|
|
1384 }
|
|
1385
|
|
1386 FOR_ALL_BB_FN (bb, cfun)
|
|
1387 {
|
|
1388 if (dump_file)
|
|
1389 {
|
|
1390 fprintf (dump_file, "bb %d components:", bb->index);
|
|
1391 dump_components ("has", SW (bb)->has_components);
|
|
1392 fprintf (dump_file, "\n");
|
|
1393 }
|
|
1394 }
|
|
1395 }
|
|
1396
|
|
/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.  */
static void
disqualify_problematic_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which components we want pro/epilogues for here: an
	     epilogue on an edge is needed for components active in the
	     source but not the destination, and vice versa for a
	     prologue.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);

	  /* Ask the target what it thinks about things.  */
	  if (!bitmap_empty_p (epi))
	    targetm.shrink_wrap.disqualify_components (components, e, epi,
						       false);
	  if (!bitmap_empty_p (pro))
	    targetm.shrink_wrap.disqualify_components (components, e, pro,
						       true);

	  /* If this edge doesn't need splitting, we're fine.  */
	  if (single_pred_p (e->dest)
	      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  /* If the edge can be split, that is fine too.  */
	  if ((e->flags & EDGE_ABNORMAL) == 0)
	    continue;

	  /* We also can handle sibcalls.  */
	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      gcc_assert (e->flags & EDGE_SIBCALL);
	      continue;
	    }

	  /* Remove from consideration those components we would need
	     pro/epilogues for on edges where we cannot insert them.  */
	  bitmap_and_compl (components, components, epi);
	  bitmap_and_compl (components, components, pro);

	  /* NB: the removal above already happened, so the subset tests
	     below are false exactly when EPI resp. PRO is non-empty, i.e.
	     when this edge actually disqualified something.  */
	  if (dump_file && !bitmap_subset_p (epi, components))
	    {
	      fprintf (dump_file, " BAD epi %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("epi", epi);
	      fprintf (dump_file, "\n");
	    }

	  if (dump_file && !bitmap_subset_p (pro, components))
	    {
	      fprintf (dump_file, " BAD pro %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("pro", pro);
	      fprintf (dump_file, "\n");
	    }
	}
    }
}
|
|
1470
|
|
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks.  Records what was placed in each
   block's HEAD_COMPONENTS so later phases don't emit it again.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  /* An abnormal incoming edge cannot carry any pro/epilogue code,
	     so nothing can be placed at this block's head at all.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similarly, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (pro, pro, tmp);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, " bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  /* Record the insns so they are recognized as prologue (for
	     debug info / frame-related bookkeeping).  */
	  record_prologue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
	}

      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
	}
    }
}
|
|
1555
|
|
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks.  Must run after
   emit_common_heads_for_components, since it consults the successors'
   HEAD_COMPONENTS to avoid emitting a component twice.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
	continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* An abnormal outgoing edge rules out placing anything at this
	     block's tail.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge, and also those that are already put at the head
	     of the successor block.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similarly, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (pro, pro, tmp);
	}

      /* If the last insn of this block is a control flow insn we cannot
	 put anything after it.  We can put our code before it instead,
	 but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
	{
	  bitmap_clear (epi);
	  bitmap_clear (pro);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, " bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.  */
      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
	}

      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
	}
    }
}
|
|
1661
|
|
/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly.  This is the final placement phase;
   it runs after the common-heads and common-tails phases and consults their
   HEAD_COMPONENTS / TAIL_COMPONENTS records to avoid duplication.  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Skip blocks without pass data in ->aux (presumably blocks created
	 by the edge splitting below, after init ran — TODO confirm).  */
      if (!bb->aux)
	continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which pro/epilogue components are needed on this edge.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (epi, epi, components);
	  bitmap_and (pro, pro, components);

	  /* Deselect those we already have put at the head or tail of the
	     edge's dest resp. src.  */
	  bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
	  bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
	  bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
	  bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

	  if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, " %d->%d", e->src->index,
			   e->dest->index);
		  dump_components ("epi", epi);
		  dump_components ("pro", pro);
		  if (e->flags & EDGE_SIBCALL)
		    fprintf (dump_file, " (SIBCALL)");
		  else if (e->flags & EDGE_ABNORMAL)
		    fprintf (dump_file, " (ABNORMAL)");
		  fprintf (dump_file, "\n");
		}

	      /* Put the epilogue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_epilogue_components (epi);
	      rtx_insn *seq = get_insns ();
	      end_sequence ();
	      record_epilogue_seq (seq);

	      if (e->flags & EDGE_SIBCALL)
		{
		  /* A sibcall edge cannot be split; emit the epilogue
		     right before the sibling call insn instead.  */
		  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

		  rtx_insn *insn = BB_END (e->src);
		  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
		  emit_insn_before (seq, insn);
		}
	      else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
		{
		  /* insert_insn_on_edge cannot be used on an edge into the
		     exit block; split it and emit into the new block.  */
		  gcc_assert (e->flags & EDGE_FALLTHRU);
		  basic_block new_bb = split_edge (e);
		  emit_insn_after (seq, BB_END (new_bb));
		}
	      else
		insert_insn_on_edge (seq, e);

	      /* Put the prologue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_prologue_components (pro);
	      seq = get_insns ();
	      end_sequence ();
	      record_prologue_seq (seq);

	      insert_insn_on_edge (seq, e);
	    }
	}
    }

  /* Materialize all insns queued on edges by insert_insn_on_edge.  */
  commit_edge_insertions ();
}
|
|
1748
|
|
1749 /* The main entry point to this subpass. FIRST_BB is where the prologue
|
|
1750 would be normally put. */
|
|
1751 void
|
|
1752 try_shrink_wrapping_separate (basic_block first_bb)
|
|
1753 {
|
|
1754 if (HAVE_cc0)
|
|
1755 return;
|
|
1756
|
|
1757 if (!(SHRINK_WRAPPING_ENABLED
|
|
1758 && flag_shrink_wrap_separate
|
|
1759 && optimize_function_for_speed_p (cfun)
|
|
1760 && targetm.shrink_wrap.get_separate_components))
|
|
1761 return;
|
|
1762
|
|
1763 /* We don't handle "strange" functions. */
|
|
1764 if (cfun->calls_alloca
|
|
1765 || cfun->calls_setjmp
|
|
1766 || cfun->can_throw_non_call_exceptions
|
|
1767 || crtl->calls_eh_return
|
|
1768 || crtl->has_nonlocal_goto
|
|
1769 || crtl->saves_all_registers)
|
|
1770 return;
|
|
1771
|
|
1772 /* Ask the target what components there are. If it returns NULL, don't
|
|
1773 do anything. */
|
|
1774 sbitmap components = targetm.shrink_wrap.get_separate_components ();
|
|
1775 if (!components)
|
|
1776 return;
|
|
1777
|
|
1778 /* We need LIVE info, not defining anything in the entry block and not
|
|
1779 using anything in the exit block. A block then needs a component if
|
|
1780 the register for that component is in the IN or GEN or KILL set for
|
|
1781 that block. */
|
|
1782 df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
|
|
1783 df_update_entry_exit_and_calls ();
|
|
1784 df_live_add_problem ();
|
|
1785 df_live_set_all_dirty ();
|
|
1786 df_analyze ();
|
|
1787
|
|
1788 calculate_dominance_info (CDI_DOMINATORS);
|
|
1789 calculate_dominance_info (CDI_POST_DOMINATORS);
|
|
1790
|
|
1791 init_separate_shrink_wrap (components);
|
|
1792
|
|
1793 sbitmap_iterator sbi;
|
|
1794 unsigned int j;
|
|
1795 EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
|
|
1796 place_prologue_for_one_component (j, first_bb);
|
|
1797
|
|
1798 spread_components (components);
|
|
1799
|
|
1800 disqualify_problematic_components (components);
|
|
1801
|
|
1802 /* Don't separately shrink-wrap anything where the "main" prologue will
|
|
1803 go; the target code can often optimize things if it is presented with
|
|
1804 all components together (say, if it generates store-multiple insns). */
|
|
1805 bitmap_and_compl (components, components, SW (first_bb)->has_components);
|
|
1806
|
|
1807 if (bitmap_empty_p (components))
|
|
1808 {
|
|
1809 if (dump_file)
|
|
1810 fprintf (dump_file, "Not wrapping anything separately.\n");
|
|
1811 }
|
|
1812 else
|
|
1813 {
|
|
1814 if (dump_file)
|
|
1815 {
|
|
1816 fprintf (dump_file, "The components we wrap separately are");
|
|
1817 dump_components ("sep", components);
|
|
1818 fprintf (dump_file, "\n");
|
|
1819
|
|
1820 fprintf (dump_file, "... Inserting common heads...\n");
|
|
1821 }
|
|
1822
|
|
1823 emit_common_heads_for_components (components);
|
|
1824
|
|
1825 if (dump_file)
|
|
1826 fprintf (dump_file, "... Inserting common tails...\n");
|
|
1827
|
|
1828 emit_common_tails_for_components (components);
|
|
1829
|
|
1830 if (dump_file)
|
|
1831 fprintf (dump_file, "... Inserting the more difficult ones...\n");
|
|
1832
|
|
1833 insert_prologue_epilogue_for_components (components);
|
|
1834
|
|
1835 if (dump_file)
|
|
1836 fprintf (dump_file, "... Done.\n");
|
|
1837
|
|
1838 targetm.shrink_wrap.set_handled_components (components);
|
|
1839
|
|
1840 crtl->shrink_wrapped_separate = true;
|
|
1841 }
|
|
1842
|
|
1843 fini_separate_shrink_wrap ();
|
|
1844
|
|
1845 sbitmap_free (components);
|
|
1846 free_dominance_info (CDI_DOMINATORS);
|
|
1847 free_dominance_info (CDI_POST_DOMINATORS);
|
|
1848
|
|
1849 /* All done. */
|
|
1850 df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
|
|
1851 df_update_entry_exit_and_calls ();
|
|
1852 df_live_set_all_dirty ();
|
|
1853 df_analyze ();
|
|
1854 }
|