111
|
1 /* Shrink-wrapping related optimizations.
|
131
|
2 Copyright (C) 1987-2018 Free Software Foundation, Inc.
|
111
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 /* This file handles shrink-wrapping related optimizations. */
|
|
21
|
|
22 #include "config.h"
|
|
23 #include "system.h"
|
|
24 #include "coretypes.h"
|
|
25 #include "backend.h"
|
|
26 #include "target.h"
|
|
27 #include "rtl.h"
|
|
28 #include "tree.h"
|
|
29 #include "cfghooks.h"
|
|
30 #include "df.h"
|
|
31 #include "memmodel.h"
|
|
32 #include "tm_p.h"
|
|
33 #include "regs.h"
|
|
34 #include "insn-config.h"
|
|
35 #include "emit-rtl.h"
|
|
36 #include "output.h"
|
|
37 #include "tree-pass.h"
|
|
38 #include "cfgrtl.h"
|
|
39 #include "cfgbuild.h"
|
|
40 #include "params.h"
|
|
41 #include "bb-reorder.h"
|
|
42 #include "shrink-wrap.h"
|
|
43 #include "regcprop.h"
|
|
44 #include "rtl-iter.h"
|
|
45 #include "valtrack.h"
|
|
46
|
|
47
|
|
48 /* Return true if INSN requires the stack frame to be set up.
|
|
49 PROLOGUE_USED contains the hard registers used in the function
|
|
50 prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the
|
|
51 prologue to set up for the function. */
|
|
52 bool
|
|
53 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
|
|
54 HARD_REG_SET set_up_by_prologue)
|
|
55 {
|
|
56 df_ref def, use;
|
|
57 HARD_REG_SET hardregs;
|
|
58 unsigned regno;
|
|
59
|
|
60 if (CALL_P (insn))
|
|
61 return !SIBLING_CALL_P (insn);
|
|
62
|
|
63 /* We need a frame to get the unique CFA expected by the unwinder. */
|
|
64 if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
|
|
65 return true;
|
|
66
|
|
67 CLEAR_HARD_REG_SET (hardregs);
|
|
68 FOR_EACH_INSN_DEF (def, insn)
|
|
69 {
|
|
70 rtx dreg = DF_REF_REG (def);
|
|
71
|
|
72 if (!REG_P (dreg))
|
|
73 continue;
|
|
74
|
|
75 add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
|
|
76 }
|
|
77 if (hard_reg_set_intersect_p (hardregs, prologue_used))
|
|
78 return true;
|
|
79 AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
|
|
80 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
|
|
81 if (TEST_HARD_REG_BIT (hardregs, regno)
|
|
82 && df_regs_ever_live_p (regno))
|
|
83 return true;
|
|
84
|
|
85 FOR_EACH_INSN_USE (use, insn)
|
|
86 {
|
|
87 rtx reg = DF_REF_REG (use);
|
|
88
|
|
89 if (!REG_P (reg))
|
|
90 continue;
|
|
91
|
|
92 add_to_hard_reg_set (&hardregs, GET_MODE (reg),
|
|
93 REGNO (reg));
|
|
94 }
|
|
95 if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
|
|
96 return true;
|
|
97
|
|
98 return false;
|
|
99 }
|
|
100
|
|
101 /* See whether there has a single live edge from BB, which dest uses
|
|
102 [REGNO, END_REGNO). Return the live edge if its dest bb has
|
|
103 one or two predecessors. Otherwise return NULL. */
|
|
104
|
|
105 static edge
|
|
106 live_edge_for_reg (basic_block bb, int regno, int end_regno)
|
|
107 {
|
|
108 edge e, live_edge;
|
|
109 edge_iterator ei;
|
|
110 bitmap live;
|
|
111 int i;
|
|
112
|
|
113 live_edge = NULL;
|
|
114 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
115 {
|
|
116 live = df_get_live_in (e->dest);
|
|
117 for (i = regno; i < end_regno; i++)
|
|
118 if (REGNO_REG_SET_P (live, i))
|
|
119 {
|
|
120 if (live_edge && live_edge != e)
|
|
121 return NULL;
|
|
122 live_edge = e;
|
|
123 }
|
|
124 }
|
|
125
|
|
126 /* We can sometimes encounter dead code. Don't try to move it
|
|
127 into the exit block. */
|
|
128 if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
129 return NULL;
|
|
130
|
|
131 /* Reject targets of abnormal edges. This is needed for correctness
|
|
132 on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
|
|
133 exception edges even though it is generally treated as call-saved
|
|
134 for the majority of the compilation. Moving across abnormal edges
|
|
135 isn't going to be interesting for shrink-wrap usage anyway. */
|
|
136 if (live_edge->flags & EDGE_ABNORMAL)
|
|
137 return NULL;
|
|
138
|
|
139 /* When live_edge->dest->preds == 2, we can create a new block on
|
|
140 the edge to make it meet the requirement. */
|
|
141 if (EDGE_COUNT (live_edge->dest->preds) > 2)
|
|
142 return NULL;
|
|
143
|
|
144 return live_edge;
|
|
145 }
|
|
146
|
|
147 /* Try to move INSN from BB to a successor. Return true on success.
|
|
148 USES and DEFS are the set of registers that are used and defined
|
|
149 after INSN in BB. SPLIT_P indicates whether a live edge from BB
|
|
150 is splitted or not. */
|
|
151
|
|
152 static bool
|
|
153 move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
|
|
154 const HARD_REG_SET uses,
|
|
155 const HARD_REG_SET defs,
|
|
156 bool *split_p,
|
|
157 struct dead_debug_local *debug)
|
|
158 {
|
|
159 rtx set, src, dest;
|
131
|
160 bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
|
111
|
161 unsigned int i, dregno, end_dregno;
|
|
162 unsigned int sregno = FIRST_PSEUDO_REGISTER;
|
|
163 unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
|
|
164 basic_block next_block;
|
|
165 edge live_edge;
|
|
166 rtx_insn *dinsn;
|
|
167 df_ref def;
|
|
168
|
|
169 /* Look for a simple register assignment. We don't use single_set here
|
|
170 because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
|
|
171 destinations. */
|
|
172 if (!INSN_P (insn))
|
|
173 return false;
|
|
174 set = PATTERN (insn);
|
|
175 if (GET_CODE (set) != SET)
|
|
176 return false;
|
|
177 src = SET_SRC (set);
|
|
178 dest = SET_DEST (set);
|
|
179
|
|
180 /* For the destination, we want only a register. Also disallow STACK
|
|
181 or FRAME related adjustments. They are likely part of the prologue,
|
|
182 so keep them in the entry block. */
|
|
183 if (!REG_P (dest)
|
|
184 || dest == stack_pointer_rtx
|
|
185 || dest == frame_pointer_rtx
|
|
186 || dest == hard_frame_pointer_rtx)
|
|
187 return false;
|
|
188
|
|
189 /* For the source, we want one of:
|
|
190 (1) A (non-overlapping) register
|
|
191 (2) A constant,
|
|
192 (3) An expression involving no more than one register.
|
|
193
|
|
194 That last point comes from the code following, which was originally
|
|
195 written to handle only register move operations, and still only handles
|
|
196 a single source register when checking for overlaps. Happily, the
|
|
197 same checks can be applied to expressions like (plus reg const). */
|
|
198
|
|
199 if (CONSTANT_P (src))
|
|
200 ;
|
|
201 else if (!REG_P (src))
|
|
202 {
|
|
203 rtx src_inner = NULL_RTX;
|
|
204
|
|
205 if (can_throw_internal (insn))
|
|
206 return false;
|
|
207
|
|
208 subrtx_var_iterator::array_type array;
|
|
209 FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
|
|
210 {
|
|
211 rtx x = *iter;
|
|
212 switch (GET_RTX_CLASS (GET_CODE (x)))
|
|
213 {
|
|
214 case RTX_CONST_OBJ:
|
|
215 case RTX_COMPARE:
|
|
216 case RTX_COMM_COMPARE:
|
|
217 case RTX_BIN_ARITH:
|
|
218 case RTX_COMM_ARITH:
|
|
219 case RTX_UNARY:
|
|
220 case RTX_TERNARY:
|
|
221 /* Constant or expression. Continue. */
|
|
222 break;
|
|
223
|
|
224 case RTX_OBJ:
|
|
225 case RTX_EXTRA:
|
|
226 switch (GET_CODE (x))
|
|
227 {
|
|
228 case UNSPEC:
|
|
229 case SUBREG:
|
|
230 case STRICT_LOW_PART:
|
|
231 case PC:
|
|
232 case LO_SUM:
|
|
233 /* Ok. Continue. */
|
|
234 break;
|
|
235
|
|
236 case REG:
|
|
237 /* Fail if we see a second inner register. */
|
|
238 if (src_inner != NULL)
|
|
239 return false;
|
|
240 src_inner = x;
|
|
241 break;
|
|
242
|
|
243 default:
|
|
244 return false;
|
|
245 }
|
|
246 break;
|
|
247
|
|
248 default:
|
|
249 return false;
|
|
250 }
|
|
251 }
|
|
252
|
|
253 if (src_inner != NULL)
|
|
254 src = src_inner;
|
|
255 }
|
|
256
|
|
257 /* Make sure that the source register isn't defined later in BB. */
|
|
258 if (REG_P (src))
|
|
259 {
|
|
260 sregno = REGNO (src);
|
|
261 end_sregno = END_REGNO (src);
|
|
262 if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
|
|
263 return false;
|
|
264 }
|
|
265
|
|
266 /* Make sure that the destination register isn't referenced later in BB. */
|
|
267 dregno = REGNO (dest);
|
|
268 end_dregno = END_REGNO (dest);
|
|
269 if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
|
|
270 || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
|
|
271 return false;
|
|
272
|
|
273 /* See whether there is a successor block to which we could move INSN. */
|
|
274 live_edge = live_edge_for_reg (bb, dregno, end_dregno);
|
|
275 if (!live_edge)
|
|
276 return false;
|
|
277
|
|
278 next_block = live_edge->dest;
|
|
279 /* Create a new basic block on the edge. */
|
|
280 if (EDGE_COUNT (next_block->preds) == 2)
|
|
281 {
|
|
282 /* split_edge for a block with only one successor is meaningless. */
|
|
283 if (EDGE_COUNT (bb->succs) == 1)
|
|
284 return false;
|
|
285
|
|
286 /* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */
|
|
287 if (!df_live)
|
|
288 return false;
|
|
289
|
|
290 basic_block old_dest = live_edge->dest;
|
|
291 next_block = split_edge (live_edge);
|
|
292
|
|
293 /* We create a new basic block. Call df_grow_bb_info to make sure
|
|
294 all data structures are allocated. */
|
|
295 df_grow_bb_info (df_live);
|
|
296
|
|
297 bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
|
|
298 df_get_live_in (old_dest));
|
|
299 df_set_bb_dirty (next_block);
|
|
300
|
|
301 /* We should not split more than once for a function. */
|
|
302 if (*split_p)
|
|
303 return false;
|
|
304
|
|
305 *split_p = true;
|
|
306 }
|
|
307
|
|
308 /* At this point we are committed to moving INSN, but let's try to
|
|
309 move it as far as we can. */
|
|
310 do
|
|
311 {
|
131
|
312 if (MAY_HAVE_DEBUG_BIND_INSNS)
|
111
|
313 {
|
|
314 FOR_BB_INSNS_REVERSE (bb, dinsn)
|
131
|
315 if (DEBUG_BIND_INSN_P (dinsn))
|
111
|
316 {
|
|
317 df_ref use;
|
|
318 FOR_EACH_INSN_USE (use, dinsn)
|
|
319 if (refers_to_regno_p (dregno, end_dregno,
|
|
320 DF_REF_REG (use), (rtx *) NULL))
|
|
321 dead_debug_add (debug, use, DF_REF_REGNO (use));
|
|
322 }
|
|
323 else if (dinsn == insn)
|
|
324 break;
|
|
325 }
|
|
326 live_out = df_get_live_out (bb);
|
|
327 live_in = df_get_live_in (next_block);
|
|
328 bb = next_block;
|
|
329
|
|
330 /* Check whether BB uses DEST or clobbers DEST. We need to add
|
|
331 INSN to BB if so. Either way, DEST is no longer live on entry,
|
|
332 except for any part that overlaps SRC (next loop). */
|
131
|
333 if (!*split_p)
|
|
334 {
|
|
335 bb_uses = &DF_LR_BB_INFO (bb)->use;
|
|
336 bb_defs = &DF_LR_BB_INFO (bb)->def;
|
|
337 }
|
111
|
338 if (df_live)
|
|
339 {
|
|
340 for (i = dregno; i < end_dregno; i++)
|
|
341 {
|
|
342 if (*split_p
|
|
343 || REGNO_REG_SET_P (bb_uses, i)
|
|
344 || REGNO_REG_SET_P (bb_defs, i)
|
|
345 || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
|
|
346 next_block = NULL;
|
|
347 CLEAR_REGNO_REG_SET (live_out, i);
|
|
348 CLEAR_REGNO_REG_SET (live_in, i);
|
|
349 }
|
|
350
|
|
351 /* Check whether BB clobbers SRC. We need to add INSN to BB if so.
|
|
352 Either way, SRC is now live on entry. */
|
|
353 for (i = sregno; i < end_sregno; i++)
|
|
354 {
|
|
355 if (*split_p
|
|
356 || REGNO_REG_SET_P (bb_defs, i)
|
|
357 || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
|
|
358 next_block = NULL;
|
|
359 SET_REGNO_REG_SET (live_out, i);
|
|
360 SET_REGNO_REG_SET (live_in, i);
|
|
361 }
|
|
362 }
|
|
363 else
|
|
364 {
|
|
365 /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
|
|
366 DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e.
|
|
367 at -O1, just give up searching NEXT_BLOCK. */
|
|
368 next_block = NULL;
|
|
369 for (i = dregno; i < end_dregno; i++)
|
|
370 {
|
|
371 CLEAR_REGNO_REG_SET (live_out, i);
|
|
372 CLEAR_REGNO_REG_SET (live_in, i);
|
|
373 }
|
|
374
|
|
375 for (i = sregno; i < end_sregno; i++)
|
|
376 {
|
|
377 SET_REGNO_REG_SET (live_out, i);
|
|
378 SET_REGNO_REG_SET (live_in, i);
|
|
379 }
|
|
380 }
|
|
381
|
|
382 /* If we don't need to add the move to BB, look for a single
|
|
383 successor block. */
|
|
384 if (next_block)
|
|
385 {
|
|
386 live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
|
|
387 if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
|
|
388 break;
|
|
389 next_block = live_edge->dest;
|
|
390 }
|
|
391 }
|
|
392 while (next_block);
|
|
393
|
|
394 /* For the new created basic block, there is no dataflow info at all.
|
|
395 So skip the following dataflow update and check. */
|
|
396 if (!(*split_p))
|
|
397 {
|
|
398 /* BB now defines DEST. It only uses the parts of DEST that overlap SRC
|
|
399 (next loop). */
|
|
400 for (i = dregno; i < end_dregno; i++)
|
|
401 {
|
|
402 CLEAR_REGNO_REG_SET (bb_uses, i);
|
|
403 SET_REGNO_REG_SET (bb_defs, i);
|
|
404 }
|
|
405
|
|
406 /* BB now uses SRC. */
|
|
407 for (i = sregno; i < end_sregno; i++)
|
|
408 SET_REGNO_REG_SET (bb_uses, i);
|
|
409 }
|
|
410
|
|
411 /* Insert debug temps for dead REGs used in subsequent debug insns. */
|
|
412 if (debug->used && !bitmap_empty_p (debug->used))
|
|
413 FOR_EACH_INSN_DEF (def, insn)
|
|
414 dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
|
|
415 DEBUG_TEMP_BEFORE_WITH_VALUE);
|
|
416
|
|
417 emit_insn_after (PATTERN (insn), bb_note (bb));
|
|
418 delete_insn (insn);
|
|
419 return true;
|
|
420 }
|
|
421
|
|
422 /* Look for register copies in the first block of the function, and move
|
|
423 them down into successor blocks if the register is used only on one
|
|
424 path. This exposes more opportunities for shrink-wrapping. These
|
|
425 kinds of sets often occur when incoming argument registers are moved
|
|
426 to call-saved registers because their values are live across one or
|
|
427 more calls during the function. */
|
|
428
|
|
429 static void
|
|
430 prepare_shrink_wrap (basic_block entry_block)
|
|
431 {
|
|
432 rtx_insn *insn, *curr;
|
|
433 rtx x;
|
|
434 HARD_REG_SET uses, defs;
|
|
435 df_ref def, use;
|
|
436 bool split_p = false;
|
|
437 unsigned int i;
|
|
438 struct dead_debug_local debug;
|
|
439
|
|
440 if (JUMP_P (BB_END (entry_block)))
|
|
441 {
|
|
442 /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
|
|
443 to sink the copies from parameter to callee saved register out of
|
|
444 entry block. copyprop_hardreg_forward_bb_without_debug_insn is called
|
|
445 to release some dependences. */
|
|
446 copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
|
|
447 }
|
|
448
|
|
449 dead_debug_local_init (&debug, NULL, NULL);
|
|
450 CLEAR_HARD_REG_SET (uses);
|
|
451 CLEAR_HARD_REG_SET (defs);
|
|
452
|
|
453 FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
|
|
454 if (NONDEBUG_INSN_P (insn)
|
|
455 && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
|
|
456 &split_p, &debug))
|
|
457 {
|
|
458 /* Add all defined registers to DEFs. */
|
|
459 FOR_EACH_INSN_DEF (def, insn)
|
|
460 {
|
|
461 x = DF_REF_REG (def);
|
|
462 if (REG_P (x) && HARD_REGISTER_P (x))
|
|
463 for (i = REGNO (x); i < END_REGNO (x); i++)
|
|
464 SET_HARD_REG_BIT (defs, i);
|
|
465 }
|
|
466
|
|
467 /* Add all used registers to USESs. */
|
|
468 FOR_EACH_INSN_USE (use, insn)
|
|
469 {
|
|
470 x = DF_REF_REG (use);
|
|
471 if (REG_P (x) && HARD_REGISTER_P (x))
|
|
472 for (i = REGNO (x); i < END_REGNO (x); i++)
|
|
473 SET_HARD_REG_BIT (uses, i);
|
|
474 }
|
|
475 }
|
|
476
|
|
477 dead_debug_local_finish (&debug, NULL);
|
|
478 }
|
|
479
|
|
480 /* Return whether basic block PRO can get the prologue. It can not if it
|
|
481 has incoming complex edges that need a prologue inserted (we make a new
|
|
482 block for the prologue, so those edges would need to be redirected, which
|
|
483 does not work). It also can not if there exist registers live on entry
|
|
484 to PRO that are clobbered by the prologue. */
|
|
485
|
|
486 static bool
|
|
487 can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
|
|
488 {
|
|
489 edge e;
|
|
490 edge_iterator ei;
|
|
491 FOR_EACH_EDGE (e, ei, pro->preds)
|
|
492 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
|
|
493 && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
494 return false;
|
|
495
|
|
496 HARD_REG_SET live;
|
|
497 REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
|
|
498 if (hard_reg_set_intersect_p (live, prologue_clobbered))
|
|
499 return false;
|
|
500
|
|
501 return true;
|
|
502 }
|
|
503
|
|
504 /* Return whether we can duplicate basic block BB for shrink wrapping. We
|
|
505 cannot if the block cannot be duplicated at all, or if any of its incoming
|
|
506 edges are complex and come from a block that does not require a prologue
|
|
507 (we cannot redirect such edges), or if the block is too big to copy.
|
|
508 PRO is the basic block before which we would put the prologue, MAX_SIZE is
|
|
509 the maximum size block we allow to be copied. */
|
|
510
|
|
511 static bool
|
|
512 can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
|
|
513 {
|
|
514 if (!can_duplicate_block_p (bb))
|
|
515 return false;
|
|
516
|
|
517 edge e;
|
|
518 edge_iterator ei;
|
|
519 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
520 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
|
|
521 && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
522 return false;
|
|
523
|
|
524 unsigned size = 0;
|
|
525
|
|
526 rtx_insn *insn;
|
|
527 FOR_BB_INSNS (bb, insn)
|
|
528 if (NONDEBUG_INSN_P (insn))
|
|
529 {
|
|
530 size += get_attr_min_length (insn);
|
|
531 if (size > max_size)
|
|
532 return false;
|
|
533 }
|
|
534
|
|
535 return true;
|
|
536 }
|
|
537
|
|
538 /* Do whatever needs to be done for exits that run without prologue.
|
|
539 Sibcalls need nothing done. Normal exits get a simple_return inserted. */
|
|
540
|
|
541 static void
|
|
542 handle_simple_exit (edge e)
|
|
543 {
|
|
544
|
|
545 if (e->flags & EDGE_SIBCALL)
|
|
546 {
|
|
547 /* Tell function.c to take no further action on this edge. */
|
|
548 e->flags |= EDGE_IGNORE;
|
|
549
|
|
550 e->flags &= ~EDGE_FALLTHRU;
|
|
551 emit_barrier_after_bb (e->src);
|
|
552 return;
|
|
553 }
|
|
554
|
|
555 /* If the basic block the edge comes from has multiple successors,
|
|
556 split the edge. */
|
|
557 if (EDGE_COUNT (e->src->succs) > 1)
|
|
558 {
|
|
559 basic_block old_bb = e->src;
|
|
560 rtx_insn *end = BB_END (old_bb);
|
|
561 rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
|
|
562 basic_block new_bb = create_basic_block (note, note, old_bb);
|
|
563 BB_COPY_PARTITION (new_bb, old_bb);
|
|
564 BB_END (old_bb) = end;
|
|
565
|
|
566 redirect_edge_succ (e, new_bb);
|
131
|
567 new_bb->count = e->count ();
|
111
|
568 e->flags |= EDGE_FALLTHRU;
|
|
569
|
|
570 e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
|
|
571 }
|
|
572
|
|
573 e->flags &= ~EDGE_FALLTHRU;
|
|
574 rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
|
|
575 BB_END (e->src));
|
|
576 JUMP_LABEL (ret) = simple_return_rtx;
|
|
577 emit_barrier_after_bb (e->src);
|
|
578
|
|
579 if (dump_file)
|
|
580 fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
|
|
581 INSN_UID (ret), e->src->index);
|
|
582 }
|
|
583
|
|
584 /* Try to perform a kind of shrink-wrapping, making sure the
|
|
585 prologue/epilogue is emitted only around those parts of the
|
|
586 function that require it.
|
|
587
|
|
588 There will be exactly one prologue, and it will be executed either
|
|
589 zero or one time, on any path. Depending on where the prologue is
|
|
590 placed, some of the basic blocks can be reached via both paths with
|
|
591 and without a prologue. Such blocks will be duplicated here, and the
|
|
592 edges changed to match.
|
|
593
|
|
594 Paths that go to the exit without going through the prologue will use
|
|
595 a simple_return instead of the epilogue. We maximize the number of
|
|
596 those, making sure to only duplicate blocks that can be duplicated.
|
|
597 If the prologue can then still be placed in multiple locations, we
|
|
598 place it as early as possible.
|
|
599
|
|
600 An example, where we duplicate blocks with control flow (legend:
|
|
601 _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
|
|
602 be taken to point down or to the right, to simplify the diagram; here,
|
|
603 block 3 needs a prologue, the rest does not):
|
|
604
|
|
605
|
|
606 B B
|
|
607 | |
|
|
608 2 2
|
|
609 |\ |\
|
|
610 | 3 becomes | 3
|
|
611 |/ | \
|
|
612 4 7 4
|
|
613 |\ |\ |\
|
|
614 | 5 | 8 | 5
|
|
615 |/ |/ |/
|
|
616 6 9 6
|
|
617 | | |
|
|
618 R S R
|
|
619
|
|
620
|
|
621 (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
|
|
622 edge 2->3).
|
|
623
|
|
624 Another example, where part of a loop is duplicated (again, bb 3 is
|
|
625 the only block that needs a prologue):
|
|
626
|
|
627
|
|
628 B 3<-- B ->3<--
|
|
629 | | | | | | |
|
|
630 | v | becomes | | v |
|
|
631 2---4--- 2---5-- 4---
|
|
632 | | |
|
|
633 R S R
|
|
634
|
|
635
|
|
636 (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
|
|
637
|
|
638 ENTRY_EDGE is the edge where the prologue will be placed, possibly
|
|
639 changed by this function. PROLOGUE_SEQ is the prologue we will insert. */
|
|
640
|
|
641 void
|
|
642 try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
|
|
643 {
|
|
644 /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
|
|
645 no sense to shrink-wrap: then do not shrink-wrap! */
|
|
646
|
|
647 if (!SHRINK_WRAPPING_ENABLED)
|
|
648 return;
|
|
649
|
|
650 if (crtl->profile && !targetm.profile_before_prologue ())
|
|
651 return;
|
|
652
|
|
653 if (crtl->calls_eh_return)
|
|
654 return;
|
|
655
|
|
656 bool empty_prologue = true;
|
|
657 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
|
|
658 if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
|
|
659 {
|
|
660 empty_prologue = false;
|
|
661 break;
|
|
662 }
|
|
663 if (empty_prologue)
|
|
664 return;
|
|
665
|
|
666 /* Move some code down to expose more shrink-wrapping opportunities. */
|
|
667
|
|
668 basic_block entry = (*entry_edge)->dest;
|
|
669 prepare_shrink_wrap (entry);
|
|
670
|
|
671 if (dump_file)
|
|
672 fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
|
|
673
|
|
674 /* Compute the registers set and used in the prologue. */
|
|
675
|
|
676 HARD_REG_SET prologue_clobbered, prologue_used;
|
|
677 CLEAR_HARD_REG_SET (prologue_clobbered);
|
|
678 CLEAR_HARD_REG_SET (prologue_used);
|
|
679 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
|
|
680 if (NONDEBUG_INSN_P (insn))
|
|
681 {
|
|
682 HARD_REG_SET this_used;
|
|
683 CLEAR_HARD_REG_SET (this_used);
|
|
684 note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
|
|
685 AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
|
|
686 IOR_HARD_REG_SET (prologue_used, this_used);
|
|
687 note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered);
|
|
688 }
|
|
689 CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
|
|
690 if (frame_pointer_needed)
|
|
691 CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
|
|
692
|
|
693 /* Find out what registers are set up by the prologue; any use of these
|
|
694 cannot happen before the prologue. */
|
|
695
|
|
696 struct hard_reg_set_container set_up_by_prologue;
|
|
697 CLEAR_HARD_REG_SET (set_up_by_prologue.set);
|
|
698 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
|
|
699 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
|
|
700 if (frame_pointer_needed)
|
|
701 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
|
|
702 HARD_FRAME_POINTER_REGNUM);
|
|
703 if (pic_offset_table_rtx
|
|
704 && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
|
|
705 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
|
|
706 PIC_OFFSET_TABLE_REGNUM);
|
|
707 if (crtl->drap_reg)
|
|
708 add_to_hard_reg_set (&set_up_by_prologue.set,
|
|
709 GET_MODE (crtl->drap_reg),
|
|
710 REGNO (crtl->drap_reg));
|
|
711 if (targetm.set_up_by_prologue)
|
|
712 targetm.set_up_by_prologue (&set_up_by_prologue);
|
|
713
|
|
714 /* We will insert the prologue before the basic block PRO. PRO should
|
|
715 dominate all basic blocks that need the prologue to be executed
|
|
716 before them. First, make PRO the "tightest wrap" possible. */
|
|
717
|
|
718 calculate_dominance_info (CDI_DOMINATORS);
|
|
719
|
|
720 basic_block pro = 0;
|
|
721
|
|
722 basic_block bb;
|
|
723 edge e;
|
|
724 edge_iterator ei;
|
|
725 FOR_EACH_BB_FN (bb, cfun)
|
|
726 {
|
|
727 rtx_insn *insn;
|
|
728 FOR_BB_INSNS (bb, insn)
|
|
729 if (NONDEBUG_INSN_P (insn)
|
|
730 && requires_stack_frame_p (insn, prologue_used,
|
|
731 set_up_by_prologue.set))
|
|
732 {
|
|
733 if (dump_file)
|
|
734 fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
|
|
735 pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
|
|
736 break;
|
|
737 }
|
|
738 }
|
|
739
|
|
740 /* If nothing needs a prologue, just put it at the start. This really
|
|
741 shouldn't happen, but we cannot fix it here. */
|
|
742
|
|
743 if (pro == 0)
|
|
744 {
|
|
745 if (dump_file)
|
|
746 fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
|
|
747 "putting it at the start.\n");
|
|
748 pro = entry;
|
|
749 }
|
|
750
|
|
751 if (dump_file)
|
|
752 fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
|
|
753 pro->index);
|
|
754
|
|
755 /* Now see if we can put the prologue at the start of PRO. Putting it
|
|
756 there might require duplicating a block that cannot be duplicated,
|
|
757 or in some cases we cannot insert the prologue there at all. If PRO
|
|
758 wont't do, try again with the immediate dominator of PRO, and so on.
|
|
759
|
|
760 The blocks that need duplicating are those reachable from PRO but
|
|
761 not dominated by it. We keep in BB_WITH a bitmap of the blocks
|
|
762 reachable from PRO that we already found, and in VEC a stack of
|
|
763 those we still need to consider (to find successors). */
|
|
764
|
|
765 auto_bitmap bb_with;
|
|
766 bitmap_set_bit (bb_with, pro->index);
|
|
767
|
|
768 vec<basic_block> vec;
|
|
769 vec.create (n_basic_blocks_for_fn (cfun));
|
|
770 vec.quick_push (pro);
|
|
771
|
|
772 unsigned max_grow_size = get_uncond_jump_length ();
|
|
773 max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
|
|
774
|
|
775 while (!vec.is_empty () && pro != entry)
|
|
776 {
|
|
777 while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
|
|
778 {
|
|
779 pro = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
780
|
|
781 if (bitmap_set_bit (bb_with, pro->index))
|
|
782 vec.quick_push (pro);
|
|
783 }
|
|
784
|
|
785 basic_block bb = vec.pop ();
|
|
786 if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
|
|
787 while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
788 {
|
|
789 gcc_assert (pro != entry);
|
|
790
|
|
791 pro = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
792
|
|
793 if (bitmap_set_bit (bb_with, pro->index))
|
|
794 vec.quick_push (pro);
|
|
795 }
|
|
796
|
|
797 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
798 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
|
|
799 && bitmap_set_bit (bb_with, e->dest->index))
|
|
800 vec.quick_push (e->dest);
|
|
801 }
|
|
802
|
|
803 if (dump_file)
|
|
804 fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
|
|
805 pro->index);
|
|
806
|
|
807 /* If we can move PRO back without having to duplicate more blocks, do so.
|
|
808 We do this because putting the prologue earlier is better for scheduling.
|
|
809
|
|
810 We can move back to a block PRE if every path from PRE will eventually
|
|
811 need a prologue, that is, PRO is a post-dominator of PRE. PRE needs
|
|
812 to dominate every block reachable from itself. We keep in BB_TMP a
|
|
813 bitmap of the blocks reachable from PRE that we already found, and in
|
|
814 VEC a stack of those we still need to consider.
|
|
815
|
|
816 Any block reachable from PRE is also reachable from all predecessors
|
|
817 of PRE, so if we find we need to move PRE back further we can leave
|
|
818 everything not considered so far on the stack. Any block dominated
|
|
819 by PRE is also dominated by all other dominators of PRE, so anything
|
|
820 found good for some PRE does not need to be reconsidered later.
|
|
821
|
|
822 We don't need to update BB_WITH because none of the new blocks found
|
|
823 can jump to a block that does not need the prologue. */
|
|
824
|
|
825 if (pro != entry)
|
|
826 {
|
|
827 calculate_dominance_info (CDI_POST_DOMINATORS);
|
|
828
|
|
829 auto_bitmap bb_tmp;
|
|
830 bitmap_copy (bb_tmp, bb_with);
|
|
831 basic_block last_ok = pro;
|
|
832 vec.truncate (0);
|
|
833
|
|
834 while (pro != entry)
|
|
835 {
|
|
836 basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
837 if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
|
|
838 break;
|
|
839
|
|
840 if (bitmap_set_bit (bb_tmp, pre->index))
|
|
841 vec.quick_push (pre);
|
|
842
|
|
843 bool ok = true;
|
|
844 while (!vec.is_empty ())
|
|
845 {
|
|
846 if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
|
|
847 {
|
|
848 ok = false;
|
|
849 break;
|
|
850 }
|
|
851
|
|
852 basic_block bb = vec.pop ();
|
|
853 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
854 if (bitmap_set_bit (bb_tmp, e->dest->index))
|
|
855 vec.quick_push (e->dest);
|
|
856 }
|
|
857
|
|
858 if (ok && can_get_prologue (pre, prologue_clobbered))
|
|
859 last_ok = pre;
|
|
860
|
|
861 pro = pre;
|
|
862 }
|
|
863
|
|
864 pro = last_ok;
|
|
865
|
|
866 free_dominance_info (CDI_POST_DOMINATORS);
|
|
867 }
|
|
868
|
|
869 vec.release ();
|
|
870
|
|
871 if (dump_file)
|
|
872 fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
|
|
873 pro->index);
|
|
874
|
|
875 if (pro == entry)
|
|
876 {
|
|
877 free_dominance_info (CDI_DOMINATORS);
|
|
878 return;
|
|
879 }
|
|
880
|
|
881 /* Compute what fraction of the frequency and count of the blocks that run
|
|
882 both with and without prologue are for running with prologue. This gives
|
|
883 the correct answer for reducible flow graphs; for irreducible flow graphs
|
|
884 our profile is messed up beyond repair anyway. */
|
|
885
|
131
|
886 profile_count num = profile_count::zero ();
|
|
887 profile_count den = profile_count::zero ();
|
111
|
888
|
|
889 FOR_EACH_EDGE (e, ei, pro->preds)
|
|
890 if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
891 {
|
131
|
892 if (e->count ().initialized_p ())
|
|
893 num += e->count ();
|
|
894 if (e->src->count.initialized_p ())
|
|
895 den += e->src->count;
|
111
|
896 }
|
|
897
|
|
898 /* All is okay, so do it. */
|
|
899
|
|
900 crtl->shrink_wrapped = true;
|
|
901 if (dump_file)
|
|
902 fprintf (dump_file, "Performing shrink-wrapping.\n");
|
|
903
|
|
904 /* Copy the blocks that can run both with and without prologue. The
|
|
905 originals run with prologue, the copies without. Store a pointer to
|
|
906 the copy in the ->aux field of the original. */
|
|
907
|
|
908 FOR_EACH_BB_FN (bb, cfun)
|
|
909 if (bitmap_bit_p (bb_with, bb->index)
|
|
910 && !dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
911 {
|
|
912 basic_block dup = duplicate_block (bb, 0, 0);
|
|
913
|
|
914 bb->aux = dup;
|
|
915
|
|
916 if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
|
|
917 emit_barrier_after_bb (dup);
|
|
918
|
|
919 if (EDGE_COUNT (dup->succs) == 0)
|
|
920 emit_barrier_after_bb (dup);
|
|
921
|
|
922 if (dump_file)
|
|
923 fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
|
131
|
924
|
|
925 if (num == profile_count::zero () || den.nonzero_p ())
|
|
926 bb->count = bb->count.apply_scale (num, den);
|
111
|
927 dup->count -= bb->count;
|
|
928 }
|
|
929
|
|
930 /* Now change the edges to point to the copies, where appropriate. */
|
|
931
|
|
932 FOR_EACH_BB_FN (bb, cfun)
|
|
933 if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
934 {
|
|
935 basic_block src = bb;
|
|
936 if (bitmap_bit_p (bb_with, bb->index))
|
|
937 src = (basic_block) bb->aux;
|
|
938
|
|
939 FOR_EACH_EDGE (e, ei, src->succs)
|
|
940 {
|
|
941 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
942 continue;
|
|
943
|
|
944 if (bitmap_bit_p (bb_with, e->dest->index)
|
|
945 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
|
|
946 {
|
|
947 if (dump_file)
|
|
948 fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
|
|
949 e->src->index, e->dest->index,
|
|
950 ((basic_block) e->dest->aux)->index);
|
|
951 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
|
|
952 }
|
|
953 else if (e->flags & EDGE_FALLTHRU
|
|
954 && bitmap_bit_p (bb_with, bb->index))
|
|
955 force_nonfallthru (e);
|
|
956 }
|
|
957 }
|
|
958
|
|
959 /* Also redirect the function entry edge if necessary. */
|
|
960
|
|
961 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
|
|
962 if (bitmap_bit_p (bb_with, e->dest->index)
|
|
963 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
|
|
964 {
|
|
965 basic_block split_bb = split_edge (e);
|
|
966 e = single_succ_edge (split_bb);
|
|
967 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
|
|
968 }
|
|
969
|
|
970 /* Make a simple_return for those exits that run without prologue. */
|
|
971
|
|
972 FOR_EACH_BB_REVERSE_FN (bb, cfun)
|
|
973 if (!bitmap_bit_p (bb_with, bb->index))
|
|
974 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
975 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
976 handle_simple_exit (e);
|
|
977
|
|
978 /* Finally, we want a single edge to put the prologue on. Make a new
|
|
979 block before the PRO block; the edge beteen them is the edge we want.
|
|
980 Then redirect those edges into PRO that come from blocks without the
|
|
981 prologue, to point to the new block instead. The new prologue block
|
|
982 is put at the end of the insn chain. */
|
|
983
|
|
984 basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
|
|
985 BB_COPY_PARTITION (new_bb, pro);
|
|
986 new_bb->count = profile_count::zero ();
|
|
987 if (dump_file)
|
|
988 fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
|
|
989
|
|
990 for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
|
|
991 {
|
|
992 if (bitmap_bit_p (bb_with, e->src->index)
|
|
993 || dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
994 {
|
|
995 ei_next (&ei);
|
|
996 continue;
|
|
997 }
|
|
998
|
131
|
999 new_bb->count += e->count ();
|
111
|
1000
|
|
1001 redirect_edge_and_branch_force (e, new_bb);
|
|
1002 if (dump_file)
|
|
1003 fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
|
|
1004 }
|
|
1005
|
|
1006 *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
|
|
1007 force_nonfallthru (*entry_edge);
|
|
1008
|
|
1009 free_dominance_info (CDI_DOMINATORS);
|
|
1010 }
|
|
1011
|
|
1012 /* Separate shrink-wrapping
|
|
1013
|
|
1014 Instead of putting all of the prologue and epilogue in one spot, we
|
|
1015 can put parts of it in places where those components are executed less
|
|
1016 frequently. The following code does this, for prologue and epilogue
|
|
1017 components that can be put in more than one location, and where those
|
|
1018 components can be executed more than once (the epilogue component will
|
|
1019 always be executed before the prologue component is executed a second
|
|
1020 time).
|
|
1021
|
|
1022 What exactly is a component is target-dependent. The more usual
|
|
1023 components are simple saves/restores to/from the frame of callee-saved
|
|
1024 registers. This code treats components abstractly (as an sbitmap),
|
|
1025 letting the target handle all details.
|
|
1026
|
|
1027 Prologue components are placed in such a way that for every component
|
|
1028 the prologue is executed as infrequently as possible. We do this by
|
|
1029 walking the dominator tree, comparing the cost of placing a prologue
|
|
1030 component before a block to the sum of costs determined for all subtrees
|
|
1031 of that block.
|
|
1032
|
|
1033 From this placement, we then determine for each component all blocks
|
|
1034 where at least one of this block's dominators (including itself) will
|
|
1035 get a prologue inserted. That then is how the components are placed.
|
|
1036 We could place the epilogue components a bit smarter (we can save a
|
|
1037 bit of code size sometimes); this is a possible future improvement.
|
|
1038
|
|
1039 Prologues and epilogues are preferably placed into a block, either at
|
|
1040 the beginning or end of it, if it is needed for all predecessor resp.
|
|
1041 successor edges; or placed on the edge otherwise.
|
|
1042
|
|
1043 If the placement of any prologue/epilogue leads to a situation we cannot
|
|
1044 handle (for example, an abnormal edge would need to be split, or some
|
|
1045 targets want to use some specific registers that may not be available
|
|
1046 where we want to put them), separate shrink-wrapping for the components
|
|
1047 in that prologue/epilogue is aborted. */
|
|
1048
|
|
1049
|
|
1050 /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
|
|
1051 label LABEL. */
|
|
1052 static void
|
|
1053 dump_components (const char *label, sbitmap components)
|
|
1054 {
|
|
1055 if (bitmap_empty_p (components))
|
|
1056 return;
|
|
1057
|
|
1058 fprintf (dump_file, " [%s", label);
|
|
1059
|
|
1060 for (unsigned int j = 0; j < components->n_bits; j++)
|
|
1061 if (bitmap_bit_p (components, j))
|
|
1062 fprintf (dump_file, " %u", j);
|
|
1063
|
|
1064 fprintf (dump_file, "]");
|
|
1065 }
|
|
1066
|
|
1067 /* The data we collect for each bb. */
|
|
1068 struct sw {
|
|
1069 /* What components does this BB need? */
|
|
1070 sbitmap needs_components;
|
|
1071
|
|
1072 /* What components does this BB have? This is the main decision this
|
|
1073 pass makes. */
|
|
1074 sbitmap has_components;
|
|
1075
|
|
1076 /* The components for which we placed code at the start of the BB (instead
|
|
1077 of on all incoming edges). */
|
|
1078 sbitmap head_components;
|
|
1079
|
|
1080 /* The components for which we placed code at the end of the BB (instead
|
|
1081 of on all outgoing edges). */
|
|
1082 sbitmap tail_components;
|
|
1083
|
|
1084 /* The frequency of executing the prologue for this BB, if a prologue is
|
|
1085 placed on this BB. This is a pessimistic estimate (no prologue is
|
|
1086 needed for edges from blocks that have the component under consideration
|
|
1087 active already). */
|
|
1088 gcov_type own_cost;
|
|
1089
|
|
1090 /* The frequency of executing the prologue for this BB and all BBs
|
|
1091 dominated by it. */
|
|
1092 gcov_type total_cost;
|
|
1093 };
|
|
1094
|
|
1095 /* A helper function for accessing the pass-specific info. */
|
|
1096 static inline struct sw *
|
|
1097 SW (basic_block bb)
|
|
1098 {
|
|
1099 gcc_assert (bb->aux);
|
|
1100 return (struct sw *) bb->aux;
|
|
1101 }
|
|
1102
|
|
1103 /* Create the pass-specific data structures for separately shrink-wrapping
|
|
1104 with components COMPONENTS. */
|
|
1105 static void
|
|
1106 init_separate_shrink_wrap (sbitmap components)
|
|
1107 {
|
|
1108 basic_block bb;
|
|
1109 FOR_ALL_BB_FN (bb, cfun)
|
|
1110 {
|
|
1111 bb->aux = xcalloc (1, sizeof (struct sw));
|
|
1112
|
|
1113 SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
|
|
1114
|
|
1115 /* Mark all basic blocks without successor as needing all components.
|
|
1116 This avoids problems in at least cfgcleanup, sel-sched, and
|
|
1117 regrename (largely to do with all paths to such a block still
|
|
1118 needing the same dwarf CFI info). */
|
|
1119 if (EDGE_COUNT (bb->succs) == 0)
|
|
1120 bitmap_copy (SW (bb)->needs_components, components);
|
|
1121
|
|
1122 if (dump_file)
|
|
1123 {
|
|
1124 fprintf (dump_file, "bb %d components:", bb->index);
|
|
1125 dump_components ("has", SW (bb)->needs_components);
|
|
1126 fprintf (dump_file, "\n");
|
|
1127 }
|
|
1128
|
|
1129 SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1130 SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1131 SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
1132 bitmap_clear (SW (bb)->has_components);
|
|
1133 }
|
|
1134 }
|
|
1135
|
|
1136 /* Destroy the pass-specific data. */
|
|
1137 static void
|
|
1138 fini_separate_shrink_wrap (void)
|
|
1139 {
|
|
1140 basic_block bb;
|
|
1141 FOR_ALL_BB_FN (bb, cfun)
|
|
1142 if (bb->aux)
|
|
1143 {
|
|
1144 sbitmap_free (SW (bb)->needs_components);
|
|
1145 sbitmap_free (SW (bb)->has_components);
|
|
1146 sbitmap_free (SW (bb)->head_components);
|
|
1147 sbitmap_free (SW (bb)->tail_components);
|
|
1148 free (bb->aux);
|
|
1149 bb->aux = 0;
|
|
1150 }
|
|
1151 }
|
|
1152
|
|
1153 /* Place the prologue for component WHICH, in the basic blocks dominated
|
|
1154 by HEAD. Do a DFS over the dominator tree, and set bit WHICH in the
|
|
1155 HAS_COMPONENTS of a block if either the block has that bit set in
|
|
1156 NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
|
|
1157 dominator subtrees separately. */
|
|
1158 static void
|
|
1159 place_prologue_for_one_component (unsigned int which, basic_block head)
|
|
1160 {
|
|
1161 /* The block we are currently dealing with. */
|
|
1162 basic_block bb = head;
|
|
1163 /* Is this the first time we visit this block, i.e. have we just gone
|
|
1164 down the tree. */
|
|
1165 bool first_visit = true;
|
|
1166
|
|
1167 /* Walk the dominator tree, visit one block per iteration of this loop.
|
|
1168 Each basic block is visited twice: once before visiting any children
|
|
1169 of the block, and once after visiting all of them (leaf nodes are
|
|
1170 visited only once). As an optimization, we do not visit subtrees
|
|
1171 that can no longer influence the prologue placement. */
|
|
1172 for (;;)
|
|
1173 {
|
|
1174 /* First visit of a block: set the (children) cost accumulator to zero;
|
|
1175 if the block does not have the component itself, walk down. */
|
|
1176 if (first_visit)
|
|
1177 {
|
|
1178 /* Initialize the cost. The cost is the block execution frequency
|
|
1179 that does not come from backedges. Calculating this by simply
|
|
1180 adding the cost of all edges that aren't backedges does not
|
|
1181 work: this does not always add up to the block frequency at
|
|
1182 all, and even if it does, rounding error makes for bad
|
|
1183 decisions. */
|
131
|
1184 SW (bb)->own_cost = bb->count.to_frequency (cfun);
|
111
|
1185
|
|
1186 edge e;
|
|
1187 edge_iterator ei;
|
|
1188 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
1189 if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
|
|
1190 {
|
|
1191 if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
|
|
1192 SW (bb)->own_cost -= EDGE_FREQUENCY (e);
|
|
1193 else
|
|
1194 SW (bb)->own_cost = 0;
|
|
1195 }
|
|
1196
|
|
1197 SW (bb)->total_cost = 0;
|
|
1198
|
|
1199 if (!bitmap_bit_p (SW (bb)->needs_components, which)
|
|
1200 && first_dom_son (CDI_DOMINATORS, bb))
|
|
1201 {
|
|
1202 bb = first_dom_son (CDI_DOMINATORS, bb);
|
|
1203 continue;
|
|
1204 }
|
|
1205 }
|
|
1206
|
|
1207 /* If this block does need the component itself, or it is cheaper to
|
|
1208 put the prologue here than in all the descendants that need it,
|
|
1209 mark it so. If this block's immediate post-dominator is dominated
|
|
1210 by this block, and that needs the prologue, we can put it on this
|
|
1211 block as well (earlier is better). */
|
|
1212 if (bitmap_bit_p (SW (bb)->needs_components, which)
|
|
1213 || SW (bb)->total_cost > SW (bb)->own_cost)
|
|
1214 {
|
|
1215 SW (bb)->total_cost = SW (bb)->own_cost;
|
|
1216 bitmap_set_bit (SW (bb)->has_components, which);
|
|
1217 }
|
|
1218 else
|
|
1219 {
|
|
1220 basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
|
|
1221 if (dominated_by_p (CDI_DOMINATORS, kid, bb)
|
|
1222 && bitmap_bit_p (SW (kid)->has_components, which))
|
|
1223 {
|
|
1224 SW (bb)->total_cost = SW (bb)->own_cost;
|
|
1225 bitmap_set_bit (SW (bb)->has_components, which);
|
|
1226 }
|
|
1227 }
|
|
1228
|
|
1229 /* We are back where we started, so we are done now. */
|
|
1230 if (bb == head)
|
|
1231 return;
|
|
1232
|
|
1233 /* We now know the cost of the subtree rooted at the current block.
|
|
1234 Accumulate this cost in the parent. */
|
|
1235 basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
|
|
1236 SW (parent)->total_cost += SW (bb)->total_cost;
|
|
1237
|
|
1238 /* Don't walk the tree down unless necessary. */
|
|
1239 if (next_dom_son (CDI_DOMINATORS, bb)
|
|
1240 && SW (parent)->total_cost <= SW (parent)->own_cost)
|
|
1241 {
|
|
1242 bb = next_dom_son (CDI_DOMINATORS, bb);
|
|
1243 first_visit = true;
|
|
1244 }
|
|
1245 else
|
|
1246 {
|
|
1247 bb = parent;
|
|
1248 first_visit = false;
|
|
1249 }
|
|
1250 }
|
|
1251 }
|
|
1252
|
|
1253 /* Set HAS_COMPONENTS in every block to the maximum it can be set to without
|
|
1254 setting it on any path from entry to exit where it was not already set
|
|
1255 somewhere (or, for blocks that have no path to the exit, consider only
|
131
|
1256 paths from the entry to the block itself). Return whether any changes
|
|
1257 were made to some HAS_COMPONENTS. */
|
|
1258 static bool
|
111
|
1259 spread_components (sbitmap components)
|
|
1260 {
|
|
1261 basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
|
|
1262 basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
|
|
1263
|
|
1264 /* A stack of all blocks left to consider, and a bitmap of all blocks
|
|
1265 on that stack. */
|
|
1266 vec<basic_block> todo;
|
|
1267 todo.create (n_basic_blocks_for_fn (cfun));
|
|
1268 auto_bitmap seen;
|
|
1269
|
|
1270 auto_sbitmap old (SBITMAP_SIZE (components));
|
|
1271
|
|
1272 /* Find for every block the components that are *not* needed on some path
|
|
1273 from the entry to that block. Do this with a flood fill from the entry
|
|
1274 block. Every block can be visited at most as often as the number of
|
|
1275 components (plus one), and usually much less often. */
|
|
1276
|
|
1277 if (dump_file)
|
|
1278 fprintf (dump_file, "Spreading down...\n");
|
|
1279
|
|
1280 basic_block bb;
|
|
1281 FOR_ALL_BB_FN (bb, cfun)
|
|
1282 bitmap_clear (SW (bb)->head_components);
|
|
1283
|
|
1284 bitmap_copy (SW (entry_block)->head_components, components);
|
|
1285
|
|
1286 edge e;
|
|
1287 edge_iterator ei;
|
|
1288
|
|
1289 todo.quick_push (single_succ (entry_block));
|
|
1290 bitmap_set_bit (seen, single_succ (entry_block)->index);
|
|
1291 while (!todo.is_empty ())
|
|
1292 {
|
|
1293 bb = todo.pop ();
|
|
1294
|
|
1295 bitmap_copy (old, SW (bb)->head_components);
|
|
1296
|
|
1297 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
1298 bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
|
|
1299 SW (e->src)->head_components);
|
|
1300
|
|
1301 bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
|
|
1302 SW (bb)->has_components);
|
|
1303
|
|
1304 if (!bitmap_equal_p (old, SW (bb)->head_components))
|
|
1305 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1306 if (bitmap_set_bit (seen, e->dest->index))
|
|
1307 todo.quick_push (e->dest);
|
|
1308
|
|
1309 bitmap_clear_bit (seen, bb->index);
|
|
1310 }
|
|
1311
|
|
1312 /* Find for every block the components that are *not* needed on some reverse
|
|
1313 path from the exit to that block. */
|
|
1314
|
|
1315 if (dump_file)
|
|
1316 fprintf (dump_file, "Spreading up...\n");
|
|
1317
|
|
1318 /* First, mark all blocks not reachable from the exit block as not needing
|
|
1319 any component on any path to the exit. Mark everything, and then clear
|
|
1320 again by a flood fill. */
|
|
1321
|
|
1322 FOR_ALL_BB_FN (bb, cfun)
|
|
1323 bitmap_copy (SW (bb)->tail_components, components);
|
|
1324
|
|
1325 FOR_EACH_EDGE (e, ei, exit_block->preds)
|
|
1326 {
|
|
1327 todo.quick_push (e->src);
|
|
1328 bitmap_set_bit (seen, e->src->index);
|
|
1329 }
|
|
1330
|
|
1331 while (!todo.is_empty ())
|
|
1332 {
|
|
1333 bb = todo.pop ();
|
|
1334
|
|
1335 if (!bitmap_empty_p (SW (bb)->tail_components))
|
|
1336 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
1337 if (bitmap_set_bit (seen, e->src->index))
|
|
1338 todo.quick_push (e->src);
|
|
1339
|
|
1340 bitmap_clear (SW (bb)->tail_components);
|
|
1341
|
|
1342 bitmap_clear_bit (seen, bb->index);
|
|
1343 }
|
|
1344
|
|
1345 /* And then, flood fill backwards to find for every block the components
|
|
1346 not needed on some path to the exit. */
|
|
1347
|
|
1348 bitmap_copy (SW (exit_block)->tail_components, components);
|
|
1349
|
|
1350 FOR_EACH_EDGE (e, ei, exit_block->preds)
|
|
1351 {
|
|
1352 todo.quick_push (e->src);
|
|
1353 bitmap_set_bit (seen, e->src->index);
|
|
1354 }
|
|
1355
|
|
1356 while (!todo.is_empty ())
|
|
1357 {
|
|
1358 bb = todo.pop ();
|
|
1359
|
|
1360 bitmap_copy (old, SW (bb)->tail_components);
|
|
1361
|
|
1362 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1363 bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
|
|
1364 SW (e->dest)->tail_components);
|
|
1365
|
|
1366 bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
|
|
1367 SW (bb)->has_components);
|
|
1368
|
|
1369 if (!bitmap_equal_p (old, SW (bb)->tail_components))
|
|
1370 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
1371 if (bitmap_set_bit (seen, e->src->index))
|
|
1372 todo.quick_push (e->src);
|
|
1373
|
|
1374 bitmap_clear_bit (seen, bb->index);
|
|
1375 }
|
|
1376
|
131
|
1377 todo.release ();
|
|
1378
|
111
|
1379 /* Finally, mark everything not not needed both forwards and backwards. */
|
|
1380
|
131
|
1381 bool did_changes = false;
|
|
1382
|
111
|
1383 FOR_EACH_BB_FN (bb, cfun)
|
|
1384 {
|
131
|
1385 bitmap_copy (old, SW (bb)->has_components);
|
|
1386
|
111
|
1387 bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
|
|
1388 SW (bb)->tail_components);
|
|
1389 bitmap_and_compl (SW (bb)->has_components, components,
|
|
1390 SW (bb)->head_components);
|
131
|
1391
|
|
1392 if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
|
|
1393 did_changes = true;
|
111
|
1394 }
|
|
1395
|
|
1396 FOR_ALL_BB_FN (bb, cfun)
|
|
1397 {
|
|
1398 if (dump_file)
|
|
1399 {
|
|
1400 fprintf (dump_file, "bb %d components:", bb->index);
|
|
1401 dump_components ("has", SW (bb)->has_components);
|
|
1402 fprintf (dump_file, "\n");
|
|
1403 }
|
|
1404 }
|
131
|
1405
|
|
1406 return did_changes;
|
111
|
1407 }
|
|
1408
|
|
1409 /* If we cannot handle placing some component's prologues or epilogues where
|
|
1410 we decided we should place them, unmark that component in COMPONENTS so
|
|
1411 that it is not wrapped separately. */
|
|
1412 static void
|
|
1413 disqualify_problematic_components (sbitmap components)
|
|
1414 {
|
|
1415 auto_sbitmap pro (SBITMAP_SIZE (components));
|
|
1416 auto_sbitmap epi (SBITMAP_SIZE (components));
|
|
1417
|
|
1418 basic_block bb;
|
|
1419 FOR_EACH_BB_FN (bb, cfun)
|
|
1420 {
|
|
1421 edge e;
|
|
1422 edge_iterator ei;
|
|
1423 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1424 {
|
|
1425 /* Find which components we want pro/epilogues for here. */
|
|
1426 bitmap_and_compl (epi, SW (e->src)->has_components,
|
|
1427 SW (e->dest)->has_components);
|
|
1428 bitmap_and_compl (pro, SW (e->dest)->has_components,
|
|
1429 SW (e->src)->has_components);
|
|
1430
|
|
1431 /* Ask the target what it thinks about things. */
|
|
1432 if (!bitmap_empty_p (epi))
|
|
1433 targetm.shrink_wrap.disqualify_components (components, e, epi,
|
|
1434 false);
|
|
1435 if (!bitmap_empty_p (pro))
|
|
1436 targetm.shrink_wrap.disqualify_components (components, e, pro,
|
|
1437 true);
|
|
1438
|
|
1439 /* If this edge doesn't need splitting, we're fine. */
|
|
1440 if (single_pred_p (e->dest)
|
|
1441 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
1442 continue;
|
|
1443
|
|
1444 /* If the edge can be split, that is fine too. */
|
|
1445 if ((e->flags & EDGE_ABNORMAL) == 0)
|
|
1446 continue;
|
|
1447
|
|
1448 /* We also can handle sibcalls. */
|
|
1449 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
1450 {
|
|
1451 gcc_assert (e->flags & EDGE_SIBCALL);
|
|
1452 continue;
|
|
1453 }
|
|
1454
|
|
1455 /* Remove from consideration those components we would need
|
|
1456 pro/epilogues for on edges where we cannot insert them. */
|
|
1457 bitmap_and_compl (components, components, epi);
|
|
1458 bitmap_and_compl (components, components, pro);
|
|
1459
|
|
1460 if (dump_file && !bitmap_subset_p (epi, components))
|
|
1461 {
|
|
1462 fprintf (dump_file, " BAD epi %d->%d", e->src->index,
|
|
1463 e->dest->index);
|
|
1464 if (e->flags & EDGE_EH)
|
|
1465 fprintf (dump_file, " for EH");
|
|
1466 dump_components ("epi", epi);
|
|
1467 fprintf (dump_file, "\n");
|
|
1468 }
|
|
1469
|
|
1470 if (dump_file && !bitmap_subset_p (pro, components))
|
|
1471 {
|
|
1472 fprintf (dump_file, " BAD pro %d->%d", e->src->index,
|
|
1473 e->dest->index);
|
|
1474 if (e->flags & EDGE_EH)
|
|
1475 fprintf (dump_file, " for EH");
|
|
1476 dump_components ("pro", pro);
|
|
1477 fprintf (dump_file, "\n");
|
|
1478 }
|
|
1479 }
|
|
1480 }
|
|
1481 }
|
|
1482
|
|
1483 /* Place code for prologues and epilogues for COMPONENTS where we can put
|
|
1484 that code at the start of basic blocks. */
|
|
1485 static void
|
|
1486 emit_common_heads_for_components (sbitmap components)
|
|
1487 {
|
|
1488 auto_sbitmap pro (SBITMAP_SIZE (components));
|
|
1489 auto_sbitmap epi (SBITMAP_SIZE (components));
|
|
1490 auto_sbitmap tmp (SBITMAP_SIZE (components));
|
|
1491
|
|
1492 basic_block bb;
|
|
1493 FOR_ALL_BB_FN (bb, cfun)
|
|
1494 bitmap_clear (SW (bb)->head_components);
|
|
1495
|
|
1496 FOR_EACH_BB_FN (bb, cfun)
|
|
1497 {
|
|
1498 /* Find which prologue resp. epilogue components are needed for all
|
|
1499 predecessor edges to this block. */
|
|
1500
|
|
1501 /* First, select all possible components. */
|
|
1502 bitmap_copy (epi, components);
|
|
1503 bitmap_copy (pro, components);
|
|
1504
|
|
1505 edge e;
|
|
1506 edge_iterator ei;
|
|
1507 FOR_EACH_EDGE (e, ei, bb->preds)
|
|
1508 {
|
|
1509 if (e->flags & EDGE_ABNORMAL)
|
|
1510 {
|
|
1511 bitmap_clear (epi);
|
|
1512 bitmap_clear (pro);
|
|
1513 break;
|
|
1514 }
|
|
1515
|
|
1516 /* Deselect those epilogue components that should not be inserted
|
|
1517 for this edge. */
|
|
1518 bitmap_and_compl (tmp, SW (e->src)->has_components,
|
|
1519 SW (e->dest)->has_components);
|
|
1520 bitmap_and (epi, epi, tmp);
|
|
1521
|
|
1522 /* Similar, for the prologue. */
|
|
1523 bitmap_and_compl (tmp, SW (e->dest)->has_components,
|
|
1524 SW (e->src)->has_components);
|
|
1525 bitmap_and (pro, pro, tmp);
|
|
1526 }
|
|
1527
|
|
1528 if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
|
|
1529 fprintf (dump_file, " bb %d", bb->index);
|
|
1530
|
|
1531 if (dump_file && !bitmap_empty_p (epi))
|
|
1532 dump_components ("epi", epi);
|
|
1533 if (dump_file && !bitmap_empty_p (pro))
|
|
1534 dump_components ("pro", pro);
|
|
1535
|
|
1536 if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
|
|
1537 fprintf (dump_file, "\n");
|
|
1538
|
|
1539 /* Place code after the BB note. */
|
|
1540 if (!bitmap_empty_p (pro))
|
|
1541 {
|
|
1542 start_sequence ();
|
|
1543 targetm.shrink_wrap.emit_prologue_components (pro);
|
|
1544 rtx_insn *seq = get_insns ();
|
|
1545 end_sequence ();
|
|
1546 record_prologue_seq (seq);
|
|
1547
|
|
1548 emit_insn_after (seq, bb_note (bb));
|
|
1549
|
|
1550 bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
|
|
1551 }
|
|
1552
|
|
1553 if (!bitmap_empty_p (epi))
|
|
1554 {
|
|
1555 start_sequence ();
|
|
1556 targetm.shrink_wrap.emit_epilogue_components (epi);
|
|
1557 rtx_insn *seq = get_insns ();
|
|
1558 end_sequence ();
|
|
1559 record_epilogue_seq (seq);
|
|
1560
|
|
1561 emit_insn_after (seq, bb_note (bb));
|
|
1562
|
|
1563 bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
|
|
1564 }
|
|
1565 }
|
|
1566 }
|
|
1567
|
|
1568 /* Place code for prologues and epilogues for COMPONENTS where we can put
|
|
1569 that code at the end of basic blocks. */
|
|
1570 static void
|
|
1571 emit_common_tails_for_components (sbitmap components)
|
|
1572 {
|
|
1573 auto_sbitmap pro (SBITMAP_SIZE (components));
|
|
1574 auto_sbitmap epi (SBITMAP_SIZE (components));
|
|
1575 auto_sbitmap tmp (SBITMAP_SIZE (components));
|
|
1576
|
|
1577 basic_block bb;
|
|
1578 FOR_ALL_BB_FN (bb, cfun)
|
|
1579 bitmap_clear (SW (bb)->tail_components);
|
|
1580
|
|
1581 FOR_EACH_BB_FN (bb, cfun)
|
|
1582 {
|
|
1583 /* Find which prologue resp. epilogue components are needed for all
|
|
1584 successor edges from this block. */
|
|
1585 if (EDGE_COUNT (bb->succs) == 0)
|
|
1586 continue;
|
|
1587
|
|
1588 /* First, select all possible components. */
|
|
1589 bitmap_copy (epi, components);
|
|
1590 bitmap_copy (pro, components);
|
|
1591
|
|
1592 edge e;
|
|
1593 edge_iterator ei;
|
|
1594 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1595 {
|
|
1596 if (e->flags & EDGE_ABNORMAL)
|
|
1597 {
|
|
1598 bitmap_clear (epi);
|
|
1599 bitmap_clear (pro);
|
|
1600 break;
|
|
1601 }
|
|
1602
|
|
1603 /* Deselect those epilogue components that should not be inserted
|
|
1604 for this edge, and also those that are already put at the head
|
|
1605 of the successor block. */
|
|
1606 bitmap_and_compl (tmp, SW (e->src)->has_components,
|
|
1607 SW (e->dest)->has_components);
|
|
1608 bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
|
|
1609 bitmap_and (epi, epi, tmp);
|
|
1610
|
|
1611 /* Similarly, for the prologue. */
|
|
1612 bitmap_and_compl (tmp, SW (e->dest)->has_components,
|
|
1613 SW (e->src)->has_components);
|
|
1614 bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
|
|
1615 bitmap_and (pro, pro, tmp);
|
|
1616 }
|
|
1617
|
|
1618 /* If the last insn of this block is a control flow insn we cannot
|
|
1619 put anything after it. We can put our code before it instead,
|
|
1620 but only if that jump insn is a simple jump. */
|
|
1621 rtx_insn *last_insn = BB_END (bb);
|
|
1622 if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
|
|
1623 {
|
|
1624 bitmap_clear (epi);
|
|
1625 bitmap_clear (pro);
|
|
1626 }
|
|
1627
|
|
1628 if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
|
|
1629 fprintf (dump_file, " bb %d", bb->index);
|
|
1630
|
|
1631 if (dump_file && !bitmap_empty_p (epi))
|
|
1632 dump_components ("epi", epi);
|
|
1633 if (dump_file && !bitmap_empty_p (pro))
|
|
1634 dump_components ("pro", pro);
|
|
1635
|
|
1636 if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
|
|
1637 fprintf (dump_file, "\n");
|
|
1638
|
|
1639 /* Put the code at the end of the BB, but before any final jump. */
|
|
1640 if (!bitmap_empty_p (epi))
|
|
1641 {
|
|
1642 start_sequence ();
|
|
1643 targetm.shrink_wrap.emit_epilogue_components (epi);
|
|
1644 rtx_insn *seq = get_insns ();
|
|
1645 end_sequence ();
|
|
1646 record_epilogue_seq (seq);
|
|
1647
|
|
1648 if (control_flow_insn_p (last_insn))
|
|
1649 emit_insn_before (seq, last_insn);
|
|
1650 else
|
|
1651 emit_insn_after (seq, last_insn);
|
|
1652
|
|
1653 bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
|
|
1654 }
|
|
1655
|
|
1656 if (!bitmap_empty_p (pro))
|
|
1657 {
|
|
1658 start_sequence ();
|
|
1659 targetm.shrink_wrap.emit_prologue_components (pro);
|
|
1660 rtx_insn *seq = get_insns ();
|
|
1661 end_sequence ();
|
|
1662 record_prologue_seq (seq);
|
|
1663
|
|
1664 if (control_flow_insn_p (last_insn))
|
|
1665 emit_insn_before (seq, last_insn);
|
|
1666 else
|
|
1667 emit_insn_after (seq, last_insn);
|
|
1668
|
|
1669 bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
|
|
1670 }
|
|
1671 }
|
|
1672 }
|
|
1673
|
|
1674 /* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
|
|
1675 placed them inside blocks directly. */
|
|
1676 static void
|
|
1677 insert_prologue_epilogue_for_components (sbitmap components)
|
|
1678 {
|
|
1679 auto_sbitmap pro (SBITMAP_SIZE (components));
|
|
1680 auto_sbitmap epi (SBITMAP_SIZE (components));
|
|
1681
|
|
1682 basic_block bb;
|
|
1683 FOR_EACH_BB_FN (bb, cfun)
|
|
1684 {
|
|
1685 if (!bb->aux)
|
|
1686 continue;
|
|
1687
|
|
1688 edge e;
|
|
1689 edge_iterator ei;
|
|
1690 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1691 {
|
|
1692 /* Find which pro/epilogue components are needed on this edge. */
|
|
1693 bitmap_and_compl (epi, SW (e->src)->has_components,
|
|
1694 SW (e->dest)->has_components);
|
|
1695 bitmap_and_compl (pro, SW (e->dest)->has_components,
|
|
1696 SW (e->src)->has_components);
|
|
1697 bitmap_and (epi, epi, components);
|
|
1698 bitmap_and (pro, pro, components);
|
|
1699
|
|
1700 /* Deselect those we already have put at the head or tail of the
|
|
1701 edge's dest resp. src. */
|
|
1702 bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
|
|
1703 bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
|
|
1704 bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
|
|
1705 bitmap_and_compl (pro, pro, SW (e->src)->tail_components);
|
|
1706
|
|
1707 if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
|
|
1708 {
|
|
1709 if (dump_file)
|
|
1710 {
|
|
1711 fprintf (dump_file, " %d->%d", e->src->index,
|
|
1712 e->dest->index);
|
|
1713 dump_components ("epi", epi);
|
|
1714 dump_components ("pro", pro);
|
|
1715 if (e->flags & EDGE_SIBCALL)
|
|
1716 fprintf (dump_file, " (SIBCALL)");
|
|
1717 else if (e->flags & EDGE_ABNORMAL)
|
|
1718 fprintf (dump_file, " (ABNORMAL)");
|
|
1719 fprintf (dump_file, "\n");
|
|
1720 }
|
|
1721
|
|
1722 /* Put the epilogue components in place. */
|
|
1723 start_sequence ();
|
|
1724 targetm.shrink_wrap.emit_epilogue_components (epi);
|
|
1725 rtx_insn *seq = get_insns ();
|
|
1726 end_sequence ();
|
|
1727 record_epilogue_seq (seq);
|
|
1728
|
|
1729 if (e->flags & EDGE_SIBCALL)
|
|
1730 {
|
|
1731 gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));
|
|
1732
|
|
1733 rtx_insn *insn = BB_END (e->src);
|
|
1734 gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
|
|
1735 emit_insn_before (seq, insn);
|
|
1736 }
|
|
1737 else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
1738 {
|
|
1739 gcc_assert (e->flags & EDGE_FALLTHRU);
|
|
1740 basic_block new_bb = split_edge (e);
|
|
1741 emit_insn_after (seq, BB_END (new_bb));
|
|
1742 }
|
|
1743 else
|
|
1744 insert_insn_on_edge (seq, e);
|
|
1745
|
|
1746 /* Put the prologue components in place. */
|
|
1747 start_sequence ();
|
|
1748 targetm.shrink_wrap.emit_prologue_components (pro);
|
|
1749 seq = get_insns ();
|
|
1750 end_sequence ();
|
|
1751 record_prologue_seq (seq);
|
|
1752
|
|
1753 insert_insn_on_edge (seq, e);
|
|
1754 }
|
|
1755 }
|
|
1756 }
|
|
1757
|
|
1758 commit_edge_insertions ();
|
|
1759 }
|
|
1760
|
|
1761 /* The main entry point to this subpass. FIRST_BB is where the prologue
|
|
1762 would be normally put. */
|
|
1763 void
|
|
1764 try_shrink_wrapping_separate (basic_block first_bb)
|
|
1765 {
|
|
1766 if (HAVE_cc0)
|
|
1767 return;
|
|
1768
|
|
1769 if (!(SHRINK_WRAPPING_ENABLED
|
|
1770 && flag_shrink_wrap_separate
|
|
1771 && optimize_function_for_speed_p (cfun)
|
|
1772 && targetm.shrink_wrap.get_separate_components))
|
|
1773 return;
|
|
1774
|
|
1775 /* We don't handle "strange" functions. */
|
|
1776 if (cfun->calls_alloca
|
|
1777 || cfun->calls_setjmp
|
|
1778 || cfun->can_throw_non_call_exceptions
|
|
1779 || crtl->calls_eh_return
|
|
1780 || crtl->has_nonlocal_goto
|
|
1781 || crtl->saves_all_registers)
|
|
1782 return;
|
|
1783
|
|
1784 /* Ask the target what components there are. If it returns NULL, don't
|
|
1785 do anything. */
|
|
1786 sbitmap components = targetm.shrink_wrap.get_separate_components ();
|
|
1787 if (!components)
|
|
1788 return;
|
|
1789
|
|
1790 /* We need LIVE info, not defining anything in the entry block and not
|
|
1791 using anything in the exit block. A block then needs a component if
|
|
1792 the register for that component is in the IN or GEN or KILL set for
|
|
1793 that block. */
|
|
1794 df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
|
|
1795 df_update_entry_exit_and_calls ();
|
|
1796 df_live_add_problem ();
|
|
1797 df_live_set_all_dirty ();
|
|
1798 df_analyze ();
|
|
1799
|
|
1800 calculate_dominance_info (CDI_DOMINATORS);
|
|
1801 calculate_dominance_info (CDI_POST_DOMINATORS);
|
|
1802
|
|
1803 init_separate_shrink_wrap (components);
|
|
1804
|
|
1805 sbitmap_iterator sbi;
|
|
1806 unsigned int j;
|
|
1807 EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
|
|
1808 place_prologue_for_one_component (j, first_bb);
|
|
1809
|
131
|
1810 /* Try to minimize the number of saves and restores. Do this as long as
|
|
1811 it changes anything. This does not iterate more than a few times. */
|
|
1812 int spread_times = 0;
|
|
1813 while (spread_components (components))
|
|
1814 {
|
|
1815 spread_times++;
|
|
1816
|
|
1817 if (dump_file)
|
|
1818 fprintf (dump_file, "Now spread %d times.\n", spread_times);
|
|
1819 }
|
111
|
1820
|
|
1821 disqualify_problematic_components (components);
|
|
1822
|
|
1823 /* Don't separately shrink-wrap anything where the "main" prologue will
|
|
1824 go; the target code can often optimize things if it is presented with
|
|
1825 all components together (say, if it generates store-multiple insns). */
|
|
1826 bitmap_and_compl (components, components, SW (first_bb)->has_components);
|
|
1827
|
|
1828 if (bitmap_empty_p (components))
|
|
1829 {
|
|
1830 if (dump_file)
|
|
1831 fprintf (dump_file, "Not wrapping anything separately.\n");
|
|
1832 }
|
|
1833 else
|
|
1834 {
|
|
1835 if (dump_file)
|
|
1836 {
|
|
1837 fprintf (dump_file, "The components we wrap separately are");
|
|
1838 dump_components ("sep", components);
|
|
1839 fprintf (dump_file, "\n");
|
|
1840
|
|
1841 fprintf (dump_file, "... Inserting common heads...\n");
|
|
1842 }
|
|
1843
|
|
1844 emit_common_heads_for_components (components);
|
|
1845
|
|
1846 if (dump_file)
|
|
1847 fprintf (dump_file, "... Inserting common tails...\n");
|
|
1848
|
|
1849 emit_common_tails_for_components (components);
|
|
1850
|
|
1851 if (dump_file)
|
|
1852 fprintf (dump_file, "... Inserting the more difficult ones...\n");
|
|
1853
|
|
1854 insert_prologue_epilogue_for_components (components);
|
|
1855
|
|
1856 if (dump_file)
|
|
1857 fprintf (dump_file, "... Done.\n");
|
|
1858
|
|
1859 targetm.shrink_wrap.set_handled_components (components);
|
|
1860
|
|
1861 crtl->shrink_wrapped_separate = true;
|
|
1862 }
|
|
1863
|
|
1864 fini_separate_shrink_wrap ();
|
|
1865
|
|
1866 sbitmap_free (components);
|
|
1867 free_dominance_info (CDI_DOMINATORS);
|
|
1868 free_dominance_info (CDI_POST_DOMINATORS);
|
|
1869
|
|
1870 /* All done. */
|
|
1871 df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
|
|
1872 df_update_entry_exit_and_calls ();
|
|
1873 df_live_set_all_dirty ();
|
|
1874 df_analyze ();
|
|
1875 }
|