comparison gcc/ipa-inline.c @ 0:a06113de4d67
first commit
author: kent <kent@cr.ie.u-ryukyu.ac.jp>
date: Fri, 17 Jul 2009 14:47:48 +0900
parents: (none)
children: 77e2b8dfacca
/* Inlining decision heuristics.
   Copyright (C) 2003, 2004, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Inlining decision heuristics

   We separate inlining decisions from the inliner itself and store them
   inside the callgraph as a so-called inline plan.  Refer to the cgraph.c
   documentation about the particular representation of inline plans in
   the callgraph.

   There are three major parts of this file:

   cgraph_mark_inline implementation

     This function allows marking a given call inline and performs the
     necessary modifications of the cgraph (production of the clones and
     updating of the overall statistics).

   inlining heuristics limits

     These functions allow checking that a particular inlining is allowed
     by the limits specified by the user (allowed function growth, overall
     unit growth and so on).

   inlining heuristics

     This is the implementation of the IPA pass aiming to get as much
     benefit from inlining as possible while obeying the limits checked
     above.

     The implementation of the particular heuristics is separated from the
     rest of the code to make it easier to replace it with a more
     complicated implementation in the future.  The rest of the inlining
     code acts as a library aimed to modify the callgraph and verify that
     the parameters on code size growth fit.

     To mark a given call inline, use the cgraph_mark_inline function; the
     verification is performed by cgraph_default_inline_p and
     cgraph_check_inline_limits.

     The heuristics implement a simple knapsack-style algorithm ordering
     all functions by their "profitability" (estimated by code size
     growth) and inlining them in priority order.

     cgraph_decide_inlining implements the heuristics taking the whole
     callgraph into account, while cgraph_decide_inlining_incrementally
     considers only one function at a time and is used by the early
     inliner.

   The inliner itself is split into several passes:

   pass_inline_parameters

     This pass computes local properties of functions that are used by the
     inliner: the estimated function body size, whether the function is
     inlinable at all, and stack frame consumption.

     Before executing any of the inliner passes, this local pass has to be
     applied to each function in the callgraph (ie run as a subpass of
     some earlier IPA pass).  The results are made out of date by any
     optimization applied on the function body.

   pass_early_inlining

     Simple local inlining pass inlining callees into the current
     function.  This pass makes no global whole-compilation-unit analysis,
     so when it is allowed to do inlining that expands code size, it might
     result in unbounded growth of the whole unit.

     The pass is run during conversion into SSA form.  Only functions
     already converted into SSA form are inlined, so the conversion must
     happen in topological order on the callgraph (that is maintained by
     the pass manager).  The functions after inlining are early optimized,
     so the early inliner sees the unoptimized function itself, but all
     considered callees are already optimized, allowing it to unfold the
     abstraction penalty on C++ effectively and cheaply.

   pass_ipa_early_inlining

     With profiling, the early inlining is also necessary to reduce
     instrumentation costs on programs with a high abstraction penalty
     (doing many redundant calls).  This can't happen in parallel with
     early optimization and profile instrumentation, because we would end
     up re-instrumenting already instrumented function bodies we brought
     in via inlining.

     To avoid this, this pass is executed as an IPA pass before profiling.
     It is a simple wrapper around pass_early_inlining and ensures first
     inlining.

   pass_ipa_inline

     This is the main pass implementing a simple greedy algorithm to do
     inlining of small functions that results in overall growth of the
     compilation unit, and inlining of functions called once.  The pass
     computes just the so-called inline plan (a representation of the
     inlining to be done in the callgraph); unlike early inlining, it does
     not perform the inlining itself.

   pass_apply_inline

     This pass performs the actual inlining according to pass_ipa_inline
     on a given function.  Possibly the function body before inlining is
     saved when it is needed for further inlining later.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "coverage.h"
#include "ggc.h"
#include "tree-flow.h"
#include "rtl.h"
#include "ipa-prop.h"

/* Modes the incremental inliner operates in:

   In ALWAYS_INLINE mode, only functions marked always_inline are inlined.
   This mode is used after detecting a cycle during flattening.

   In SIZE mode, only functions that reduce the function body size after
   inlining are inlined; this is used during early inlining.

   In ALL mode, everything is inlined.  This is used during flattening.  */
enum inlining_mode {
  INLINE_NONE = 0,
  INLINE_ALWAYS_INLINE,
  INLINE_SIZE,
  INLINE_ALL
};
static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *, enum inlining_mode,
                                      int);


/* Statistics we collect about the inlining algorithm.  */
static int ncalls_inlined;
static int nfunctions_inlined;
static int overall_insns;
static gcov_type max_count;

/* Holders of ipa cgraph hooks: */
static struct cgraph_node_hook_list *function_insertion_hook_holder;

static inline struct inline_summary *
inline_summary (struct cgraph_node *node)
{
  return &node->local.inline_summary;
}

/* Estimate size of the function after inlining WHAT into TO.  */

static int
cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
                                     struct cgraph_node *what)
{
  int size;
  tree fndecl = what->decl, arg;
  int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);

  for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
    call_insns += estimate_move_cost (TREE_TYPE (arg));
  size = (what->global.insns - call_insns) * times + to->global.insns;
  gcc_assert (size >= 0);
  return size;
}
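
/* Illustration of the formula above (hypothetical numbers, not taken from
   any real compilation unit): with PARAM_INLINE_CALL_COST plus the
   argument move costs totalling 10 insns, inlining a 40-insn callee twice
   into a 100-insn caller gives

     size = (40 - 10) * 2 + 100 = 160 insns,

   i.e. each inlined call adds the callee body minus the call overhead it
   replaces.  */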

/* E is expected to be an edge being inlined.  Clone the destination node
   of the edge and redirect it to the new clone.
   DUPLICATE is used for bookkeeping on whether we are actually creating
   new clones or re-using the node originally representing the out-of-line
   function call.  */
void
cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
                            bool update_original)
{
  HOST_WIDE_INT peak;

  if (duplicate)
    {
      /* We may eliminate the need for the out-of-line copy to be output.
         In that case just go ahead and re-use it.  */
      if (!e->callee->callers->next_caller
          && !e->callee->needed
          && !cgraph_new_nodes)
        {
          gcc_assert (!e->callee->global.inlined_to);
          if (e->callee->analyzed)
            overall_insns -= e->callee->global.insns, nfunctions_inlined++;
          duplicate = false;
        }
      else
        {
          struct cgraph_node *n;
          n = cgraph_clone_node (e->callee, e->count, e->frequency,
                                 e->loop_nest, update_original);
          cgraph_redirect_edge_callee (e, n);
        }
    }

  if (e->caller->global.inlined_to)
    e->callee->global.inlined_to = e->caller->global.inlined_to;
  else
    e->callee->global.inlined_to = e->caller;
  e->callee->global.stack_frame_offset
    = e->caller->global.stack_frame_offset
      + inline_summary (e->caller)->estimated_self_stack_size;
  peak = e->callee->global.stack_frame_offset
    + inline_summary (e->callee)->estimated_self_stack_size;
  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
    e->callee->global.inlined_to->global.estimated_stack_size = peak;

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, duplicate, update_original);
}
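
/* Illustration of the stack accounting above (all sizes made up): if A
   (frame 48 bytes) inlines B (frame 32), and B in turn inlines C (frame
   16), then C's stack_frame_offset becomes 48 + 32 = 80 and the peak
   recorded on A is 80 + 16 = 96 bytes, the worst case when all inlined
   frames are live at once.  */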

/* Mark edge E as inlined and update the callgraph accordingly.
   UPDATE_ORIGINAL specifies whether the profile of the original function
   should be updated.  If any new indirect edges are discovered in the
   process, add them to NEW_EDGES, unless it is NULL.  Return true iff any
   new callgraph edges were discovered as a result of inlining.  */

static bool
cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original,
                         VEC (cgraph_edge_p, heap) **new_edges)
{
  int old_insns = 0, new_insns = 0;
  struct cgraph_node *to = NULL, *what;
  struct cgraph_edge *curr = e;

  if (e->callee->inline_decl)
    cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));

  gcc_assert (e->inline_failed);
  e->inline_failed = NULL;

  if (!e->callee->global.inlined)
    DECL_POSSIBLY_INLINED (e->callee->decl) = true;
  e->callee->global.inlined = true;

  cgraph_clone_inlined_nodes (e, true, update_original);

  what = e->callee;

  /* Now update the size of the caller and of all functions the caller is
     inlined into.  */
  for (; e && !e->inline_failed; e = e->caller->callers)
    {
      old_insns = e->caller->global.insns;
      new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
                                                       what);
      gcc_assert (new_insns >= 0);
      to = e->caller;
      to->global.insns = new_insns;
    }
  gcc_assert (what->global.inlined_to == to);
  if (new_insns > old_insns)
    overall_insns += new_insns - old_insns;
  ncalls_inlined++;

  if (flag_indirect_inlining)
    return ipa_propagate_indirect_call_infos (curr, new_edges);
  else
    return false;
}

/* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
   Return the following unredirected edge in the list of callers
   of EDGE->CALLEE.  */

static struct cgraph_edge *
cgraph_mark_inline (struct cgraph_edge *edge)
{
  struct cgraph_node *to = edge->caller;
  struct cgraph_node *what = edge->callee;
  struct cgraph_edge *e, *next;

  gcc_assert (!gimple_call_cannot_inline_p (edge->call_stmt));
  /* Look for all calls, mark them inline and clone recursively
     all inlined functions.  */
  for (e = what->callers; e; e = next)
    {
      next = e->next_caller;
      if (e->caller == to && e->inline_failed)
        {
          cgraph_mark_inline_edge (e, true, NULL);
          if (e == edge)
            edge = next;
        }
    }

  return edge;
}

/* Estimate the growth caused by inlining NODE into all callees.  */

static int
cgraph_estimate_growth (struct cgraph_node *node)
{
  int growth = 0;
  struct cgraph_edge *e;
  bool self_recursive = false;

  if (node->global.estimated_growth != INT_MIN)
    return node->global.estimated_growth;

  for (e = node->callers; e; e = e->next_caller)
    {
      if (e->caller == node)
        self_recursive = true;
      if (e->inline_failed)
        growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
                   - e->caller->global.insns);
    }

  /* ??? Wrong for non-trivially self recursive functions or cases where
     we decide to not inline for different reasons, but it is not a big
     deal as in that case we will keep the body around, but we will also
     avoid some inlining.  */
  if (!node->needed && !DECL_EXTERNAL (node->decl) && !self_recursive)
    growth -= node->global.insns;

  node->global.estimated_growth = growth;
  return growth;
}
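
/* Illustration (hypothetical sizes): a 30-insn static function with two
   call sites, each estimated to save 10 insns of call overhead, grows the
   unit by (30 - 10) * 2 = 40 insns when inlined everywhere; since the
   offline body can then be dropped, the returned estimate is
   40 - 30 = 10 insns.  */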

/* Return false when inlining WHAT into TO is not a good idea as it would
   cause too large growth of the function bodies.  When ONE_ONLY is true,
   assume that only one call site is going to be inlined, otherwise figure
   out how many call sites in TO call WHAT and verify that all can be
   inlined.  */

static bool
cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
                            const char **reason, bool one_only)
{
  int times = 0;
  struct cgraph_edge *e;
  int newsize;
  int limit;
  HOST_WIDE_INT stack_size_limit, inlined_stack;

  if (one_only)
    times = 1;
  else
    for (e = to->callees; e; e = e->next_callee)
      if (e->callee == what)
        times++;

  if (to->global.inlined_to)
    to = to->global.inlined_to;

  /* When inlining a large function body called once into a small function,
     take the inlined function as the base for limiting the growth.  */
  if (inline_summary (to)->self_insns > inline_summary (what)->self_insns)
    limit = inline_summary (to)->self_insns;
  else
    limit = inline_summary (what)->self_insns;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = cgraph_estimate_size_after_inlining (times, to, what);
  if (newsize >= to->global.insns
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      if (reason)
        *reason = N_("--param large-function-growth limit reached");
      return false;
    }

  stack_size_limit = inline_summary (to)->estimated_self_stack_size;

  stack_size_limit += stack_size_limit
    * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;

  inlined_stack = (to->global.stack_frame_offset
                   + inline_summary (to)->estimated_self_stack_size
                   + what->global.estimated_stack_size);
  if (inlined_stack > stack_size_limit
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      if (reason)
        *reason = N_("--param large-stack-frame-growth limit reached");
      return false;
    }
  return true;
}
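
/* Worked example of the growth limit (parameter values assumed; see
   params.def for the real defaults): with large-function-growth = 100, a
   500-insn function inlining a 200-insn callee gets

     limit = 500 + 500 * 100 / 100 = 1000 insns,

   and the inlining is rejected only when the estimated new size exceeds
   the old size, large-function-insns, and this limit all at once.  */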

/* Return true when function N is small enough to be inlined.  */

bool
cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
{
  tree decl = n->decl;

  if (n->inline_decl)
    decl = n->inline_decl;
  if (!flag_inline_small_functions && !DECL_DECLARED_INLINE_P (decl))
    {
      if (reason)
        *reason = N_("function not inline candidate");
      return false;
    }

  if (!DECL_STRUCT_FUNCTION (decl)->cfg)
    {
      if (reason)
        *reason = N_("function body not available");
      return false;
    }

  if (DECL_DECLARED_INLINE_P (decl))
    {
      if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
        {
          if (reason)
            *reason = N_("--param max-inline-insns-single limit reached");
          return false;
        }
    }
  else
    {
      if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
        {
          if (reason)
            *reason = N_("--param max-inline-insns-auto limit reached");
          return false;
        }
    }

  return true;
}

/* Return true when inlining WHAT would create recursive inlining.
   We call recursive inlining all cases where the same function appears
   more than once in a single recursion nest path in the inline graph.  */

static bool
cgraph_recursive_inlining_p (struct cgraph_node *to,
                             struct cgraph_node *what,
                             const char **reason)
{
  bool recursive;
  if (to->global.inlined_to)
    recursive = what->decl == to->global.inlined_to->decl;
  else
    recursive = what->decl == to->decl;
  /* Marking a recursive function inline has sane semantics and thus we
     should not warn on it.  */
  if (recursive && reason)
    *reason = (what->local.disregard_inline_limits
               ? N_("recursive inlining") : "");
  return recursive;
}
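
/* Example of the test above (hypothetical callgraph): suppose g has
   inlined a copy of f, so that copy has global.inlined_to == g.  An edge
   from that copy whose callee is g again would, if inlined, nest g inside
   its own body; the decl comparison against global.inlined_to catches
   exactly this, in addition to the plain f-calls-f case.  */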

/* A cost model driving the inlining heuristics in such a way that edges
   with the smallest badness are inlined first.  After each inlining is
   performed the costs of all caller edges of the nodes affected are
   recomputed so the metrics may accurately depend on values such as the
   number of inlinable callers of the function or the function body
   size.  */

static int
cgraph_edge_badness (struct cgraph_edge *edge)
{
  int badness;
  int growth =
    cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);

  growth -= edge->caller->global.insns;

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    badness = INT_MIN - growth;

  /* When profiling is available, base priorities on -(#calls / growth).
     So we optimize for the overall number of "executed" inlined calls.  */
  else if (max_count)
    badness = ((int)((double)edge->count * INT_MIN / max_count)) / growth;

  /* When a function local profile is available, base priorities on
     growth / frequency, so we optimize for the overall frequency of
     inlined calls.  This is not too accurate since while the call might
     be frequent within the function, the function itself may be
     infrequent.

     Another objective to optimize for is the number of different calls
     inlined.  We add the estimated growth after inlining all functions to
     bias the priorities slightly in this direction (so less frequently
     called functions of the same size get priority).  */
  else if (flag_guess_branch_prob)
    {
      int div = edge->frequency * 100 / CGRAPH_FREQ_BASE;
      int growth =
        cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
      growth -= edge->caller->global.insns;
      badness = growth * 256;

      /* Decrease badness if call is nested.  */
      /* Compress the range so we don't overflow.  */
      if (div > 256)
        div = 256 + ceil_log2 (div) - 8;
      if (div < 1)
        div = 1;
      if (badness > 0)
        badness /= div;
      badness += cgraph_estimate_growth (edge->callee);
    }
  /* When a function local profile is not available or it does not give
     useful information (ie frequency is zero), base the cost on the loop
     nest and overall size growth, so we optimize for the overall number
     of functions fully inlined in the program.  */
  else
    {
      int nest = MIN (edge->loop_nest, 8);
      badness = cgraph_estimate_growth (edge->callee) * 256;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
        badness >>= nest;
      else
        {
          badness <<= nest;
        }
    }
  /* Make recursive inlining happen always after other inlining is done.  */
  if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
    return badness + 1;
  else
    return badness;
}
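
/* Illustration of the guessed-profile branch (made-up numbers): a call
   with frequency CGRAPH_FREQ_BASE / 2 gives div = 50, so a growth of 20
   insns yields badness = 20 * 256 / 50 = 102 before the estimated-growth
   bias is added; hotter call sites (larger div) and smaller growth both
   lower the badness and are therefore inlined earlier.  */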

/* Recompute heap nodes for each caller edge.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *edge;
  const char *failed_reason;

  if (!node->local.inlinable || node->local.disregard_inline_limits
      || node->global.inlined_to)
    return;
  if (bitmap_bit_p (updated_nodes, node->uid))
    return;
  bitmap_set_bit (updated_nodes, node->uid);
  node->global.estimated_growth = INT_MIN;

  if (!node->local.inlinable)
    return;
  /* Prune out edges we won't inline into anymore.  */
  if (!cgraph_default_inline_p (node, &failed_reason))
    {
      for (edge = node->callers; edge; edge = edge->next_caller)
        if (edge->aux)
          {
            fibheap_delete_node (heap, (fibnode_t) edge->aux);
            edge->aux = NULL;
            if (edge->inline_failed)
              edge->inline_failed = failed_reason;
          }
      return;
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        int badness = cgraph_edge_badness (edge);
        if (edge->aux)
          {
            fibnode_t n = (fibnode_t) edge->aux;
            gcc_assert (n->data == edge);
            if (n->key == badness)
              continue;

            /* fibheap_replace_key only increases the keys.  */
            if (fibheap_replace_key (heap, n, badness))
              continue;
            fibheap_delete_node (heap, (fibnode_t) edge->aux);
          }
        edge->aux = fibheap_insert (heap, badness, edge);
      }
}

/* Recompute heap nodes for each caller edge of each callee.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e;
  node->global.estimated_growth = INT_MIN;

  for (e = node->callees; e; e = e->next_callee)
    if (e->inline_failed)
      update_caller_keys (heap, e->callee, updated_nodes);
    else if (!e->inline_failed)
      update_callee_keys (heap, e->callee, updated_nodes);
}

/* Enqueue all recursive calls from NODE into the priority queue depending
   on how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        fibheap_t heap)
{
  static int priority;
  struct cgraph_edge *e;
  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node)
      {
        /* When profile feedback is available, prioritize by the expected
           number of calls.  Without profile feedback we maintain a simple
           queue to order candidates by recursion depth.  */
        fibheap_insert (heap,
                        !max_count ? priority++
                        : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
                        e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}

/* Decide on recursive inlining: in case the function has recursive calls,
   inline until the body size reaches the given argument.  If any new
   indirect edges are discovered in the process, add them to *NEW_EDGES,
   unless NEW_EDGES is NULL.  */

static bool
cgraph_decide_recursive_inlining (struct cgraph_node *node,
                                  VEC (cgraph_edge_p, heap) **new_edges)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
  int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
  fibheap_t heap;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone, *next;
  int depth = 0;
  int n = 0;

  if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl))
      || (!flag_inline_functions && !DECL_DECLARED_INLINE_P (node->decl)))
    return false;

  if (DECL_DECLARED_INLINE_P (node->decl))
    {
      limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
      max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
    }

  /* Make sure that the function is small enough to be considered for
     inlining.  */
  if (!max_depth
      || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
             " Performing recursive inlining on %s\n",
             cgraph_node_name (node));

  /* We need the original clone to copy around.  */
  master_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE,
                                    1, false);
  master_clone->needed = true;
  for (e = master_clone->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, true, false);

  /* Do the inlining and update the list of recursive calls during the
     process.  */
  while (!fibheap_empty (heap)
         && (cgraph_estimate_size_after_inlining (1, node, master_clone)
             <= limit))
    {
      struct cgraph_edge *curr
        = (struct cgraph_edge *) fibheap_extract_min (heap);
      struct cgraph_node *cnode;

      depth = 1;
      for (cnode = curr->caller;
           cnode->global.inlined_to; cnode = cnode->callers->caller)
        if (node->decl == curr->callee->decl)
          depth++;
      if (depth > max_depth)
        {
          if (dump_file)
            fprintf (dump_file,
                     " maximal depth reached\n");
          continue;
        }

      if (max_count)
        {
          if (!cgraph_maybe_hot_edge_p (curr))
            {
              if (dump_file)
                fprintf (dump_file, " Not inlining cold call\n");
              continue;
            }
          if (curr->count * 100 / node->count < probability)
            {
              if (dump_file)
                fprintf (dump_file,
                         " Probability of edge is too small\n");
              continue;
            }
        }

      if (dump_file)
        {
          fprintf (dump_file,
                   " Inlining call of depth %i", depth);
          if (node->count)
            {
              fprintf (dump_file, " called approx. %.2f times per call",
                       (double)curr->count / node->count);
            }
          fprintf (dump_file, "\n");
        }
      cgraph_redirect_edge_callee (curr, master_clone);
      cgraph_mark_inline_edge (curr, false, new_edges);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }
  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, " Recursive inlining growth limit met.\n");

  fibheap_delete (heap);
  if (dump_file)
    fprintf (dump_file,
             "\n Inlined %i times, body grown from %i to %i insns\n", n,
             master_clone->global.insns, node->global.insns);

  /* Remove the master clone we used for inlining.  We rely on the fact
     that clones inlined into the master clone get queued just before the
     master clone, so we don't need recursion.  */
  for (node = cgraph_nodes; node != master_clone;
       node = next)
    {
      next = node->next;
      if (node->global.inlined_to == master_clone)
        cgraph_remove_node (node);
    }
  cgraph_remove_node (master_clone);
  /* FIXME: Recursive inlining actually reduces the number of calls to the
     function.  At this place we should probably walk the function and
     inline clones and compensate the counts accordingly.  This probably
     doesn't matter much in practice.  */
  return n > 0;
}
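
/* Sketch of the effect of recursive inlining (hypothetical function):
   for a self-recursive function such as

     int fac (int n) { return n <= 1 ? 1 : n * fac (n - 1); }

   the loop above redirects the recursive call edge to the master clone
   and keeps inlining it into itself, effectively unrolling the recursion
   up to max_depth levels or until the size limit is hit, after which the
   temporary clones are removed again.  */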

/* Set inline_failed for all callers of given function to REASON.  */

static void
cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
{
  struct cgraph_edge *e;

  if (dump_file)
    fprintf (dump_file, "Inlining failed: %s\n", reason);
  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      e->inline_failed = reason;
}

/* Given a whole-compilation-unit estimate of INSNS, compute how large we
   can allow the unit to grow.  */
static int
compute_max_insns (int insns)
{
  int max_insns = insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

  return ((HOST_WIDEST_INT) max_insns
          * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
}
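
/* Worked example (parameter values assumed; see params.def for the real
   defaults): with inline-unit-growth = 30 and an initial unit of 10000
   insns, the cap is 10000 * (100 + 30) / 100 = 13000 insns; units smaller
   than large-unit-insns are first rounded up to that floor, so tiny units
   may grow proportionally more.  */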

/* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
static void
add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges)
{
  while (VEC_length (cgraph_edge_p, new_edges) > 0)
    {
      struct cgraph_edge *edge = VEC_pop (cgraph_edge_p, new_edges);

      gcc_assert (!edge->aux);
      edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
    }
}


/* We use a greedy algorithm for inlining of small functions:
   All inline candidates are put into a prioritized heap based on the
   estimated growth of the overall number of instructions, and the
   estimates are then updated.

   INLINED and INLINED_CALLEES are just pointers to arrays large enough
   to be passed to cgraph_inlined_into and cgraph_inlined_callees.  */

static void
cgraph_decide_inlining_of_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  const char *failed_reason;
  fibheap_t heap = fibheap_new ();
  bitmap updated_nodes = BITMAP_ALLOC (NULL);
  int min_insns, max_insns;
  VEC (cgraph_edge_p, heap) *new_indirect_edges = NULL;

  if (flag_indirect_inlining)
    new_indirect_edges = VEC_alloc (cgraph_edge_p, heap, 8);

  if (dump_file)
    fprintf (dump_file, "\nDeciding on smaller functions:\n");

  /* Put all inline candidates into the heap.  */

  for (node = cgraph_nodes; node; node = node->next)
    {
      if (!node->local.inlinable || !node->callers
          || node->local.disregard_inline_limits)
        continue;
      if (dump_file)
        fprintf (dump_file, "Considering inline candidate %s.\n",
                 cgraph_node_name (node));

      node->global.estimated_growth = INT_MIN;
      if (!cgraph_default_inline_p (node, &failed_reason))
        {
          cgraph_set_inline_failed (node, failed_reason);
          continue;
        }

      for (edge = node->callers; edge; edge = edge->next_caller)
        if (edge->inline_failed)
          {
            gcc_assert (!edge->aux);
            edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge),
                                        edge);
          }
    }

  max_insns = compute_max_insns (overall_insns);
  min_insns = overall_insns;

  while (overall_insns <= max_insns
         && (edge = (struct cgraph_edge *) fibheap_extract_min (heap)))
    {
      int old_insns = overall_insns;
      struct cgraph_node *where;
      int growth =
        cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
      const char *not_good = NULL;

      growth -= edge->caller->global.insns;

      if (dump_file)
        {
          fprintf (dump_file,
                   "\nConsidering %s with %i insns\n",
                   cgraph_node_name (edge->callee),
                   edge->callee->global.insns);
          fprintf (dump_file,
                   " to be inlined into %s\n"
                   " Estimated growth after inlined into all callees is %+i insns.\n"
                   " Estimated badness is %i, frequency %.2f.\n",
                   cgraph_node_name (edge->caller),
                   cgraph_estimate_growth (edge->callee),
                   cgraph_edge_badness (edge),
                   edge->frequency / (double)CGRAPH_FREQ_BASE);
          if (edge->count)
            fprintf (dump_file, " Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
                     edge->count);
        }
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed)
        continue;

      /* When not having profile info ready, we don't weight in any way
         the position of the call in the procedure itself.  This means
         that if a call of function A from function B seems profitable to
         inline, the recursive call of function A in the inline copy of A
         in B will look profitable too, and we end up inlining until
         reaching the maximal function growth.  This is not a good idea,
         so we prohibit the recursive inlining.

         ??? When the frequencies are taken into account we might not need
         this restriction.

         We need to be careful here, in some testcases, e.g. directives.c
         in libcpp, we can estimate a self-recursive function to have
         negative growth for inlining completely.  */
      if (!edge->count)
        {
          where = edge->caller;
          while (where->global.inlined_to)
            {
              if (where->decl == edge->callee->decl)
                break;
              where = where->callers->caller;
            }
          if (where->global.inlined_to)
            {
              edge->inline_failed
                = (edge->callee->local.disregard_inline_limits
                   ? N_("recursive inlining") : "");
              if (dump_file)
                fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
              continue;
            }
        }

      if (!cgraph_maybe_hot_edge_p (edge))
        not_good = N_("call is unlikely and code size would grow");
      if (!flag_inline_functions
          && !DECL_DECLARED_INLINE_P (edge->callee->decl))
        not_good = N_("function not declared inline and code size would grow");
      if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION (edge->caller->decl)))
        not_good = N_("optimizing for size and code size would grow");
      if (not_good && growth > 0 && cgraph_estimate_growth (edge->callee) > 0)
        {
          if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                            &edge->inline_failed))
            {
              edge->inline_failed = not_good;
              if (dump_file)
                fprintf (dump_file, " inline_failed:%s.\n",
                         edge->inline_failed);
            }
          continue;
        }
      if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
        {
          if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                            &edge->inline_failed))
            {
              if (dump_file)
                fprintf (dump_file, " inline_failed:%s.\n",
                         edge->inline_failed);
            }
          continue;
        }
      if (!tree_can_inline_p (edge->caller->decl, edge->callee->decl))
        {
          gimple_call_set_cannot_inline (edge->call_stmt, true);
          edge->inline_failed = N_("target specific option mismatch");
          if (dump_file)
            fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
          continue;
        }
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                       &edge->inline_failed))
        {
          where = edge->caller;
          if (where->global.inlined_to)
            where = where->global.inlined_to;
          if (!cgraph_decide_recursive_inlining (where,
                                                 flag_indirect_inlining
                                                 ? &new_indirect_edges : NULL))
            continue;
          if (flag_indirect_inlining)
            add_new_edges_to_heap (heap, new_indirect_edges);
          update_callee_keys (heap, where, updated_nodes);
        }
      else
        {
          struct cgraph_node *callee;
          if (gimple_call_cannot_inline_p (edge->call_stmt)
              || !cgraph_check_inline_limits (edge->caller, edge->callee,
                                              &edge->inline_failed, true))
            {
              if (dump_file)
                fprintf (dump_file, " Not inlining into %s:%s.\n",
                         cgraph_node_name (edge->caller),
                         edge->inline_failed);
              continue;
            }
          callee = edge->callee;
          cgraph_mark_inline_edge (edge, true, &new_indirect_edges);
          if (flag_indirect_inlining)
            add_new_edges_to_heap (heap, new_indirect_edges);

          update_callee_keys (heap, callee, updated_nodes);
        }
      where = edge->caller;
      if (where->global.inlined_to)
        where = where->global.inlined_to;

      /* Our profitability metric can depend on local properties such as
         the number of inlinable calls and the size of the function body.
         After inlining these properties might change for the function we
         inlined into (since its body size changed) and for the functions
         called by the function we inlined (since the number of their
         inlinable callers might change).  */
      update_caller_keys (heap, where, updated_nodes);
      bitmap_clear (updated_nodes);

      if (dump_file)
        {
          fprintf (dump_file,
                   " Inlined into %s which now has %i insns,"
                   " net change of %+i insns.\n",
                   cgraph_node_name (edge->caller),
                   edge->caller->global.insns,
                   overall_insns - old_insns);
        }
      if (min_insns > overall_insns)
        {
          min_insns = overall_insns;
          max_insns = compute_max_insns (min_insns);

          if (dump_file)
            fprintf (dump_file, "New minimal insns reached: %i\n",
                     min_insns);
        }
    }
  while ((edge = (struct cgraph_edge *) fibheap_extract_min (heap)) != NULL)
    {
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
          && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                           &edge->inline_failed))
        edge->inline_failed = N_("--param inline-unit-growth limit reached");
    }

  if (new_indirect_edges)
    VEC_free (cgraph_edge_p, heap, new_indirect_edges);
  fibheap_delete (heap);
  BITMAP_FREE (updated_nodes);
}
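
/* The driver above is, at its core, the classic greedy loop; an
   illustrative outline only, where passes_all_limit_checks stands for the
   chain of early `continue' tests in the loop body:

     heap = all candidate edges keyed by cgraph_edge_badness (edge);
     while (overall_insns <= max_insns && (edge = extract_min (heap)))
       if (passes_all_limit_checks (edge))
         {
           cgraph_mark_inline_edge (edge, true, &new_indirect_edges);
           update_caller_keys / update_callee_keys around EDGE;
         }  */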

/* Decide on the inlining.  We do so in topological order to avoid the
   expense of updating the data structures.  */

static unsigned int
cgraph_decide_inlining (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order =
    XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  int old_insns = 0;
  int i;
  int initial_insns = 0;
  bool redo_always_inline = true;

  cgraph_remove_function_insertion_hook (function_insertion_hook_holder);

  max_count = 0;
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed && (node->needed || node->reachable))
      {
        struct cgraph_edge *e;

        initial_insns += inline_summary (node)->self_insns;
        gcc_assert (inline_summary (node)->self_insns == node->global.insns);
        for (e = node->callees; e; e = e->next_callee)
          if (max_count < e->count)
            max_count = e->count;
      }
  overall_insns = initial_insns;
  gcc_assert (!max_count || (profile_info && flag_branch_probabilities));

  nnodes = cgraph_postorder (order);

  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on inlining.  Starting with %i insns.\n",
             initial_insns);

  for (node = cgraph_nodes; node; node = node->next)
    node->aux = 0;

  if (dump_file)
    fprintf (dump_file, "\nInlining always_inline functions:\n");

  /* In the first pass mark all always_inline edges.  Do this with a
     priority so none of our later choices will make this impossible.  */
  while (redo_always_inline)
    {
      redo_always_inline = false;
      for (i = nnodes - 1; i >= 0; i--)
        {
          struct cgraph_edge *e, *next;

          node = order[i];

          /* Handle nodes to be flattened, but don't update overall unit
             size.  */
          if (lookup_attribute ("flatten",
                                DECL_ATTRIBUTES (node->decl)) != NULL)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Flattening %s\n", cgraph_node_name (node));
              cgraph_decide_inlining_incrementally (node, INLINE_ALL, 0);
            }

          if (!node->local.disregard_inline_limits)
            continue;
          if (dump_file)
            fprintf (dump_file,
                     "\nConsidering %s %i insns (always inline)\n",
                     cgraph_node_name (node), node->global.insns);
          old_insns = overall_insns;
          for (e = node->callers; e; e = next)
            {
              next = e->next_caller;
              if (!e->inline_failed
                  || gimple_call_cannot_inline_p (e->call_stmt))
                continue;
              if (cgraph_recursive_inlining_p (e->caller, e->callee,
                                               &e->inline_failed))
                continue;
              if (!tree_can_inline_p (e->caller->decl, e->callee->decl))
                {
                  gimple_call_set_cannot_inline (e->call_stmt, true);
                  continue;
                }
              if (cgraph_mark_inline_edge (e, true, NULL))
                redo_always_inline = true;
              if (dump_file)
                fprintf (dump_file,
                         " Inlined into %s which now has %i insns.\n",
                         cgraph_node_name (e->caller),
                         e->caller->global.insns);
            }
          /* Inlining a self-recursive function might introduce new calls
             to it that we didn't see in the loop above.  Fill in the
             proper reason why the inlining failed.  */
          for (e = node->callers; e; e = e->next_caller)
            if (e->inline_failed)
              e->inline_failed = N_("recursive inlining");
          if (dump_file)
            fprintf (dump_file,
                     " Inlined for a net change of %+i insns.\n",
                     overall_insns - old_insns);
        }
    }

  cgraph_decide_inlining_of_small_functions ();

  if (flag_inline_functions_called_once)
    {
      if (dump_file)
        fprintf (dump_file, "\nDeciding on functions called once:\n");

      /* And finally decide what functions are called once.  */
      for (i = nnodes - 1; i >= 0; i--)
        {
          node = order[i];

          if (node->callers
              && !node->callers->next_caller
              && !node->needed
              && node->local.inlinable
              && node->callers->inline_failed
              && !gimple_call_cannot_inline_p (node->callers->call_stmt)
              && !DECL_EXTERNAL (node->decl)
              && !DECL_COMDAT (node->decl))
            {
              if (dump_file)
                {
                  fprintf (dump_file,
                           "\nConsidering %s %i insns.\n",
                           cgraph_node_name (node), node->global.insns);
                  fprintf (dump_file,
                           " Called once from %s %i insns.\n",
                           cgraph_node_name (node->callers->caller),
                           node->callers->caller->global.insns);
                }

              old_insns = overall_insns;

              if (cgraph_check_inline_limits (node->callers->caller, node,
                                              NULL, false))
                {
                  cgraph_mark_inline (node->callers);
                  if (dump_file)
                    fprintf (dump_file,
                             " Inlined into %s which now has %i insns"
                             " for a net change of %+i insns.\n",
                             cgraph_node_name (node->callers->caller),
                             node->callers->caller->global.insns,
                             overall_insns - old_insns);
                }
              else
                {
                  if (dump_file)
                    fprintf (dump_file,
                             " Inline limit reached, not inlined.\n");
                }
            }
        }
    }

  /* Free ipa-prop structures if they are no longer needed.  */
  if (flag_indirect_inlining)
    free_all_ipa_structures_after_iinln ();

  if (dump_file)
    fprintf (dump_file,
             "\nInlined %i calls, eliminated %i functions, "
             "%i insns turned to %i insns.\n\n",
             ncalls_inlined, nfunctions_inlined, initial_insns,
             overall_insns);
  free (order);
  return 0;
}

/* Try to inline edge E from the incremental inliner.  MODE specifies the
   mode of the inliner.

   We detect cycles by storing the mode of the inliner into the
   cgraph_node the last time we visited it in the recursion.  In general,
   when the mode is set, we have recursive inlining; but as a special
   case, we want to try harder to inline ALWAYS_INLINE functions: consider
   the callgraph a->b->c->b, with a being flatten and b being always
   inline.  Flattening 'a' will collapse a->b->c before hitting the cycle.
   To accommodate always inline, we however need to inline a->b->c->b.

   So after hitting the cycle the first time, we switch into
   ALWAYS_INLINE mode and stop inlining only after hitting ALWAYS_INLINE
   in ALWAYS_INLINE mode.  */
static bool
try_inline (struct cgraph_edge *e, enum inlining_mode mode, int depth)
{
  struct cgraph_node *callee = e->callee;
  enum inlining_mode callee_mode = (enum inlining_mode) (size_t) callee->aux;
  bool always_inline = e->callee->local.disregard_inline_limits;

  /* Have we hit a cycle?  */
  if (callee_mode)
    {
      /* It is the first time we see it, we are not in ALWAYS_INLINE-only
         mode yet, and the function in question is always_inline.  */
      if (always_inline && mode != INLINE_ALWAYS_INLINE)
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file,
                       "Hit cycle in %s, switching to always inline only.\n",
                       cgraph_node_name (callee));
            }
          mode = INLINE_ALWAYS_INLINE;
        }
      /* Otherwise it is time to give up.  */
      else
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file,
                       "Not inlining %s into %s to avoid cycle.\n",
                       cgraph_node_name (callee),
                       cgraph_node_name (e->caller));
            }
          e->inline_failed = (e->callee->local.disregard_inline_limits
                              ? N_("recursive inlining") : "");
          return false;
        }
    }

  callee->aux = (void *)(size_t) mode;
  if (dump_file)
    {
      indent_to (dump_file, depth);
      fprintf (dump_file, " Inlining %s into %s.\n",
               cgraph_node_name (e->callee),
               cgraph_node_name (e->caller));
    }
  if (e->inline_failed)
    {
      cgraph_mark_inline (e);

      /* In order to fully inline always_inline functions, we need to
         recurse here, since the inlined functions might not be processed
         by incremental inlining at all yet.

         Also flattening needs to be done recursively.  */

      if (mode == INLINE_ALL || always_inline)
        cgraph_decide_inlining_incrementally (e->callee, mode, depth + 1);
    }
  callee->aux = (void *)(size_t) callee_mode;
  return true;
}

/* Decide on the inlining.  We do so in topological order to avoid the
   expense of updating the data structures.
   DEPTH is the depth of recursion, used only for debug output.  */

static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *node,
                                      enum inlining_mode mode,
                                      int depth)
{
  struct cgraph_edge *e;
  bool inlined = false;
  const char *failed_reason;
  enum inlining_mode old_mode;

#ifdef ENABLE_CHECKING
  verify_cgraph_node (node);
#endif

  old_mode = (enum inlining_mode) (size_t) node->aux;

  if (mode != INLINE_ALWAYS_INLINE
      && lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      if (dump_file)
        {
          indent_to (dump_file, depth);
          fprintf (dump_file, "Flattening %s\n", cgraph_node_name (node));
        }
      mode = INLINE_ALL;
    }

  node->aux = (void *)(size_t) mode;

  /* First of all look for always inline functions.  */
  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->callee->local.disregard_inline_limits
          && (mode != INLINE_ALL || !e->callee->local.inlinable))
        continue;
      if (gimple_call_cannot_inline_p (e->call_stmt))
        continue;
      /* When the edge is already inlined, we just need to recurse into
         it in order to fully flatten the leaves.  */
      if (!e->inline_failed && mode == INLINE_ALL)
        {
          inlined |= try_inline (e, mode, depth);
          continue;
        }
      if (dump_file)
        {
          indent_to (dump_file, depth);
          fprintf (dump_file,
                   "Considering to always inline inline candidate %s.\n",
                   cgraph_node_name (e->callee));
        }
      if (cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed))
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file, "Not inlining: recursive call.\n");
            }
          continue;
        }
      if (!tree_can_inline_p (node->decl, e->callee->decl))
        {
          gimple_call_set_cannot_inline (e->call_stmt, true);
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file,
                       "Not inlining: Target specific option mismatch.\n");
            }
          continue;
        }
      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
          != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file, "Not inlining: SSA form does not match.\n");
            }
          continue;
        }
      if (!e->callee->analyzed && !e->callee->inline_decl)
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file,
                       "Not inlining: Function body no longer available.\n");
            }
          continue;
        }
      inlined |= try_inline (e, mode, depth);
    }

  /* Now do the automatic inlining.  */
  if (mode != INLINE_ALL && mode != INLINE_ALWAYS_INLINE)
    for (e = node->callees; e; e = e->next_callee)
      {
        if (!e->callee->local.inlinable
            || !e->inline_failed
            || e->callee->local.disregard_inline_limits)
          continue;
        if (dump_file)
          fprintf (dump_file, "Considering inline candidate %s.\n",
                   cgraph_node_name (e->callee));
        if (cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed))
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file, "Not inlining: recursive call.\n");
              }
            continue;
          }
        if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
            != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file,
                         "Not inlining: SSA form does not match.\n");
              }
            continue;
          }
        /* When the function body would grow and inlining the function
           won't eliminate the need for the offline copy of the function,
           don't inline.  */
        if ((mode == INLINE_SIZE
             || (!flag_inline_functions
                 && !DECL_DECLARED_INLINE_P (e->callee->decl)))
            && (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
                > e->caller->global.insns)
            && cgraph_estimate_growth (e->callee) > 0)
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file,
                         "Not inlining: code size would grow by %i insns.\n",
                         cgraph_estimate_size_after_inlining (1, e->caller,
                                                              e->callee)
                         - e->caller->global.insns);
              }
            continue;
          }
        if (!cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
                                         false)
            || gimple_call_cannot_inline_p (e->call_stmt))
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file, "Not inlining: %s.\n", e->inline_failed);
              }
            continue;
          }
        if (!e->callee->analyzed && !e->callee->inline_decl)
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file,
                         "Not inlining: Function body no longer available.\n");
              }
            continue;
          }
        if (!tree_can_inline_p (node->decl, e->callee->decl))
          {
            gimple_call_set_cannot_inline (e->call_stmt, true);
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file,
                         "Not inlining: Target specific option mismatch.\n");
              }
            continue;
          }
        if (cgraph_default_inline_p (e->callee, &failed_reason))
          inlined |= try_inline (e, mode, depth);
      }
  node->aux = (void *)(size_t) old_mode;
  return inlined;
}

/* Because inlining might remove no-longer-reachable nodes, we need to
   keep the array visible to the garbage collector to avoid reading
   collected-out nodes.  */
static int nnodes;
static GTY ((length ("nnodes"))) struct cgraph_node **order;

/* Do inlining of small functions.  Doing so early helps profiling and
   other passes to be somewhat more effective and avoids some code
   duplication in the later real inlining pass for testcases with very
   many function calls.  */
static unsigned int
cgraph_early_inlining (void)
{
  struct cgraph_node *node = cgraph_node (current_function_decl);
  unsigned int todo = 0;

  if (sorrycount || errorcount)
    return 0;
  if (cgraph_decide_inlining_incrementally (node, INLINE_SIZE, 0))
    {
      timevar_push (TV_INTEGRATION);
      todo = optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }
  cfun->always_inline_functions_inlined = true;
  return todo;
}
1536 | |
1537 /* When inlining shall be performed. */ | |
1538 static bool | |
1539 cgraph_gate_early_inlining (void) | |
1540 { | |
1541 return flag_early_inlining; | |
1542 } | |

struct gimple_opt_pass pass_early_inline =
{
 {
  GIMPLE_PASS,
  "einline",                            /* name */
  cgraph_gate_early_inlining,           /* gate */
  cgraph_early_inlining,                /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func                        /* todo_flags_finish */
 }
};
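
/* The pass descriptor above is a table of callbacks.  A hypothetical mini
   pass manager (not GCC's passes.c) shows how such a record is consumed:
   GATE decides whether the pass runs at all, and EXECUTE returns TODO
   flags for the manager to act on.  */

#include <stdbool.h>

struct toy_pass
{
  const char *name;
  bool (*gate) (void);			/* should the pass run now?  */
  unsigned int (*execute) (void);	/* do the work; return TODO flags */
};

static unsigned int
toy_run_pass (const struct toy_pass *pass)
{
  if (pass->gate && !pass->gate ())
    return 0;			/* gated off, e.g. by -fno-early-inlining */
  return pass->execute ? pass->execute () : 0;
}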

/* Return true when the IPA variant of early inlining shall be performed.  */
static bool
cgraph_gate_ipa_early_inlining (void)
{
  return (flag_early_inlining
          && (flag_branch_probabilities || flag_test_coverage
              || profile_arc_flag));
}

/* IPA pass wrapper for the early inlining pass.  We need to run early
   inlining before tree profiling, so we have a stand-alone IPA pass for
   doing so.  */
struct simple_ipa_opt_pass pass_ipa_early_inline =
{
 {
  SIMPLE_IPA_PASS,
  "einline_ipa",                        /* name */
  cgraph_gate_ipa_early_inlining,       /* gate */
  NULL,                                 /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_cgraph                      /* todo_flags_finish */
 }
};

/* Compute the parameters of NODE that are used by the inliner.  */
unsigned int
compute_inline_parameters (struct cgraph_node *node)
{
  HOST_WIDE_INT self_stack_size;

  gcc_assert (!node->global.inlined_to);

  /* Estimate the stack size for the function, but not at -O0, because
     estimated_stack_frame_size is a quadratic problem.  */
  self_stack_size = optimize ? estimated_stack_frame_size () : 0;
  inline_summary (node)->estimated_self_stack_size = self_stack_size;
  node->global.estimated_stack_size = self_stack_size;
  node->global.stack_frame_offset = 0;

  /* Can this function be inlined at all?  */
  node->local.inlinable = tree_inlinable_function_p (current_function_decl);

  /* Estimate the number of instructions for this function.
     ??? At -O0 we don't use this information except for the dumps, and
     even then only for always_inline functions.  But disabling it
     causes ICEs in the inline heuristics...  */
  inline_summary (node)->self_insns
    = estimate_num_insns_fn (current_function_decl, &eni_inlining_weights);
  if (node->local.inlinable && !node->local.disregard_inline_limits)
    node->local.disregard_inline_limits
      = DECL_DISREGARD_INLINE_LIMITS (current_function_decl);

  /* Inlining characteristics are maintained by cgraph_mark_inline.  */
  node->global.insns = inline_summary (node)->self_insns;
  return 0;
}
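
/* compute_inline_parameters boils down to filling a small per-function
   summary that the later heuristics read.  A toy version of that record,
   with hypothetical names (the real fields live in cgraph.h):  */

struct toy_inline_summary
{
  long estimated_self_stack_size;	/* stack estimate; 0 at -O0 */
  int self_insns;			/* size estimate of this body alone */
  unsigned int inlinable : 1;		/* can the function be inlined?  */
  unsigned int disregard_inline_limits : 1; /* e.g. always_inline */
};

/* A freshly analyzed function starts with its global size equal to its
   self size; only later inlining decisions make the two diverge.  */
static int
toy_initial_global_insns (const struct toy_inline_summary *summary)
{
  return summary->self_insns;
}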


/* Compute the inline parameters for current_function_decl.  */
static unsigned int
compute_inline_parameters_for_current (void)
{
  compute_inline_parameters (cgraph_node (current_function_decl));
  return 0;
}

struct gimple_opt_pass pass_inline_parameters =
{
 {
  GIMPLE_PASS,
  NULL,                                 /* name */
  NULL,                                 /* gate */
  compute_inline_parameters_for_current,/* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  0                                     /* todo_flags_finish */
 }
};

/* Perform the intraprocedural analysis of NODE that is required to inline
   indirect calls.  */
static void
inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  if (!flag_ipa_cp)
    {
      ipa_initialize_node_params (node);
      ipa_detect_param_modifications (node);
    }
  ipa_analyze_params_uses (node);

  if (!flag_ipa_cp)
    for (cs = node->callees; cs; cs = cs->next_callee)
      {
        ipa_count_arguments (cs);
        ipa_compute_jump_functions (cs);
      }

  if (dump_file)
    {
      ipa_print_node_params (dump_file, node);
      ipa_print_node_jump_functions (dump_file, node);
    }
}
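
/* The per-edge data computed above by ipa_compute_jump_functions records,
   for each argument of a call, how its value derives from the caller.  A
   toy encoding of that idea, with hypothetical names (the real
   representation lives in ipa-prop.h):  */

enum toy_jf_kind
{
  TOY_JF_UNKNOWN,	/* nothing useful is known about the argument */
  TOY_JF_CONST,		/* the argument is a compile-time constant */
  TOY_JF_PASS_THROUGH	/* the argument is one of the caller's own parms */
};

struct toy_jump_func
{
  enum toy_jf_kind kind;
  int constant_or_parm_index;	/* constant value, or caller parm index */
};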

/* Analyze NODE: note its body size and, when -findirect-inlining is on,
   the parameter information needed to inline indirect calls.  */
static void
analyze_function (struct cgraph_node *node)
{
  push_cfun (DECL_STRUCT_FUNCTION (node->decl));
  current_function_decl = node->decl;

  compute_inline_parameters (node);
  if (flag_indirect_inlining)
    inline_indirect_intraprocedural_analysis (node);

  current_function_decl = NULL;
  pop_cfun ();
}

/* Called when a new function is inserted into the callgraph late.  */
static void
add_new_function (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  analyze_function (node);
}
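
/* New functions can be materialized after the summary pass has already
   walked the callgraph, so add_new_function is registered below as an
   insertion hook and runs for each late-inserted node.  The callback-list
   pattern behind such hooks, restated with hypothetical types:  */

struct toy_hook
{
  void (*callback) (struct cgraph_node *node, void *data);
  void *data;
  struct toy_hook *next;
};

/* Invoke every registered callback for the newly inserted NODE.  */
static void
toy_call_insertion_hooks (struct toy_hook *hooks, struct cgraph_node *node)
{
  struct toy_hook *h;

  for (h = hooks; h; h = h->next)
    h->callback (node, h->data);
}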

/* Generate the per-function summaries used by the inliner.  */
static void
inline_generate_summary (void)
{
  struct cgraph_node *node;

  function_insertion_hook_holder =
      cgraph_add_function_insertion_hook (&add_new_function, NULL);

  if (flag_indirect_inlining)
    {
      ipa_register_cgraph_hooks ();
      ipa_check_create_node_params ();
      ipa_check_create_edge_args ();
    }

  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      analyze_function (node);
}

/* Apply the recorded inline plan to the function represented by NODE.  */
static unsigned int
inline_transform (struct cgraph_node *node)
{
  unsigned int todo = 0;
  struct cgraph_edge *e;

  /* We might need the body of this function so that we can expand
     it inline somewhere else.  */
  if (cgraph_preserve_function_body_p (node->decl))
    save_inline_function_body (node);

  for (e = node->callees; e; e = e->next_callee)
    if (!e->inline_failed || warn_inline)
      break;

  if (e)
    {
      timevar_push (TV_INTEGRATION);
      todo = optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }
  return todo | execute_fixup_cfg ();
}

struct ipa_opt_pass pass_ipa_inline =
{
 {
  IPA_PASS,
  "inline",                             /* name */
  NULL,                                 /* gate */
  cgraph_decide_inlining,               /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_remove_functions,                /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func
  | TODO_remove_functions               /* todo_flags_finish */
 },
 inline_generate_summary,               /* generate_summary */
 NULL,                                  /* write_summary */
 NULL,                                  /* read_summary */
 NULL,                                  /* function_read_summary */
 0,                                     /* TODOs */
 inline_transform,                      /* function_transform */
 NULL,                                  /* variable_transform */
};
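
/* The descriptor above splits the pass into phases that run at different
   times: generate_summary once over the whole program at IPA time, execute
   to build the inline plan, and function_transform per function when
   bodies are finally expanded.  A hypothetical driver loop (not GCC's pass
   manager) that makes the ordering explicit:  */

static void
toy_run_ipa_pass (struct ipa_opt_pass *pass, struct cgraph_node *nodes)
{
  struct cgraph_node *node;

  if (pass->generate_summary)
    pass->generate_summary ();		/* analyze every body once */
  if (pass->pass.execute)
    pass->pass.execute ();		/* decide: build the inline plan */
  for (node = nodes; node; node = node->next)
    if (pass->function_transform)
      pass->function_transform (node);	/* apply the plan per function */
}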


#include "gt-ipa-inline.h"