comparison gcc/ipa-fnsummary.c @ 111:04ced10e8804

author kono
date Fri, 27 Oct 2017 22:46:09 +0900
children 84e7813d76e9
1 /* Function summary pass.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store the above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether the function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depend on context (i.e. are known to be optimized away when the
43 context is known, either by inlining or from IPA-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inliner performs analysis via its analyze_function method. */
53
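/* Editorial sketch (not part of the original source) of the context
   sensitivity described above. For

     int f (int n)
     {
       if (n == 0)
         return 1;
       return expensive_work (n);   // expensive_work is hypothetical
     }

   the summary can record the cost of the call to expensive_work () under
   a predicate such as "n != 0". When the inliner or IPA-CP knows that n
   is 0 at a particular call site, the estimated size and time of f in
   that context shrink to little more than the early return. */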
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "params.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cilk.h"
83 #include "cfgexpand.h"
84 #include "gimplify.h"
85 #include "stringpool.h"
86 #include "attribs.h"
87
88 /* Summaries. */
89 function_summary <ipa_fn_summary *> *ipa_fn_summaries;
90 call_summary <ipa_call_summary *> *ipa_call_summaries;
91
92 /* Edge predicates go here. */
93 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
94
95
96 /* Dump IPA hints. */
97 void
98 ipa_dump_hints (FILE *f, ipa_hints hints)
99 {
100 if (!hints)
101 return;
102 fprintf (f, "IPA hints:");
103 if (hints & INLINE_HINT_indirect_call)
104 {
105 hints &= ~INLINE_HINT_indirect_call;
106 fprintf (f, " indirect_call");
107 }
108 if (hints & INLINE_HINT_loop_iterations)
109 {
110 hints &= ~INLINE_HINT_loop_iterations;
111 fprintf (f, " loop_iterations");
112 }
113 if (hints & INLINE_HINT_loop_stride)
114 {
115 hints &= ~INLINE_HINT_loop_stride;
116 fprintf (f, " loop_stride");
117 }
118 if (hints & INLINE_HINT_same_scc)
119 {
120 hints &= ~INLINE_HINT_same_scc;
121 fprintf (f, " same_scc");
122 }
123 if (hints & INLINE_HINT_in_scc)
124 {
125 hints &= ~INLINE_HINT_in_scc;
126 fprintf (f, " in_scc");
127 }
128 if (hints & INLINE_HINT_cross_module)
129 {
130 hints &= ~INLINE_HINT_cross_module;
131 fprintf (f, " cross_module");
132 }
133 if (hints & INLINE_HINT_declared_inline)
134 {
135 hints &= ~INLINE_HINT_declared_inline;
136 fprintf (f, " declared_inline");
137 }
138 if (hints & INLINE_HINT_array_index)
139 {
140 hints &= ~INLINE_HINT_array_index;
141 fprintf (f, " array_index");
142 }
143 if (hints & INLINE_HINT_known_hot)
144 {
145 hints &= ~INLINE_HINT_known_hot;
146 fprintf (f, " known_hot");
147 }
148 gcc_assert (!hints);
149 }
150
151
152 /* Record SIZE and TIME to SUMMARY.
153 The accounted code will be executed when EXEC_PRED is true.
154 When NONCONST_PRED is false the code will evaluate to a constant and
155 will get optimized out in specialized clones of the function. */
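/* Editorial sketch (hypothetical example): for

     int f (int x)
     {
       if (x)
         g ();
     }

   this function may be called with the size and time of the call to g ()
   under an exec predicate like "x != 0". The nonconst predicate records
   whether the guarded code still depends on unknown values; once it
   evaluates to false in a specialized clone, the entry's cost is assumed
   to be optimized out, as described above. */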
156
157 void
158 ipa_fn_summary::account_size_time (int size, sreal time,
159 const predicate &exec_pred,
160 const predicate &nonconst_pred_in)
161 {
162 size_time_entry *e;
163 bool found = false;
164 int i;
165 predicate nonconst_pred;
166
167 if (exec_pred == false)
168 return;
169
170 nonconst_pred = nonconst_pred_in & exec_pred;
171
172 if (nonconst_pred == false)
173 return;
174
175 /* We need to create the initial empty unconditional clause, but otherwise
176 we don't need to account empty times and sizes. */
177 if (!size && time == 0 && size_time_table)
178 return;
179
180 gcc_assert (time >= 0);
181
182 for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
183 if (e->exec_predicate == exec_pred
184 && e->nonconst_predicate == nonconst_pred)
185 {
186 found = true;
187 break;
188 }
189 if (i == 256)
190 {
191 i = 0;
192 found = true;
193 e = &(*size_time_table)[0];
194 if (dump_file && (dump_flags & TDF_DETAILS))
195 fprintf (dump_file,
196 "\t\tReached limit on number of entries, "
197 "ignoring the predicate.");
198 }
199 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
200 {
201 fprintf (dump_file,
202 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
203 ((double) size) / ipa_fn_summary::size_scale,
204 (time.to_double ()), found ? "" : "new ");
205 exec_pred.dump (dump_file, conds, 0);
206 if (exec_pred != nonconst_pred)
207 {
208 fprintf (dump_file, " nonconst:");
209 nonconst_pred.dump (dump_file, conds);
210 }
211 else
212 fprintf (dump_file, "\n");
213 }
214 if (!found)
215 {
216 struct size_time_entry new_entry;
217 new_entry.size = size;
218 new_entry.time = time;
219 new_entry.exec_predicate = exec_pred;
220 new_entry.nonconst_predicate = nonconst_pred;
221 vec_safe_push (size_time_table, new_entry);
222 }
223 else
224 {
225 e->size += size;
226 e->time += time;
227 }
228 }
229
230 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
231
232 static struct cgraph_edge *
233 redirect_to_unreachable (struct cgraph_edge *e)
234 {
235 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
236 struct cgraph_node *target = cgraph_node::get_create
237 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
238
239 if (e->speculative)
240 e = e->resolve_speculation (target->decl);
241 else if (!e->callee)
242 e->make_direct (target);
243 else
244 e->redirect_callee (target);
245 struct ipa_call_summary *es = ipa_call_summaries->get (e);
246 e->inline_failed = CIF_UNREACHABLE;
247 e->frequency = 0;
248 e->count = profile_count::zero ();
249 es->call_stmt_size = 0;
250 es->call_stmt_time = 0;
251 if (callee)
252 callee->remove_symbol_and_inline_clones ();
253 return e;
254 }
255
256 /* Set predicate for edge E. */
257
258 static void
259 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
260 {
261 /* If the edge is determined to be never executed, redirect it
262 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
263 be optimized out. */
264 if (predicate && *predicate == false
265 /* When handling speculative edges, we need to do the redirection
266 just once. Do it always on the direct edge, so we do not
267 attempt to resolve speculation while duplicating the edge. */
268 && (!e->speculative || e->callee))
269 e = redirect_to_unreachable (e);
270
271 struct ipa_call_summary *es = ipa_call_summaries->get (e);
272 if (predicate && *predicate != true)
273 {
274 if (!es->predicate)
275 es->predicate = edge_predicate_pool.allocate ();
276 *es->predicate = *predicate;
277 }
278 else
279 {
280 if (es->predicate)
281 edge_predicate_pool.remove (es->predicate);
282 es->predicate = NULL;
283 }
284 }
285
286 /* Set predicate for hint *P. */
287
288 static void
289 set_hint_predicate (predicate **p, predicate new_predicate)
290 {
291 if (new_predicate == false || new_predicate == true)
292 {
293 if (*p)
294 edge_predicate_pool.remove (*p);
295 *p = NULL;
296 }
297 else
298 {
299 if (!*p)
300 *p = edge_predicate_pool.allocate ();
301 **p = new_predicate;
302 }
303 }
304
305
306 /* Compute what conditions may or may not hold given information about
307 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
308 while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
309 copy when called in a given context. It is a bitmask of conditions. A bit
310 of 0 means that the condition is known to be false, while a bit of 1 means
311 that the condition may or may not be true. These differ - for example the
312 NOT_INLINED condition is always false in the second case, and also
313 builtin_constant_p tests cannot use the fact that a parameter is indeed constant.
314
315 KNOWN_VALS is a partial mapping of parameters of NODE to constant values.
316 KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
317 Return clause of possible truths. When INLINE_P is true, assume that we are
318 inlining.
319
320 ERROR_MARK means compile time invariant. */
321
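/* Editorial sketch of the clause encoding used below (illustrative only):
   a clause_t is a plain bitmask over the conditions of the summary.

     clause_t clause = 0;
     clause |= 1 << (i + predicate::first_dynamic_condition);   // may be true
     if (!(clause & (1 << (i + predicate::first_dynamic_condition))))
       ...;                                                     // known false

   A zero bit therefore means the corresponding condition is known to be
   false in the given context. */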
322 static void
323 evaluate_conditions_for_known_args (struct cgraph_node *node,
324 bool inline_p,
325 vec<tree> known_vals,
326 vec<ipa_agg_jump_function_p>
327 known_aggs,
328 clause_t *ret_clause,
329 clause_t *ret_nonspec_clause)
330 {
331 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
332 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
333 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
334 int i;
335 struct condition *c;
336
337 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
338 {
339 tree val;
340 tree res;
341
342 /* We allow the call stmt to have fewer arguments than the callee function
343 (especially for K&R style programs). So we bound-check here (we assume
344 known_aggs vector, if non-NULL, has the same length as
345 known_vals). */
346 gcc_checking_assert (!known_aggs.exists ()
347 || (known_vals.length () == known_aggs.length ()));
348 if (c->operand_num >= (int) known_vals.length ())
349 {
350 clause |= 1 << (i + predicate::first_dynamic_condition);
351 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
352 continue;
353 }
354
355 if (c->agg_contents)
356 {
357 struct ipa_agg_jump_function *agg;
358
359 if (c->code == predicate::changed
360 && !c->by_ref
361 && (known_vals[c->operand_num] == error_mark_node))
362 continue;
363
364 if (known_aggs.exists ())
365 {
366 agg = known_aggs[c->operand_num];
367 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
368 c->offset, c->by_ref);
369 }
370 else
371 val = NULL_TREE;
372 }
373 else
374 {
375 val = known_vals[c->operand_num];
376 if (val == error_mark_node && c->code != predicate::changed)
377 val = NULL_TREE;
378 }
379
380 if (!val)
381 {
382 clause |= 1 << (i + predicate::first_dynamic_condition);
383 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
384 continue;
385 }
386 if (c->code == predicate::changed)
387 {
388 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
389 continue;
390 }
391
392 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
393 {
394 clause |= 1 << (i + predicate::first_dynamic_condition);
395 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
396 continue;
397 }
398 if (c->code == predicate::is_not_constant)
399 {
400 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
401 continue;
402 }
403
404 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
405 res = val
406 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
407 : NULL;
408
409 if (res && integer_zerop (res))
410 continue;
411
412 clause |= 1 << (i + predicate::first_dynamic_condition);
413 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
414 }
415 *ret_clause = clause;
416 if (ret_nonspec_clause)
417 *ret_nonspec_clause = nonspec_clause;
418 }
419
420
421 /* Work out what conditions might be true at invocation of E. */
422
423 void
424 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
425 clause_t *clause_ptr,
426 clause_t *nonspec_clause_ptr,
427 vec<tree> *known_vals_ptr,
428 vec<ipa_polymorphic_call_context>
429 *known_contexts_ptr,
430 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
431 {
432 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
433 struct ipa_fn_summary *info = ipa_fn_summaries->get (callee);
434 vec<tree> known_vals = vNULL;
435 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
436
437 if (clause_ptr)
438 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
439 if (known_vals_ptr)
440 known_vals_ptr->create (0);
441 if (known_contexts_ptr)
442 known_contexts_ptr->create (0);
443
444 if (ipa_node_params_sum
445 && !e->call_stmt_cannot_inline_p
446 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
447 {
448 struct ipa_node_params *parms_info;
449 struct ipa_edge_args *args = IPA_EDGE_REF (e);
450 struct ipa_call_summary *es = ipa_call_summaries->get (e);
451 int i, count = ipa_get_cs_argument_count (args);
452
453 if (e->caller->global.inlined_to)
454 parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
455 else
456 parms_info = IPA_NODE_REF (e->caller);
457
458 if (count && (info->conds || known_vals_ptr))
459 known_vals.safe_grow_cleared (count);
460 if (count && (info->conds || known_aggs_ptr))
461 known_aggs.safe_grow_cleared (count);
462 if (count && known_contexts_ptr)
463 known_contexts_ptr->safe_grow_cleared (count);
464
465 for (i = 0; i < count; i++)
466 {
467 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
468 tree cst = ipa_value_from_jfunc (parms_info, jf);
469
470 if (!cst && e->call_stmt
471 && i < (int)gimple_call_num_args (e->call_stmt))
472 {
473 cst = gimple_call_arg (e->call_stmt, i);
474 if (!is_gimple_min_invariant (cst))
475 cst = NULL;
476 }
477 if (cst)
478 {
479 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
480 if (known_vals.exists ())
481 known_vals[i] = cst;
482 }
483 else if (inline_p && !es->param[i].change_prob)
484 known_vals[i] = error_mark_node;
485
486 if (known_contexts_ptr)
487 (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
488 i, jf);
489 /* TODO: When IPA-CP starts propagating and merging aggregate jump
490 functions, use its knowledge of the caller too, just like the
491 scalar case above. */
492 known_aggs[i] = &jf->agg;
493 }
494 }
495 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
496 && ((clause_ptr && info->conds) || known_vals_ptr))
497 {
498 int i, count = (int)gimple_call_num_args (e->call_stmt);
499
500 if (count && (info->conds || known_vals_ptr))
501 known_vals.safe_grow_cleared (count);
502 for (i = 0; i < count; i++)
503 {
504 tree cst = gimple_call_arg (e->call_stmt, i);
505 if (!is_gimple_min_invariant (cst))
506 cst = NULL;
507 if (cst)
508 known_vals[i] = cst;
509 }
510 }
511
512 evaluate_conditions_for_known_args (callee, inline_p,
513 known_vals, known_aggs, clause_ptr,
514 nonspec_clause_ptr);
515
516 if (known_vals_ptr)
517 *known_vals_ptr = known_vals;
518 else
519 known_vals.release ();
520
521 if (known_aggs_ptr)
522 *known_aggs_ptr = known_aggs;
523 else
524 known_aggs.release ();
525 }
526
527
528 /* Allocate the function summary. */
529
530 static void
531 ipa_fn_summary_alloc (void)
532 {
533 gcc_checking_assert (!ipa_fn_summaries);
534 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
535 ipa_call_summaries = new ipa_call_summary_t (symtab, false);
536 }
537
538 /* We are called multiple times for a given function; clear
539 data from previous runs so they are not accumulated. */
540
541 void
542 ipa_call_summary::reset ()
543 {
544 call_stmt_size = call_stmt_time = 0;
545 if (predicate)
546 edge_predicate_pool.remove (predicate);
547 predicate = NULL;
548 param.release ();
549 }
550
551 /* We are called multiple times for a given function; clear
552 data from previous runs so they are not accumulated. */
553
554 void
555 ipa_fn_summary::reset (struct cgraph_node *node)
556 {
557 struct cgraph_edge *e;
558
559 self_size = 0;
560 estimated_stack_size = 0;
561 estimated_self_stack_size = 0;
562 stack_frame_offset = 0;
563 size = 0;
564 time = 0;
565 growth = 0;
566 scc_no = 0;
567 if (loop_iterations)
568 {
569 edge_predicate_pool.remove (loop_iterations);
570 loop_iterations = NULL;
571 }
572 if (loop_stride)
573 {
574 edge_predicate_pool.remove (loop_stride);
575 loop_stride = NULL;
576 }
577 if (array_index)
578 {
579 edge_predicate_pool.remove (array_index);
580 array_index = NULL;
581 }
582 vec_free (conds);
583 vec_free (size_time_table);
584 for (e = node->callees; e; e = e->next_callee)
585 ipa_call_summaries->get (e)->reset ();
586 for (e = node->indirect_calls; e; e = e->next_callee)
587 ipa_call_summaries->get (e)->reset ();
588 fp_expressions = false;
589 }
590
591 /* Hook that is called by cgraph.c when a node is removed. */
592
593 void
594 ipa_fn_summary_t::remove (cgraph_node *node, ipa_fn_summary *info)
595 {
596 info->reset (node);
597 }
598
599 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
600 Additionally, take care of allocating a new memory slot for the updated
601 predicate and set it to NULL when it becomes true or false (and thus
602 uninteresting). */
603
604 static void
605 remap_hint_predicate_after_duplication (predicate **p,
606 clause_t possible_truths)
607 {
608 predicate new_predicate;
609
610 if (!*p)
611 return;
612
613 new_predicate = (*p)->remap_after_duplication (possible_truths);
614 /* We do not want to free previous predicate; it is used by node origin. */
615 *p = NULL;
616 set_hint_predicate (p, new_predicate);
617 }
618
619
620 /* Hook that is called by cgraph.c when a node is duplicated. */
621 void
622 ipa_fn_summary_t::duplicate (cgraph_node *src,
623 cgraph_node *dst,
624 ipa_fn_summary *,
625 ipa_fn_summary *info)
626 {
627 memcpy (info, ipa_fn_summaries->get (src), sizeof (ipa_fn_summary));
628 /* TODO: as an optimization, we may avoid copying conditions
629 that are known to be false or true. */
630 info->conds = vec_safe_copy (info->conds);
631
632 /* When there are any replacements in the function body, see if we can figure
633 out that something was optimized out. */
634 if (ipa_node_params_sum && dst->clone.tree_map)
635 {
636 vec<size_time_entry, va_gc> *entry = info->size_time_table;
637 /* Use SRC parm info since it may not be copied yet. */
638 struct ipa_node_params *parms_info = IPA_NODE_REF (src);
639 vec<tree> known_vals = vNULL;
640 int count = ipa_get_param_count (parms_info);
641 int i, j;
642 clause_t possible_truths;
643 predicate true_pred = true;
644 size_time_entry *e;
645 int optimized_out_size = 0;
646 bool inlined_to_p = false;
647 struct cgraph_edge *edge, *next;
648
649 info->size_time_table = 0;
650 known_vals.safe_grow_cleared (count);
651 for (i = 0; i < count; i++)
652 {
653 struct ipa_replace_map *r;
654
655 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
656 {
657 if (((!r->old_tree && r->parm_num == i)
658 || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
659 && r->replace_p && !r->ref_p)
660 {
661 known_vals[i] = r->new_tree;
662 break;
663 }
664 }
665 }
666 evaluate_conditions_for_known_args (dst, false,
667 known_vals,
668 vNULL,
669 &possible_truths,
670 /* We are going to specialize,
671 so ignore nonspec truths. */
672 NULL);
673 known_vals.release ();
674
675 info->account_size_time (0, 0, true_pred, true_pred);
676
677 /* Remap size_time vectors.
678 Simplify the predicate by pruning out alternatives that are known
679 to be false.
680 TODO: as an optimization, we can also eliminate conditions known
681 to be true. */
682 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
683 {
684 predicate new_exec_pred;
685 predicate new_nonconst_pred;
686 new_exec_pred = e->exec_predicate.remap_after_duplication
687 (possible_truths);
688 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
689 (possible_truths);
690 if (new_exec_pred == false || new_nonconst_pred == false)
691 optimized_out_size += e->size;
692 else
693 info->account_size_time (e->size, e->time, new_exec_pred,
694 new_nonconst_pred);
695 }
696
697 /* Remap edge predicates with the same simplification as above.
698 Also copy constantness arrays. */
699 for (edge = dst->callees; edge; edge = next)
700 {
701 predicate new_predicate;
702 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
703 next = edge->next_callee;
704
705 if (!edge->inline_failed)
706 inlined_to_p = true;
707 if (!es->predicate)
708 continue;
709 new_predicate = es->predicate->remap_after_duplication
710 (possible_truths);
711 if (new_predicate == false && *es->predicate != false)
712 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
713 edge_set_predicate (edge, &new_predicate);
714 }
715
716 /* Remap indirect edge predicates with the same simplification as above.
717 Also copy constantness arrays. */
718 for (edge = dst->indirect_calls; edge; edge = next)
719 {
720 predicate new_predicate;
721 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
722 next = edge->next_callee;
723
724 gcc_checking_assert (edge->inline_failed);
725 if (!es->predicate)
726 continue;
727 new_predicate = es->predicate->remap_after_duplication
728 (possible_truths);
729 if (new_predicate == false && *es->predicate != false)
730 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
731 edge_set_predicate (edge, &new_predicate);
732 }
733 remap_hint_predicate_after_duplication (&info->loop_iterations,
734 possible_truths);
735 remap_hint_predicate_after_duplication (&info->loop_stride,
736 possible_truths);
737 remap_hint_predicate_after_duplication (&info->array_index,
738 possible_truths);
739
740 /* If the inliner or someone after the inliner ever starts producing
741 non-trivial clones, we will get in trouble with the lack of information
742 about updating self sizes, because the size vectors already contain
743 sizes of the callees. */
744 gcc_assert (!inlined_to_p || !optimized_out_size);
745 }
746 else
747 {
748 info->size_time_table = vec_safe_copy (info->size_time_table);
749 if (info->loop_iterations)
750 {
751 predicate p = *info->loop_iterations;
752 info->loop_iterations = NULL;
753 set_hint_predicate (&info->loop_iterations, p);
754 }
755 if (info->loop_stride)
756 {
757 predicate p = *info->loop_stride;
758 info->loop_stride = NULL;
759 set_hint_predicate (&info->loop_stride, p);
760 }
761 if (info->array_index)
762 {
763 predicate p = *info->array_index;
764 info->array_index = NULL;
765 set_hint_predicate (&info->array_index, p);
766 }
767 }
768 if (!dst->global.inlined_to)
769 ipa_update_overall_fn_summary (dst);
770 }
771
772
773 /* Hook that is called by cgraph.c when a node is duplicated. */
774
775 void
776 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
777 struct cgraph_edge *dst,
778 struct ipa_call_summary *srcinfo,
779 struct ipa_call_summary *info)
780 {
781 *info = *srcinfo;
782 info->predicate = NULL;
783 edge_set_predicate (dst, srcinfo->predicate);
784 info->param = srcinfo->param.copy ();
785 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
786 {
787 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
788 - eni_size_weights.call_cost);
789 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
790 - eni_time_weights.call_cost);
791 }
792 }
793
794
795 /* Keep edge cache consistent across edge removal. */
796
797 void
798 ipa_call_summary_t::remove (struct cgraph_edge *,
799 struct ipa_call_summary *sum)
800 {
801 sum->reset ();
802 }
803
804
805 /* Dump edge summaries associated with NODE and recursively to all clones.
806 Indent by INDENT. */
807
808 static void
809 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
810 struct ipa_fn_summary *info)
811 {
812 struct cgraph_edge *edge;
813 for (edge = node->callees; edge; edge = edge->next_callee)
814 {
815 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
816 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
817 int i;
818
819 fprintf (f,
820 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
821 " time: %2i callee size:%2i stack:%2i",
822 indent, "", callee->name (), callee->order,
823 !edge->inline_failed
824 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
825 indent, "", es->loop_depth, edge->frequency,
826 es->call_stmt_size, es->call_stmt_time,
827 (int) ipa_fn_summaries->get (callee)->size / ipa_fn_summary::size_scale,
828 (int) ipa_fn_summaries->get (callee)->estimated_stack_size);
829
830 if (es->predicate)
831 {
832 fprintf (f, " predicate: ");
833 es->predicate->dump (f, info->conds);
834 }
835 else
836 fprintf (f, "\n");
837 if (es->param.exists ())
838 for (i = 0; i < (int) es->param.length (); i++)
839 {
840 int prob = es->param[i].change_prob;
841
842 if (!prob)
843 fprintf (f, "%*s op%i is compile time invariant\n",
844 indent + 2, "", i);
845 else if (prob != REG_BR_PROB_BASE)
846 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
847 prob * 100.0 / REG_BR_PROB_BASE);
848 }
849 if (!edge->inline_failed)
850 {
851 fprintf (f, "%*sStack frame offset %i, callee self size %i,"
852 " callee size %i\n",
853 indent + 2, "",
854 (int) ipa_fn_summaries->get (callee)->stack_frame_offset,
855 (int) ipa_fn_summaries->get (callee)->estimated_self_stack_size,
856 (int) ipa_fn_summaries->get (callee)->estimated_stack_size);
857 dump_ipa_call_summary (f, indent + 2, callee, info);
858 }
859 }
860 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
861 {
862 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
863 fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
864 " time: %2i",
865 indent, "",
866 es->loop_depth,
867 edge->frequency, es->call_stmt_size, es->call_stmt_time);
868 if (es->predicate)
869 {
870 fprintf (f, "predicate: ");
871 es->predicate->dump (f, info->conds);
872 }
873 else
874 fprintf (f, "\n");
875 }
876 }
877
878
879 void
880 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
881 {
882 if (node->definition)
883 {
884 struct ipa_fn_summary *s = ipa_fn_summaries->get (node);
885 size_time_entry *e;
886 int i;
887 fprintf (f, "IPA function summary for %s/%i", node->name (),
888 node->order);
889 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
890 fprintf (f, " always_inline");
891 if (s->inlinable)
892 fprintf (f, " inlinable");
893 if (s->contains_cilk_spawn)
894 fprintf (f, " contains_cilk_spawn");
895 if (s->fp_expressions)
896 fprintf (f, " fp_expression");
897 fprintf (f, "\n global time: %f\n", s->time.to_double ());
898 fprintf (f, " self size: %i\n", s->self_size);
899 fprintf (f, " global size: %i\n", s->size);
900 fprintf (f, " min size: %i\n", s->min_size);
901 fprintf (f, " self stack: %i\n",
902 (int) s->estimated_self_stack_size);
903 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
904 if (s->growth)
905 fprintf (f, " estimated growth:%i\n", (int) s->growth);
906 if (s->scc_no)
907 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
908 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
909 {
910 fprintf (f, " size:%f, time:%f",
911 (double) e->size / ipa_fn_summary::size_scale,
912 e->time.to_double ());
913 if (e->exec_predicate != true)
914 {
915 fprintf (f, ", executed if:");
916 e->exec_predicate.dump (f, s->conds, 0);
917 }
918 if (e->exec_predicate != e->nonconst_predicate)
919 {
920 fprintf (f, ", nonconst if:");
921 e->nonconst_predicate.dump (f, s->conds, 0);
922 }
923 fprintf (f, "\n");
924 }
925 if (s->loop_iterations)
926 {
927 fprintf (f, " loop iterations:");
928 s->loop_iterations->dump (f, s->conds);
929 }
930 if (s->loop_stride)
931 {
932 fprintf (f, " loop stride:");
933 s->loop_stride->dump (f, s->conds);
934 }
935 if (s->array_index)
936 {
937 fprintf (f, " array index:");
938 s->array_index->dump (f, s->conds);
939 }
940 fprintf (f, " calls:\n");
941 dump_ipa_call_summary (f, 4, node, s);
942 fprintf (f, "\n");
943 }
944 }
945
946 DEBUG_FUNCTION void
947 ipa_debug_fn_summary (struct cgraph_node *node)
948 {
949 ipa_dump_fn_summary (stderr, node);
950 }
951
952 void
953 ipa_dump_fn_summaries (FILE *f)
954 {
955 struct cgraph_node *node;
956
957 FOR_EACH_DEFINED_FUNCTION (node)
958 if (!node->global.inlined_to)
959 ipa_dump_fn_summary (f, node);
960 }
961
962 /* Callback of walk_aliased_vdefs. Records that it has been invoked by
963 setting the boolean variable pointed to by DATA to true. */
964
965 static bool
966 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
967 void *data)
968 {
969 bool *b = (bool *) data;
970 *b = true;
971 return true;
972 }
973
974 /* If OP refers to the value of a function parameter, return the corresponding
975 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
976 PARM_DECL) will be stored to *SIZE_P in that case too. */
977
978 static tree
979 unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
980 {
981 /* SSA_NAME referring to parm default def? */
982 if (TREE_CODE (op) == SSA_NAME
983 && SSA_NAME_IS_DEFAULT_DEF (op)
984 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
985 {
986 if (size_p)
987 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
988 return SSA_NAME_VAR (op);
989 }
990 /* Non-SSA parm reference? */
991 if (TREE_CODE (op) == PARM_DECL)
992 {
993 bool modified = false;
994
995 ao_ref refd;
996 ao_ref_init (&refd, op);
997 walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
998 NULL);
999 if (!modified)
1000 {
1001 if (size_p)
1002 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1003 return op;
1004 }
1005 }
1006 return NULL_TREE;
1007 }
1008
1009 /* If OP refers to the value of a function parameter, return the corresponding
1010 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1011 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1012 stored to *SIZE_P in that case too. */
1013
1014 static tree
1015 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1016 {
1017 tree res = unmodified_parm_1 (stmt, op, size_p);
1018 if (res)
1019 return res;
1020
1021 if (TREE_CODE (op) == SSA_NAME
1022 && !SSA_NAME_IS_DEFAULT_DEF (op)
1023 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1024 return unmodified_parm (SSA_NAME_DEF_STMT (op),
1025 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1026 size_p);
1027 return NULL_TREE;
1028 }
1029
1030 /* If OP refers to a value of a function parameter or value loaded from an
1031 aggregate passed to a parameter (either by value or reference), return TRUE
1032 and store the number of the parameter to *INDEX_P, the access size into
1033 *SIZE_P, and information on whether and how it has been loaded from an
1034 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1035 statement in which OP is used or loaded. */
1036
1037 static bool
1038 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1039 gimple *stmt, tree op, int *index_p,
1040 HOST_WIDE_INT *size_p,
1041 struct agg_position_info *aggpos)
1042 {
1043 tree res = unmodified_parm_1 (stmt, op, size_p);
1044
1045 gcc_checking_assert (aggpos);
1046 if (res)
1047 {
1048 *index_p = ipa_get_param_decl_index (fbi->info, res);
1049 if (*index_p < 0)
1050 return false;
1051 aggpos->agg_contents = false;
1052 aggpos->by_ref = false;
1053 return true;
1054 }
1055
1056 if (TREE_CODE (op) == SSA_NAME)
1057 {
1058 if (SSA_NAME_IS_DEFAULT_DEF (op)
1059 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1060 return false;
1061 stmt = SSA_NAME_DEF_STMT (op);
1062 op = gimple_assign_rhs1 (stmt);
1063 if (!REFERENCE_CLASS_P (op))
1064 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1065 aggpos);
1066 }
1067
1068 aggpos->agg_contents = true;
1069 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1070 stmt, op, index_p, &aggpos->offset,
1071 size_p, &aggpos->by_ref);
1072 }
1073
1074 /* See if the statement might disappear after inlining.
1075 0 - means not eliminated
1076 1 - half of the statements go away
1077 2 - for sure it is eliminated.
1078 We are not terribly sophisticated, basically looking for simple abstraction
1079 penalty wrappers. */
1080
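/* Editorial example (a sketch) of the abstraction penalty wrappers meant
   above:

     struct vec { int *data; };

     static int
     get (struct vec *v, int i)
     {
       return v->data[i];          // load through a parameter
     }

   After inlining such a getter, the load usually combines with the
   caller's code (SRA, copy propagation), so statements of this shape are
   predicted to be partially or fully eliminated. */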
1081 static int
1082 eliminated_by_inlining_prob (gimple *stmt)
1083 {
1084 enum gimple_code code = gimple_code (stmt);
1085 enum tree_code rhs_code;
1086
1087 if (!optimize)
1088 return 0;
1089
1090 switch (code)
1091 {
1092 case GIMPLE_RETURN:
1093 return 2;
1094 case GIMPLE_ASSIGN:
1095 if (gimple_num_ops (stmt) != 2)
1096 return 0;
1097
1098 rhs_code = gimple_assign_rhs_code (stmt);
1099
1100 /* Casts of parameters, loads from parameters passed by reference
1101 and stores to the return value or parameters are often free after
1102 inlining due to SRA and further combining.
1103 Assume that half of the statements go away. */
1104 if (CONVERT_EXPR_CODE_P (rhs_code)
1105 || rhs_code == VIEW_CONVERT_EXPR
1106 || rhs_code == ADDR_EXPR
1107 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1108 {
1109 tree rhs = gimple_assign_rhs1 (stmt);
1110 tree lhs = gimple_assign_lhs (stmt);
1111 tree inner_rhs = get_base_address (rhs);
1112 tree inner_lhs = get_base_address (lhs);
1113 bool rhs_free = false;
1114 bool lhs_free = false;
1115
1116 if (!inner_rhs)
1117 inner_rhs = rhs;
1118 if (!inner_lhs)
1119 inner_lhs = lhs;
1120
1121 /* Reads of parameters are expected to be free. */
1122 if (unmodified_parm (stmt, inner_rhs, NULL))
1123 rhs_free = true;
1124 /* Match expressions of form &this->field. Those will most likely
1125 combine with something upstream after inlining. */
1126 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1127 {
1128 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1129 if (TREE_CODE (op) == PARM_DECL)
1130 rhs_free = true;
1131 else if (TREE_CODE (op) == MEM_REF
1132 && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
1133 rhs_free = true;
1134 }
1135
1136 /* When a parameter is not an SSA register because its address is taken
1137 and it is just copied into one, the statement will be completely
1138 free after inlining (we will copy propagate backward). */
1139 if (rhs_free && is_gimple_reg (lhs))
1140 return 2;
1141
1142 /* Reads of parameters passed by reference
1143 are expected to be free (i.e. optimized out after inlining). */
1144 if (TREE_CODE (inner_rhs) == MEM_REF
1145 && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1146 rhs_free = true;
1147
1148 /* Copying parameter passed by reference into gimple register is
1149 probably also going to copy propagate, but we can't be quite
1150 sure. */
1151 if (rhs_free && is_gimple_reg (lhs))
1152 lhs_free = true;
1153
1154 /* Writes to parameters, parameters passed by value and return value
1155 (either directly or passed via invisible reference) are free.
1156
1157 TODO: We ought to handle a testcase like
1158 struct a {int a,b;};
1159 struct a
1160 returnstruct (void)
1161 {
1162 struct a a ={1,2};
1163 return a;
1164 }
1165
1166 This translates into:
1167
1168 returnstruct ()
1169 {
1170 int a$b;
1171 int a$a;
1172 struct a a;
1173 struct a D.2739;
1174
1175 <bb 2>:
1176 D.2739.a = 1;
1177 D.2739.b = 2;
1178 return D.2739;
1179
1180 }
1181 For that we would need to copy the ipa-split logic detecting writes
1182 to the return value. */
1183 if (TREE_CODE (inner_lhs) == PARM_DECL
1184 || TREE_CODE (inner_lhs) == RESULT_DECL
1185 || (TREE_CODE (inner_lhs) == MEM_REF
1186 && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
1187 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1188 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1189 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1190 (inner_lhs,
1191 0))) == RESULT_DECL))))
1192 lhs_free = true;
1193 if (lhs_free
1194 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1195 rhs_free = true;
1196 if (lhs_free && rhs_free)
1197 return 1;
1198 }
1199 return 0;
1200 default:
1201 return 0;
1202 }
1203 }
1204
1205
1206 /* If BB ends with a conditional we can turn into predicates, attach the
1207 corresponding predicates to the CFG edges. */
1208
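/* Editorial example (a sketch): for

     int f (int p)
     {
       if (p == 4)
         return do_this ();        // guarded by predicate "p == 4"
       return do_that ();          // guarded by predicate "p != 4"
     }

   the true edge gets the comparison itself and the false edge its
   inversion, so code dominated by the test is only accounted in contexts
   where the comparison may hold. do_this/do_that are hypothetical. */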
1209 static void
1210 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1211 struct ipa_fn_summary *summary,
1212 basic_block bb)
1213 {
1214 gimple *last;
1215 tree op;
1216 int index;
1217 HOST_WIDE_INT size;
1218 struct agg_position_info aggpos;
1219 enum tree_code code, inverted_code;
1220 edge e;
1221 edge_iterator ei;
1222 gimple *set_stmt;
1223 tree op2;
1224
1225 last = last_stmt (bb);
1226 if (!last || gimple_code (last) != GIMPLE_COND)
1227 return;
1228 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1229 return;
1230 op = gimple_cond_lhs (last);
1231 /* TODO: handle conditionals like
1232 var = op0 < 4;
1233 if (var != 0). */
1234 if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1235 {
1236 code = gimple_cond_code (last);
1237 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1238
1239 FOR_EACH_EDGE (e, ei, bb->succs)
1240 {
1241 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1242 ? code : inverted_code);
1243 /* invert_tree_comparison will return ERROR_MARK on FP
1244 comparisons that are not EQ/NE instead of returning the proper
1245 unordered one. Be sure it is not confused with NON_CONSTANT. */
1246 if (this_code != ERROR_MARK)
1247 {
1248 predicate p
1249 = add_condition (summary, index, size, &aggpos, this_code,
1250 unshare_expr_without_location
1251 (gimple_cond_rhs (last)));
1252 e->aux = edge_predicate_pool.allocate ();
1253 *(predicate *) e->aux = p;
1254 }
1255 }
1256 }
1257
1258 if (TREE_CODE (op) != SSA_NAME)
1259 return;
1260 /* Special case
1261 if (builtin_constant_p (op))
1262 constant_code
1263 else
1264 nonconstant_code.
1265 Here we can predicate nonconstant_code. We can't
1266 really handle constant_code since we have no predicate
1267 for this and also the constant code is not known to be
1268 optimized away when the inliner doesn't see that the operand is constant.
1269 Other optimizers might think otherwise. */
1270 if (gimple_cond_code (last) != NE_EXPR
1271 || !integer_zerop (gimple_cond_rhs (last)))
1272 return;
1273 set_stmt = SSA_NAME_DEF_STMT (op);
1274 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1275 || gimple_call_num_args (set_stmt) != 1)
1276 return;
1277 op2 = gimple_call_arg (set_stmt, 0);
1278 if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
1279 &aggpos))
1280 return;
1281 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1282 {
1283 predicate p = add_condition (summary, index, size, &aggpos,
1284 predicate::is_not_constant, NULL_TREE);
1285 e->aux = edge_predicate_pool.allocate ();
1286 *(predicate *) e->aux = p;
1287 }
1288 }
1289
1290
1291 /* If BB ends with a switch we can turn into predicates, attach the
1292 corresponding predicates to the CFG edges. */
1293
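/* Editorial example (a sketch): for

     switch (p)
       {
       case 0: ...                 // edge predicate "p == 0"
       case 4 ... 6: ...           // edge predicate "p >= 4 && p <= 6"
       default: ...                // predicate stays true; negating the
       }                           // other cases is hard, see below

   each outgoing edge accumulates, via or_with, the conditions of the case
   labels that reach it, built from EQ_EXPR or a GE_EXPR/LE_EXPR pair. */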
1294 static void
1295 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1296 struct ipa_fn_summary *summary,
1297 basic_block bb)
1298 {
1299 gimple *lastg;
1300 tree op;
1301 int index;
1302 HOST_WIDE_INT size;
1303 struct agg_position_info aggpos;
1304 edge e;
1305 edge_iterator ei;
1306 size_t n;
1307 size_t case_idx;
1308
1309 lastg = last_stmt (bb);
1310 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1311 return;
1312 gswitch *last = as_a <gswitch *> (lastg);
1313 op = gimple_switch_index (last);
1314 if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1315 return;
1316
1317 FOR_EACH_EDGE (e, ei, bb->succs)
1318 {
1319 e->aux = edge_predicate_pool.allocate ();
1320 *(predicate *) e->aux = false;
1321 }
1322 n = gimple_switch_num_labels (last);
1323 for (case_idx = 0; case_idx < n; ++case_idx)
1324 {
1325 tree cl = gimple_switch_label (last, case_idx);
1326 tree min, max;
1327 predicate p;
1328
1329 e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
1330 min = CASE_LOW (cl);
1331 max = CASE_HIGH (cl);
1332
1333 /* For the default case we might want to construct a predicate that none
1334 of the cases is met, but that is a bit hard to do without having negations
1335 of the conditionals handy. */
1336 if (!min && !max)
1337 p = true;
1338 else if (!max)
1339 p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
1340 unshare_expr_without_location (min));
1341 else
1342 {
1343 predicate p1, p2;
1344 p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
1345 unshare_expr_without_location (min));
1346 p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
1347 unshare_expr_without_location (max));
1348 p = p1 & p2;
1349 }
1350 *(struct predicate *) e->aux
1351 = p.or_with (summary->conds, *(struct predicate *) e->aux);
1352 }
1353 }
1354
1355
1356 /* For each BB in NODE attach to its AUX pointer the predicate under
1357 which it is executable. */
1358
1359 static void
1360 compute_bb_predicates (struct ipa_func_body_info *fbi,
1361 struct cgraph_node *node,
1362 struct ipa_fn_summary *summary)
1363 {
1364 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1365 bool done = false;
1366 basic_block bb;
1367
1368 FOR_EACH_BB_FN (bb, my_function)
1369 {
1370 set_cond_stmt_execution_predicate (fbi, summary, bb);
1371 set_switch_stmt_execution_predicate (fbi, summary, bb);
1372 }
1373
1374 /* Entry block is always executable. */
1375 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1376 = edge_predicate_pool.allocate ();
1377 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1378
1379 /* A simple dataflow propagation of predicates forward in the CFG.
1380 TODO: work in reverse postorder. */
1381 while (!done)
1382 {
1383 done = true;
1384 FOR_EACH_BB_FN (bb, my_function)
1385 {
1386 predicate p = false;
1387 edge e;
1388 edge_iterator ei;
1389 FOR_EACH_EDGE (e, ei, bb->preds)
1390 {
1391 if (e->src->aux)
1392 {
1393 predicate this_bb_predicate
1394 = *(predicate *) e->src->aux;
1395 if (e->aux)
1396 this_bb_predicate &= (*(struct predicate *) e->aux);
1397 p = p.or_with (summary->conds, this_bb_predicate);
1398 if (p == true)
1399 break;
1400 }
1401 }
1402 if (p == false)
1403 gcc_checking_assert (!bb->aux);
1404 else
1405 {
1406 if (!bb->aux)
1407 {
1408 done = false;
1409 bb->aux = edge_predicate_pool.allocate ();
1410 *((predicate *) bb->aux) = p;
1411 }
1412 else if (p != *(predicate *) bb->aux)
1413 {
1414 /* This OR operation is needed to ensure monotone data flow
1415 in case we hit the limit on the number of clauses and the
1416 and/or operations above give approximate answers. */
1417 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1418 if (p != *(predicate *) bb->aux)
1419 {
1420 done = false;
1421 *((predicate *) bb->aux) = p;
1422 }
1423 }
1424 }
1425 }
1426 }
1427 }
1428
1429
1430 /* Return a predicate specifying when EXPR might have a result that is not
1431 a compile time constant. */
1432
1433 static predicate
1434 will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
1435 struct ipa_fn_summary *summary,
1436 tree expr,
1437 vec<predicate> nonconstant_names)
1438 {
1439 tree parm;
1440 int index;
1441 HOST_WIDE_INT size;
1442
1443 while (UNARY_CLASS_P (expr))
1444 expr = TREE_OPERAND (expr, 0);
1445
1446 parm = unmodified_parm (NULL, expr, &size);
1447 if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
1448 return add_condition (summary, index, size, NULL, predicate::changed,
1449 NULL_TREE);
1450 if (is_gimple_min_invariant (expr))
1451 return false;
1452 if (TREE_CODE (expr) == SSA_NAME)
1453 return nonconstant_names[SSA_NAME_VERSION (expr)];
1454 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1455 {
1456 predicate p1 = will_be_nonconstant_expr_predicate
1457 (info, summary, TREE_OPERAND (expr, 0),
1458 nonconstant_names);
1459 if (p1 == true)
1460 return p1;
1461
1462 predicate p2;
1463 p2 = will_be_nonconstant_expr_predicate (info, summary,
1464 TREE_OPERAND (expr, 1),
1465 nonconstant_names);
1466 return p1.or_with (summary->conds, p2);
1467 }
1468 else if (TREE_CODE (expr) == COND_EXPR)
1469 {
1470 predicate p1 = will_be_nonconstant_expr_predicate
1471 (info, summary, TREE_OPERAND (expr, 0),
1472 nonconstant_names);
1473 if (p1 == true)
1474 return p1;
1475
1476 predicate p2;
1477 p2 = will_be_nonconstant_expr_predicate (info, summary,
1478 TREE_OPERAND (expr, 1),
1479 nonconstant_names);
1480 if (p2 == true)
1481 return p2;
1482 p1 = p1.or_with (summary->conds, p2);
1483 p2 = will_be_nonconstant_expr_predicate (info, summary,
1484 TREE_OPERAND (expr, 2),
1485 nonconstant_names);
1486 return p2.or_with (summary->conds, p1);
1487 }
1488 else
1489 {
1490 debug_tree (expr);
1491 gcc_unreachable ();
1492 }
1493 return false;
1494 }
1495
1496
1497 /* Return a predicate specifying when STMT might have a result that is not
1498 a compile time constant. */
1499
1500 static predicate
1501 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1502 struct ipa_fn_summary *summary,
1503 gimple *stmt,
1504 vec<predicate> nonconstant_names)
1505 {
1506 predicate p = true;
1507 ssa_op_iter iter;
1508 tree use;
1509 predicate op_non_const;
1510 bool is_load;
1511 int base_index;
1512 HOST_WIDE_INT size;
1513 struct agg_position_info aggpos;
1514
1515 /* Which statements might be optimized away
1516 when their arguments are constant. */
1517 if (gimple_code (stmt) != GIMPLE_ASSIGN
1518 && gimple_code (stmt) != GIMPLE_COND
1519 && gimple_code (stmt) != GIMPLE_SWITCH
1520 && (gimple_code (stmt) != GIMPLE_CALL
1521 || !(gimple_call_flags (stmt) & ECF_CONST)))
1522 return p;
1523
1524 /* Stores will stay anyway. */
1525 if (gimple_store_p (stmt))
1526 return p;
1527
1528 is_load = gimple_assign_load_p (stmt);
1529
1530 /* Loads can be optimized when the value is known. */
1531 if (is_load)
1532 {
1533 tree op;
1534 gcc_assert (gimple_assign_single_p (stmt));
1535 op = gimple_assign_rhs1 (stmt);
1536 if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
1537 &aggpos))
1538 return p;
1539 }
1540 else
1541 base_index = -1;
1542
1543 /* See if we understand all operands before we start
1544 adding conditionals. */
1545 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1546 {
1547 tree parm = unmodified_parm (stmt, use, NULL);
1548 /* For arguments we can build a condition. */
1549 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1550 continue;
1551 if (TREE_CODE (use) != SSA_NAME)
1552 return p;
1553 /* If we know when the operand is constant,
1554 we can still say something useful. */
1555 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1556 continue;
1557 return p;
1558 }
1559
1560 if (is_load)
1561 op_non_const =
1562 add_condition (summary, base_index, size, &aggpos, predicate::changed,
1563 NULL);
1564 else
1565 op_non_const = false;
1566 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1567 {
1568 HOST_WIDE_INT size;
1569 tree parm = unmodified_parm (stmt, use, &size);
1570 int index;
1571
1572 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1573 {
1574 if (index != base_index)
1575 p = add_condition (summary, index, size, NULL, predicate::changed,
1576 NULL_TREE);
1577 else
1578 continue;
1579 }
1580 else
1581 p = nonconstant_names[SSA_NAME_VERSION (use)];
1582 op_non_const = p.or_with (summary->conds, op_non_const);
1583 }
1584 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1585 && gimple_op (stmt, 0)
1586 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1587 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1588 = op_non_const;
1589 return op_non_const;
1590 }
1591
1592 struct record_modified_bb_info
1593 {
1594 bitmap bb_set;
1595 gimple *stmt;
1596 };
1597
1598 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
1599 the probability of it changing before USE_BB is reached.
1600 INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
1601 is in a different loop nest, we can do better.
1602 This is all just an estimate. In theory we would look for a minimal cut
1603 separating INIT_BB and USE_BB, but we only want to anticipate loop
1604 invariant motion anyway. */
1605
1606 static basic_block
1607 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1608 {
1609 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1610 if (l && l->header->frequency < init_bb->frequency)
1611 return l->header;
1612 return init_bb;
1613 }
1614
1615 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1616 set except for info->stmt. */
1617
1618 static bool
1619 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
1620 {
1621 struct record_modified_bb_info *info =
1622 (struct record_modified_bb_info *) data;
1623 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
1624 return false;
1625 bitmap_set_bit (info->bb_set,
1626 SSA_NAME_IS_DEFAULT_DEF (vdef)
1627 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
1628 : get_minimal_bb
1629 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1630 gimple_bb (info->stmt))->index);
1631 return false;
1632 }
1633
1634 /* Return the probability (based on REG_BR_PROB_BASE) that the I-th parameter
1635 of STMT will change since the last invocation of STMT.
1636
1637 Value 0 is reserved for compile time invariants.
1638 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1639 ought to be REG_BR_PROB_BASE / estimated_iters. */
1640
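/* Editorial worked example (assuming REG_BR_PROB_BASE == 10000): if the
   statement defining the argument runs with frequency 100 while the call
   runs with frequency 10000 - i.e. the call executes 100 times per
   definition - the function below returns roughly

     GCOV_COMPUTE_SCALE (100, 10000) == 100

   meaning the parameter is estimated to change on about 1% of the
   invocations. */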
1641 static int
1642 param_change_prob (gimple *stmt, int i)
1643 {
1644 tree op = gimple_call_arg (stmt, i);
1645 basic_block bb = gimple_bb (stmt);
1646
1647 if (TREE_CODE (op) == WITH_SIZE_EXPR)
1648 op = TREE_OPERAND (op, 0);
1649
1650 tree base = get_base_address (op);
1651
1652 /* Global invariants never change. */
1653 if (is_gimple_min_invariant (base))
1654 return 0;
1655
1656 /* We would have to do non-trivial analysis to really work out the
1657 probability of the value changing (i.e. when the init statement
1658 is in a sibling loop of the call).
1659
1660 We do a conservative estimate: when the call is executed N times more often
1661 than the statement defining the value, we take the frequency 1/N. */
1662 if (TREE_CODE (base) == SSA_NAME)
1663 {
1664 int init_freq;
1665
1666 if (!bb->frequency)
1667 return REG_BR_PROB_BASE;
1668
1669 if (SSA_NAME_IS_DEFAULT_DEF (base))
1670 init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
1671 else
1672 init_freq = get_minimal_bb
1673 (gimple_bb (SSA_NAME_DEF_STMT (base)),
1674 gimple_bb (stmt))->frequency;
1675
1676 if (!init_freq)
1677 init_freq = 1;
1678 if (init_freq < bb->frequency)
1679 return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
1680 else
1681 return REG_BR_PROB_BASE;
1682 }
1683 else
1684 {
1685 ao_ref refd;
1686 int max;
1687 struct record_modified_bb_info info;
1688 bitmap_iterator bi;
1689 unsigned index;
1690 tree init = ctor_for_folding (base);
1691
1692 if (init != error_mark_node)
1693 return 0;
1694 if (!bb->frequency)
1695 return REG_BR_PROB_BASE;
1696 ao_ref_init (&refd, op);
1697 info.stmt = stmt;
1698 info.bb_set = BITMAP_ALLOC (NULL);
1699 walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
1700 NULL);
1701 if (bitmap_bit_p (info.bb_set, bb->index))
1702 {
1703 BITMAP_FREE (info.bb_set);
1704 return REG_BR_PROB_BASE;
1705 }
1706
1707 /* Assume that all memory is initialized at entry.
1708 TODO: Can we easily determine if the value is always defined
1709 and thus we may skip the entry block? */
1710 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
1711 max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
1712 else
1713 max = 1;
1714
1715 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
1716 max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);
1717
1718 BITMAP_FREE (info.bb_set);
1719 if (max < bb->frequency)
1720 return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
1721 else
1722 return REG_BR_PROB_BASE;
1723 }
1724 }
1725
1726 /* Find whether a basic block BB is the final block of a (half) diamond CFG
1727 sub-graph and if the predicate that the condition depends on is known. If so,
1728 return true and store the predicate in *P. */
1729
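/* Editorial example (a sketch) of the (half) diamond handled here:

     int f (int p)
     {
       int x;
       if (p < 0)
         x = 1;
       else
         x = 2;
       return x;                   // PHI <1, 2> joins here
     }

   Both predecessors of the join block trace back to the single condition
   "p < 0", so the PHI result is known whenever the operands of that
   condition are known. */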
1730 static bool
1731 phi_result_unknown_predicate (struct ipa_node_params *info,
1732 ipa_fn_summary *summary, basic_block bb,
1733 predicate *p,
1734 vec<predicate> nonconstant_names)
1735 {
1736 edge e;
1737 edge_iterator ei;
1738 basic_block first_bb = NULL;
1739 gimple *stmt;
1740
1741 if (single_pred_p (bb))
1742 {
1743 *p = false;
1744 return true;
1745 }
1746
1747 FOR_EACH_EDGE (e, ei, bb->preds)
1748 {
1749 if (single_succ_p (e->src))
1750 {
1751 if (!single_pred_p (e->src))
1752 return false;
1753 if (!first_bb)
1754 first_bb = single_pred (e->src);
1755 else if (single_pred (e->src) != first_bb)
1756 return false;
1757 }
1758 else
1759 {
1760 if (!first_bb)
1761 first_bb = e->src;
1762 else if (e->src != first_bb)
1763 return false;
1764 }
1765 }
1766
1767 if (!first_bb)
1768 return false;
1769
1770 stmt = last_stmt (first_bb);
1771 if (!stmt
1772 || gimple_code (stmt) != GIMPLE_COND
1773 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
1774 return false;
1775
1776 *p = will_be_nonconstant_expr_predicate (info, summary,
1777 gimple_cond_lhs (stmt),
1778 nonconstant_names);
1779 if (*p == true)
1780 return false;
1781 else
1782 return true;
1783 }
1784
1785 /* Given a PHI statement in a function described by inline properties SUMMARY
1786 and *P being the predicate describing whether the selected PHI argument is
1787 known, store a predicate for the result of the PHI statement into
1788 NONCONSTANT_NAMES, if possible. */
1789
1790 static void
1791 predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi,
1792 predicate *p,
1793 vec<predicate> nonconstant_names)
1794 {
1795 unsigned i;
1796
1797 for (i = 0; i < gimple_phi_num_args (phi); i++)
1798 {
1799 tree arg = gimple_phi_arg (phi, i)->def;
1800 if (!is_gimple_min_invariant (arg))
1801 {
1802 gcc_assert (TREE_CODE (arg) == SSA_NAME);
1803 *p = p->or_with (summary->conds,
1804 nonconstant_names[SSA_NAME_VERSION (arg)]);
1805 if (*p == true)
1806 return;
1807 }
1808 }
1809
1810 if (dump_file && (dump_flags & TDF_DETAILS))
1811 {
1812 fprintf (dump_file, "\t\tphi predicate: ");
1813 p->dump (dump_file, summary->conds);
1814 }
1815 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
1816 }
1817
1818 /* Return a predicate specifying when the array index in access OP becomes non-constant. */
1819
1820 static predicate
1821 array_index_predicate (ipa_fn_summary *info,
1822 vec< predicate> nonconstant_names, tree op)
1823 {
1824 predicate p = false;
1825 while (handled_component_p (op))
1826 {
1827 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
1828 {
1829 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
1830 p = p.or_with (info->conds,
1831 nonconstant_names[SSA_NAME_VERSION
1832 (TREE_OPERAND (op, 1))]);
1833 }
1834 op = TREE_OPERAND (op, 0);
1835 }
1836 return p;
1837 }
1838
1839 /* For a typical usage of __builtin_expect (a <= b, 1), we
1840 may introduce an extra relation stmt:
1841 With the builtin, we have
1842 t1 = a <= b;
1843 t2 = (long int) t1;
1844 t3 = __builtin_expect (t2, 1);
1845 if (t3 != 0)
1846 goto ...
1847 Without the builtin, we have
1848 if (a<=b)
1849 goto...
1850 This affects the size/time estimation and may have
1851 an impact on the earlier inlining.
1852 Here we find this pattern and fix it up later. */
1853
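/* Editorial example (a sketch) of source code that produces the pattern
   described above:

     int f (int a, int b)
     {
       if (__builtin_expect (a <= b, 1))
         return 1;
       return 0;
     }

   The comparison is computed into a temporary, widened, passed through
   __builtin_expect and then tested against zero - exactly the t1/t2/t3
   chain shown in the comment. */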
1854 static gimple *
1855 find_foldable_builtin_expect (basic_block bb)
1856 {
1857 gimple_stmt_iterator bsi;
1858
1859 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1860 {
1861 gimple *stmt = gsi_stmt (bsi);
1862 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1863 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1864 {
1865 tree var = gimple_call_lhs (stmt);
1866 tree arg = gimple_call_arg (stmt, 0);
1867 use_operand_p use_p;
1868 gimple *use_stmt;
1869 bool match = false;
1870 bool done = false;
1871
1872 if (!var || !arg)
1873 continue;
1874 gcc_assert (TREE_CODE (var) == SSA_NAME);
1875
1876 while (TREE_CODE (arg) == SSA_NAME)
1877 {
1878 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1879 if (!is_gimple_assign (stmt_tmp))
1880 break;
1881 switch (gimple_assign_rhs_code (stmt_tmp))
1882 {
1883 case LT_EXPR:
1884 case LE_EXPR:
1885 case GT_EXPR:
1886 case GE_EXPR:
1887 case EQ_EXPR:
1888 case NE_EXPR:
1889 match = true;
1890 done = true;
1891 break;
1892 CASE_CONVERT:
1893 break;
1894 default:
1895 done = true;
1896 break;
1897 }
1898 if (done)
1899 break;
1900 arg = gimple_assign_rhs1 (stmt_tmp);
1901 }
1902
1903 if (match && single_imm_use (var, &use_p, &use_stmt)
1904 && gimple_code (use_stmt) == GIMPLE_COND)
1905 return use_stmt;
1906 }
1907 }
1908 return NULL;
1909 }
1910
1911 /* Return true when the basic block contains only clobbers followed by RESX.
1912 Such BBs are kept around to make removal of dead stores possible in the
1913 presence of EH and will be optimized out by optimize_clobbers later in the
1914 game.
1915
1916 NEED_EH is used to recurse in case the clobber has non-EH predecessors
1917 that can be clobber only, too. When it is false, the RESX is not necessary
1918 at the end of the basic block. */
1919
1920 static bool
1921 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
1922 {
1923 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1924 edge_iterator ei;
1925 edge e;
1926
1927 if (need_eh)
1928 {
1929 if (gsi_end_p (gsi))
1930 return false;
1931 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
1932 return false;
1933 gsi_prev (&gsi);
1934 }
1935 else if (!single_succ_p (bb))
1936 return false;
1937
1938 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
1939 {
1940 gimple *stmt = gsi_stmt (gsi);
1941 if (is_gimple_debug (stmt))
1942 continue;
1943 if (gimple_clobber_p (stmt))
1944 continue;
1945 if (gimple_code (stmt) == GIMPLE_LABEL)
1946 break;
1947 return false;
1948 }
1949
1950 /* See if all predecessors are either throws or clobber-only BBs. */
1951 FOR_EACH_EDGE (e, ei, bb->preds)
1952 if (!(e->flags & EDGE_EH)
1953 && !clobber_only_eh_bb_p (e->src, false))
1954 return false;
1955
1956 return true;
1957 }
1958
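/* Editor's sketch (not part of the original source) of a block the
   following function recognizes:

     <bb 7>:
     obj = {CLOBBER};
     resx 2;

   reached only over EH edges (or from clobber-only predecessors); such
   blocks are skipped by analyze_function_body below since
   optimize_clobbers will delete them.  */
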
1959 /* Return true if STMT computes a floating point expression that may be affected
1960 by -ffast-math and similar flags. */
1961
1962 static bool
1963 fp_expression_p (gimple *stmt)
1964 {
1965 ssa_op_iter i;
1966 tree op;
1967
1968 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
1969 if (FLOAT_TYPE_P (TREE_TYPE (op)))
1970 return true;
1971 return false;
1972 }
1973
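/* Editor's illustration (not part of the original source): in

     double scale (double x) { return x * 2.0; }

   the multiplication has FLOAT_TYPE_P defs and uses, so the following
   function returns true and analyze_function_body sets
   info->fp_expressions, marking the body as sensitive to FP math
   flags.  */
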
1974 /* Analyze function body for NODE.
1975 EARLY indicates run from early optimization pipeline. */
1976
1977 static void
1978 analyze_function_body (struct cgraph_node *node, bool early)
1979 {
1980 sreal time = 0;
1981 /* Estimate static overhead for function prologue/epilogue and alignment. */
1982 int size = 2;
1983 /* Benefits are scaled by probability of elimination that is in range
1984 <0,2>. */
1985 basic_block bb;
1986 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1987 int freq;
1988 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1989 predicate bb_predicate;
1990 struct ipa_func_body_info fbi;
1991 vec<predicate> nonconstant_names = vNULL;
1992 int nblocks, n;
1993 int *order;
1994 predicate array_index = true;
1995 gimple *fix_builtin_expect_stmt;
1996
1997 gcc_assert (my_function && my_function->cfg);
1998 gcc_assert (cfun == my_function);
1999
2000 memset (&fbi, 0, sizeof (fbi));
2001 info->conds = NULL;
2002 info->size_time_table = NULL;
2003
2004 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2005 so we can produce proper inline hints.
2006
2007 When optimizing and analyzing for early inliner, initialize node params
2008 so we can produce correct BB predicates. */
2009
2010 if (opt_for_fn (node->decl, optimize))
2011 {
2012 calculate_dominance_info (CDI_DOMINATORS);
2013 if (!early)
2014 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2015 else
2016 {
2017 ipa_check_create_node_params ();
2018 ipa_initialize_node_params (node);
2019 }
2020
2021 if (ipa_node_params_sum)
2022 {
2023 fbi.node = node;
2024 fbi.info = IPA_NODE_REF (node);
2025 fbi.bb_infos = vNULL;
2026 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2027 fbi.param_count = count_formal_params (node->decl);
2028 nonconstant_names.safe_grow_cleared
2029 (SSANAMES (my_function)->length ());
2030 }
2031 }
2032
2033 if (dump_file)
2034 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2035 node->name ());
2036
2037 /* When we run into the maximal number of entries, we assign everything to the
2038 constant truth case. Be sure to have it in the list. */
2039 bb_predicate = true;
2040 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2041
2042 bb_predicate = predicate::not_inlined ();
2043 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, bb_predicate,
2044 bb_predicate);
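
 /* Editor's note (not part of the original source): the two entries
    above record the function-wide overhead. The zero-cost always-true
    entry anchors the table (everything is collapsed onto it when the
    table overflows), while the 2 * size_scale entry charges
    prologue/epilogue cost only under the not_inlined predicate, i.e.
    it disappears once the function is inlined.  */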
2045
2046 if (fbi.info)
2047 compute_bb_predicates (&fbi, node, info);
2048 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2049 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2050 for (n = 0; n < nblocks; n++)
2051 {
2052 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2053 freq = compute_call_stmt_bb_frequency (node->decl, bb);
2054 if (clobber_only_eh_bb_p (bb))
2055 {
2056 if (dump_file && (dump_flags & TDF_DETAILS))
2057 fprintf (dump_file, "\n Ignoring BB %i;"
2058 " it will be optimized away by cleanup_clobbers\n",
2059 bb->index);
2060 continue;
2061 }
2062
2063 /* TODO: Obviously predicates can be propagated down the CFG. */
2064 if (fbi.info)
2065 {
2066 if (bb->aux)
2067 bb_predicate = *(predicate *) bb->aux;
2068 else
2069 bb_predicate = false;
2070 }
2071 else
2072 bb_predicate = true;
2073
2074 if (dump_file && (dump_flags & TDF_DETAILS))
2075 {
2076 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2077 bb_predicate.dump (dump_file, info->conds);
2078 }
2079
2080 if (fbi.info && nonconstant_names.exists ())
2081 {
2082 predicate phi_predicate;
2083 bool first_phi = true;
2084
2085 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2086 gsi_next (&bsi))
2087 {
2088 if (first_phi
2089 && !phi_result_unknown_predicate (fbi.info, info, bb,
2090 &phi_predicate,
2091 nonconstant_names))
2092 break;
2093 first_phi = false;
2094 if (dump_file && (dump_flags & TDF_DETAILS))
2095 {
2096 fprintf (dump_file, " ");
2097 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2098 }
2099 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2100 nonconstant_names);
2101 }
2102 }
2103
2104 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2105
2106 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2107 gsi_next (&bsi))
2108 {
2109 gimple *stmt = gsi_stmt (bsi);
2110 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2111 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2112 int prob;
2113 predicate will_be_nonconstant;
2114
2115 /* This relation stmt should be folded after we remove
2116 the builtin_expect call. Adjust the cost here. */
2117 if (stmt == fix_builtin_expect_stmt)
2118 {
2119 this_size--;
2120 this_time--;
2121 }
2122
2123 if (dump_file && (dump_flags & TDF_DETAILS))
2124 {
2125 fprintf (dump_file, " ");
2126 print_gimple_stmt (dump_file, stmt, 0);
2127 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2128 ((double) freq) / CGRAPH_FREQ_BASE, this_size,
2129 this_time);
2130 }
2131
2132 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2133 {
2134 predicate this_array_index;
2135 this_array_index =
2136 array_index_predicate (info, nonconstant_names,
2137 gimple_assign_rhs1 (stmt));
2138 if (this_array_index != false)
2139 array_index &= this_array_index;
2140 }
2141 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2142 {
2143 predicate this_array_index;
2144 this_array_index =
2145 array_index_predicate (info, nonconstant_names,
2146 gimple_get_lhs (stmt));
2147 if (this_array_index != false)
2148 array_index &= this_array_index;
2149 }
2150
2151
2152 if (is_gimple_call (stmt)
2153 && !gimple_call_internal_p (stmt))
2154 {
2155 struct cgraph_edge *edge = node->get_edge (stmt);
2156 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2157
2158 /* Special case: results of BUILT_IN_CONSTANT_P will always be
2159 resolved as constant. We however don't want to optimize
2160 out the cgraph edges. */
2161 if (nonconstant_names.exists ()
2162 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2163 && gimple_call_lhs (stmt)
2164 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2165 {
2166 predicate false_p = false;
2167 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2168 = false_p;
2169 }
2170 if (ipa_node_params_sum)
2171 {
2172 int count = gimple_call_num_args (stmt);
2173 int i;
2174
2175 if (count)
2176 es->param.safe_grow_cleared (count);
2177 for (i = 0; i < count; i++)
2178 {
2179 int prob = param_change_prob (stmt, i);
2180 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2181 es->param[i].change_prob = prob;
2182 }
2183 }
2184
2185 es->call_stmt_size = this_size;
2186 es->call_stmt_time = this_time;
2187 es->loop_depth = bb_loop_depth (bb);
2188 edge_set_predicate (edge, &bb_predicate);
2189 }
2190
2191 /* TODO: When a conditional jump or switch is known to be constant, but
2192 we did not translate it into the predicates, we really can account
2193 for just the maximum of the possible paths. */
2194 if (fbi.info)
2195 will_be_nonconstant
2196 = will_be_nonconstant_predicate (&fbi, info,
2197 stmt, nonconstant_names);
2198 else
2199 will_be_nonconstant = true;
2200 if (this_time || this_size)
2201 {
2202 this_time *= freq;
2203
2204 prob = eliminated_by_inlining_prob (stmt);
2205 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2206 fprintf (dump_file,
2207 "\t\t50%% will be eliminated by inlining\n");
2208 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2209 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2210
2211 struct predicate p = bb_predicate & will_be_nonconstant;
2212
2213 /* We can ignore a statement when we have proved it is never going
2214 to be executed, but we cannot do that for call statements
2215 because edges are accounted for specially. */
2216
2217 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2218 {
2219 time += this_time;
2220 size += this_size;
2221 }
2222
2223 /* We account everything but the calls. Calls have their own
2224 size/time info attached to cgraph edges. This is necessary
2225 in order to make the cost disappear after inlining. */
2226 if (!is_gimple_call (stmt))
2227 {
2228 if (prob)
2229 {
2230 predicate ip = bb_predicate & predicate::not_inlined ();
2231 info->account_size_time (this_size * prob,
2232 (sreal)(this_time * prob)
2233 / (CGRAPH_FREQ_BASE * 2), ip,
2234 p);
2235 }
2236 if (prob != 2)
2237 info->account_size_time (this_size * (2 - prob),
2238 (sreal)(this_time * (2 - prob))
2239 / (CGRAPH_FREQ_BASE * 2),
2240 bb_predicate,
2241 p);
2242 }
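
	 /* Editor's worked example (not part of the original source):
	    entries here are kept at twice their nominal value, matching
	    the <0,2> elimination scale noted above. With prob == 1 (a
	    50% chance of being eliminated by inlining), half of the cost
	    is accounted under the not_inlined predicate and half
	    unconditionally; with prob == 2 the whole cost is charged
	    only when the function is not inlined.  */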
2243
2244 if (!info->fp_expressions && fp_expression_p (stmt))
2245 {
2246 info->fp_expressions = true;
2247 if (dump_file)
2248 fprintf (dump_file, " fp_expression set\n");
2249 }
2250
2251 gcc_assert (time >= 0);
2252 gcc_assert (size >= 0);
2253 }
2254 }
2255 }
2256 set_hint_predicate (&ipa_fn_summaries->get (node)->array_index, array_index);
2257 time = time / CGRAPH_FREQ_BASE;
2258 free (order);
2259
2260 if (nonconstant_names.exists () && !early)
2261 {
2262 struct loop *loop;
2263 predicate loop_iterations = true;
2264 predicate loop_stride = true;
2265
2266 if (dump_file && (dump_flags & TDF_DETAILS))
2267 flow_loops_dump (dump_file, NULL, 0);
2268 scev_initialize ();
2269 FOR_EACH_LOOP (loop, 0)
2270 {
2271 vec<edge> exits;
2272 edge ex;
2273 unsigned int j;
2274 struct tree_niter_desc niter_desc;
2275 bb_predicate = *(predicate *) loop->header->aux;
2276
2277 exits = get_loop_exit_edges (loop);
2278 FOR_EACH_VEC_ELT (exits, j, ex)
2279 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2280 && !is_gimple_min_invariant (niter_desc.niter))
2281 {
2282 predicate will_be_nonconstant
2283 = will_be_nonconstant_expr_predicate (fbi.info, info,
2284 niter_desc.niter,
2285 nonconstant_names);
2286 if (will_be_nonconstant != true)
2287 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2288 if (will_be_nonconstant != true
2289 && will_be_nonconstant != false)
2290 /* This is slightly imprecise. We may want to represent each
2291 loop with an independent predicate. */
2292 loop_iterations &= will_be_nonconstant;
2293 }
2294 exits.release ();
2295 }
2296
2297 /* To avoid quadratic behavior we analyze stride predicates only
2298 with respect to the containing loop. Thus we simply iterate
2299 over all defs in the outermost loop body. */
2300 for (loop = loops_for_fn (cfun)->tree_root->inner;
2301 loop != NULL; loop = loop->next)
2302 {
2303 basic_block *body = get_loop_body (loop);
2304 for (unsigned i = 0; i < loop->num_nodes; i++)
2305 {
2306 gimple_stmt_iterator gsi;
2307 bb_predicate = *(predicate *) body[i]->aux;
2308 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2309 gsi_next (&gsi))
2310 {
2311 gimple *stmt = gsi_stmt (gsi);
2312
2313 if (!is_gimple_assign (stmt))
2314 continue;
2315
2316 tree def = gimple_assign_lhs (stmt);
2317 if (TREE_CODE (def) != SSA_NAME)
2318 continue;
2319
2320 affine_iv iv;
2321 if (!simple_iv (loop_containing_stmt (stmt),
2322 loop_containing_stmt (stmt),
2323 def, &iv, true)
2324 || is_gimple_min_invariant (iv.step))
2325 continue;
2326
2327 predicate will_be_nonconstant
2328 = will_be_nonconstant_expr_predicate (fbi.info, info,
2329 iv.step,
2330 nonconstant_names);
2331 if (will_be_nonconstant != true)
2332 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2333 if (will_be_nonconstant != true
2334 && will_be_nonconstant != false)
2335 /* This is slightly imprecise. We may want to represent
2336 each loop with an independent predicate. */
2337 loop_stride = loop_stride & will_be_nonconstant;
2338 }
2339 }
2340 free (body);
2341 }
2342 set_hint_predicate (&ipa_fn_summaries->get (node)->loop_iterations,
2343 loop_iterations);
2344 set_hint_predicate (&ipa_fn_summaries->get (node)->loop_stride,
2345 loop_stride);
2346 scev_finalize ();
2347 }
2348 FOR_ALL_BB_FN (bb, my_function)
2349 {
2350 edge e;
2351 edge_iterator ei;
2352
2353 if (bb->aux)
2354 edge_predicate_pool.remove ((predicate *)bb->aux);
2355 bb->aux = NULL;
2356 FOR_EACH_EDGE (e, ei, bb->succs)
2357 {
2358 if (e->aux)
2359 edge_predicate_pool.remove ((predicate *) e->aux);
2360 e->aux = NULL;
2361 }
2362 }
2363 ipa_fn_summaries->get (node)->time = time;
2364 ipa_fn_summaries->get (node)->self_size = size;
2365 nonconstant_names.release ();
2366 ipa_release_body_info (&fbi);
2367 if (opt_for_fn (node->decl, optimize))
2368 {
2369 if (!early)
2370 loop_optimizer_finalize ();
2371 else if (!ipa_edge_args_sum)
2372 ipa_free_all_node_params ();
2373 free_dominance_info (CDI_DOMINATORS);
2374 }
2375 if (dump_file)
2376 {
2377 fprintf (dump_file, "\n");
2378 ipa_dump_fn_summary (dump_file, node);
2379 }
2380 }
2381
2382
2383 /* Compute function summary.
2384 EARLY is true when we compute parameters during early opts. */
2385
2386 void
2387 compute_fn_summary (struct cgraph_node *node, bool early)
2388 {
2389 HOST_WIDE_INT self_stack_size;
2390 struct cgraph_edge *e;
2391 struct ipa_fn_summary *info;
2392
2393 gcc_assert (!node->global.inlined_to);
2394
2395 if (!ipa_fn_summaries)
2396 ipa_fn_summary_alloc ();
2397
2398 info = ipa_fn_summaries->get (node);
2399 info->reset (node);
2400
2401 /* Estimate the stack size for the function if we're optimizing. */
2402 self_stack_size = optimize && !node->thunk.thunk_p
2403 ? estimated_stack_frame_size (node) : 0;
2404 info->estimated_self_stack_size = self_stack_size;
2405 info->estimated_stack_size = self_stack_size;
2406 info->stack_frame_offset = 0;
2407
2408 if (node->thunk.thunk_p)
2409 {
2410 struct ipa_call_summary *es = ipa_call_summaries->get (node->callees);
2411 predicate t = true;
2412
2413 node->local.can_change_signature = false;
2414 es->call_stmt_size = eni_size_weights.call_cost;
2415 es->call_stmt_time = eni_time_weights.call_cost;
2416 info->account_size_time (ipa_fn_summary::size_scale * 2, 2, t, t);
2417 t = predicate::not_inlined ();
2418 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2419 ipa_update_overall_fn_summary (node);
2420 info->self_size = info->size;
2421 /* We can not inline instrumentation clones. */
2422 if (node->thunk.add_pointer_bounds_args)
2423 {
2424 info->inlinable = false;
2425 node->callees->inline_failed = CIF_CHKP;
2426 }
2427 else
2428 info->inlinable = true;
2429 }
2430 else
2431 {
2432 /* Even is_gimple_min_invariant relies on current_function_decl. */
2433 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2434
2435 /* Can this function be inlined at all? */
2436 if (!opt_for_fn (node->decl, optimize)
2437 && !lookup_attribute ("always_inline",
2438 DECL_ATTRIBUTES (node->decl)))
2439 info->inlinable = false;
2440 else
2441 info->inlinable = tree_inlinable_function_p (node->decl);
2442
2443 info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);
2444
2445 /* Type attributes can use parameter indices to describe them. */
2446 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
2447 node->local.can_change_signature = false;
2448 else
2449 {
2450 /* Otherwise, inlinable functions always can change signature. */
2451 if (info->inlinable)
2452 node->local.can_change_signature = true;
2453 else
2454 {
2455 /* Functions calling __builtin_apply_args or __builtin_va_start cannot change signature. */
2456 for (e = node->callees; e; e = e->next_callee)
2457 {
2458 tree cdecl = e->callee->decl;
2459 if (DECL_BUILT_IN (cdecl)
2460 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2461 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2462 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
2463 break;
2464 }
2465 node->local.can_change_signature = !e;
2466 }
2467 }
2468 /* Functions called by instrumentation thunk can't change signature
2469 because instrumentation thunk modification is not supported. */
2470 if (node->local.can_change_signature)
2471 for (e = node->callers; e; e = e->next_caller)
2472 if (e->caller->thunk.thunk_p
2473 && e->caller->thunk.add_pointer_bounds_args)
2474 {
2475 node->local.can_change_signature = false;
2476 break;
2477 }
2478 analyze_function_body (node, early);
2479 pop_cfun ();
2480 }
2481 for (e = node->callees; e; e = e->next_callee)
2482 if (e->callee->comdat_local_p ())
2483 break;
2484 node->calls_comdat_local = (e != NULL);
2485
2486 /* Inlining characteristics are maintained by cgraph_mark_inline. */
2487 info->size = info->self_size;
2488 info->stack_frame_offset = 0;
2489 info->estimated_stack_size = info->estimated_self_stack_size;
2490
2491 /* The code above should compute exactly the same result as
2492 ipa_update_overall_fn_summary, but because the computation happens in
2493 a different order, roundoff errors may result in slight changes. */
2494 ipa_update_overall_fn_summary (node);
2495 gcc_assert (info->size == info->self_size);
2496 }
2497
2498
2499 /* Compute parameters of functions used by the inliner using
2500 current_function_decl. */
2501
2502 static unsigned int
2503 compute_fn_summary_for_current (void)
2504 {
2505 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2506 return 0;
2507 }
2508
2509 /* Estimate the benefit of devirtualizing indirect edge IE, given KNOWN_VALS,
2510 KNOWN_CONTEXTS and KNOWN_AGGS. */
2511
2512 static bool
2513 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2514 int *size, int *time,
2515 vec<tree> known_vals,
2516 vec<ipa_polymorphic_call_context> known_contexts,
2517 vec<ipa_agg_jump_function_p> known_aggs)
2518 {
2519 tree target;
2520 struct cgraph_node *callee;
2521 struct ipa_fn_summary *isummary;
2522 enum availability avail;
2523 bool speculative;
2524
2525 if (!known_vals.exists () && !known_contexts.exists ())
2526 return false;
2527 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2528 return false;
2529
2530 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2531 known_aggs, &speculative);
2532 if (!target || speculative)
2533 return false;
2534
2535 /* Account for difference in cost between indirect and direct calls. */
2536 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2537 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2538 gcc_checking_assert (*time >= 0);
2539 gcc_checking_assert (*size >= 0);
2540
2541 callee = cgraph_node::get (target);
2542 if (!callee || !callee->definition)
2543 return false;
2544 callee = callee->function_symbol (&avail);
2545 if (avail < AVAIL_AVAILABLE)
2546 return false;
2547 isummary = ipa_fn_summaries->get (callee);
2548 return isummary->inlinable;
2549 }
2550
2551 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2552 handle edge E with probability PROB.
2553 Set HINTS if edge may be devirtualized.
2554 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2555 site. */
2556
2557 static inline void
2558 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2559 sreal *time,
2560 int prob,
2561 vec<tree> known_vals,
2562 vec<ipa_polymorphic_call_context> known_contexts,
2563 vec<ipa_agg_jump_function_p> known_aggs,
2564 ipa_hints *hints)
2565 {
2566 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2567 int call_size = es->call_stmt_size;
2568 int call_time = es->call_stmt_time;
2569 int cur_size;
2570 if (!e->callee
2571 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2572 known_vals, known_contexts, known_aggs)
2573 && hints && e->maybe_hot_p ())
2574 *hints |= INLINE_HINT_indirect_call;
2575 cur_size = call_size * ipa_fn_summary::size_scale;
2576 *size += cur_size;
2577 if (min_size)
2578 *min_size += cur_size;
2579 if (prob == REG_BR_PROB_BASE)
2580 *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
2581 else
2582 *time += ((sreal)call_time) * (prob * e->frequency)
2583 / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
2584 }
2585
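/* Editor's worked example (not part of the original source): with
   call_stmt_time == 10, prob == REG_BR_PROB_BASE / 2 and e->frequency
   == CGRAPH_FREQ_BASE, the edge above contributes

     10 * (REG_BR_PROB_BASE / 2) * CGRAPH_FREQ_BASE
	/ (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE) == 5

   units of time: the cost is scaled both by execution frequency and by
   the probability that the call survives specialization.  */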
2586
2587
2588 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2589 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2590 describe the context of the call sites. */
2591
2592 static void
2593 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2594 int *min_size, sreal *time,
2595 ipa_hints *hints,
2596 clause_t possible_truths,
2597 vec<tree> known_vals,
2598 vec<ipa_polymorphic_call_context> known_contexts,
2599 vec<ipa_agg_jump_function_p> known_aggs)
2600 {
2601 struct cgraph_edge *e;
2602 for (e = node->callees; e; e = e->next_callee)
2603 {
2604 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2605
2606 /* Do not care about zero-sized builtins. */
2607 if (e->inline_failed && !es->call_stmt_size)
2608 {
2609 gcc_checking_assert (!es->call_stmt_time);
2610 continue;
2611 }
2612 if (!es->predicate
2613 || es->predicate->evaluate (possible_truths))
2614 {
2615 if (e->inline_failed)
2616 {
2617 /* Predicates of calls shall not use NOT_CHANGED codes,
2618 so we do not need to compute probabilities. */
2619 estimate_edge_size_and_time (e, size,
2620 es->predicate ? NULL : min_size,
2621 time, REG_BR_PROB_BASE,
2622 known_vals, known_contexts,
2623 known_aggs, hints);
2624 }
2625 else
2626 estimate_calls_size_and_time (e->callee, size, min_size, time,
2627 hints,
2628 possible_truths,
2629 known_vals, known_contexts,
2630 known_aggs);
2631 }
2632 }
2633 for (e = node->indirect_calls; e; e = e->next_callee)
2634 {
2635 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2636 if (!es->predicate
2637 || es->predicate->evaluate (possible_truths))
2638 estimate_edge_size_and_time (e, size,
2639 es->predicate ? NULL : min_size,
2640 time, REG_BR_PROB_BASE,
2641 known_vals, known_contexts, known_aggs,
2642 hints);
2643 }
2644 }
2645
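/* Editor's note (not part of the original source): edges with
   !e->inline_failed have already been inlined, so the recursion above
   descends into their callees to pick up the call sites that now live
   inside NODE's combined body.  */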
2646
2647 /* Estimate size and time needed to execute NODE assuming the
2648 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2649 information about NODE's arguments. If non-NULL, also use the probability
2650 information present in the INLINE_PARAM_SUMMARY vector.
2651 Additionally determine the hints implied by the context. Finally compute
2652 the minimal size needed for the call that is independent of the call context
2653 and can be used for fast estimates. Return the values in RET_SIZE,
2654 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2655
2656 void
2657 estimate_node_size_and_time (struct cgraph_node *node,
2658 clause_t possible_truths,
2659 clause_t nonspec_possible_truths,
2660 vec<tree> known_vals,
2661 vec<ipa_polymorphic_call_context> known_contexts,
2662 vec<ipa_agg_jump_function_p> known_aggs,
2663 int *ret_size, int *ret_min_size,
2664 sreal *ret_time,
2665 sreal *ret_nonspecialized_time,
2666 ipa_hints *ret_hints,
2667 vec<inline_param_summary>
2668 inline_param_summary)
2669 {
2670 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
2671 size_time_entry *e;
2672 int size = 0;
2673 sreal time = 0;
2674 int min_size = 0;
2675 ipa_hints hints = 0;
2676 int i;
2677
2678 if (dump_file && (dump_flags & TDF_DETAILS))
2679 {
2680 bool found = false;
2681 fprintf (dump_file, " Estimating body: %s/%i\n"
2682 " Known to be false: ", node->name (),
2683 node->order);
2684
2685 for (i = predicate::not_inlined_condition;
2686 i < (predicate::first_dynamic_condition
2687 + (int) vec_safe_length (info->conds)); i++)
2688 if (!(possible_truths & (1 << i)))
2689 {
2690 if (found)
2691 fprintf (dump_file, ", ");
2692 found = true;
2693 dump_condition (dump_file, info->conds, i);
2694 }
2695 }
2696
2697 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2698 known_vals, known_contexts, known_aggs);
2699 sreal nonspecialized_time = time;
2700
2701 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2702 {
2703 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2704
2705 /* Because predicates are conservative, it can happen that nonconst is 1
2706 but exec is 0. */
2707 if (exec)
2708 {
2709 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2710
2711 gcc_checking_assert (e->time >= 0);
2712 gcc_checking_assert (time >= 0);
2713
2714 /* We compute the specialized size only because the size of the
2715 nonspecialized copy is context independent.
2716
2717 The difference between nonspecialized and specialized execution is
2718 that the nonspecialized version will not have optimized out the
2719 computations known to be constant in a specialized setting. */
2720 if (nonconst)
2721 size += e->size;
2722 nonspecialized_time += e->time;
2723 if (!nonconst)
2724 ;
2725 else if (!inline_param_summary.exists ())
2726 {
2727 if (nonconst)
2728 time += e->time;
2729 }
2730 else
2731 {
2732 int prob = e->nonconst_predicate.probability
2733 (info->conds, possible_truths,
2734 inline_param_summary);
2735 gcc_checking_assert (prob >= 0);
2736 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2737 time += e->time * prob / REG_BR_PROB_BASE;
2738 }
2739 gcc_checking_assert (time >= 0);
2740 }
2741 }
2742 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
2743 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
2744 min_size = (*info->size_time_table)[0].size;
2745 gcc_checking_assert (size >= 0);
2746 gcc_checking_assert (time >= 0);
2747 /* nonspecialized_time should always be bigger than the specialized time.
2748 Roundoff issues, however, may get in the way. */
2749 gcc_checking_assert ((nonspecialized_time - time) >= -1);
2750
2751 /* Roundoff issues may make the specialized time bigger than the
2752 nonspecialized time. We do not really want that to happen because some
2753 heuristics may get confused by seeing negative speedups. */
2754 if (time > nonspecialized_time)
2755 time = nonspecialized_time;
2756
2757 if (info->loop_iterations
2758 && !info->loop_iterations->evaluate (possible_truths))
2759 hints |= INLINE_HINT_loop_iterations;
2760 if (info->loop_stride
2761 && !info->loop_stride->evaluate (possible_truths))
2762 hints |= INLINE_HINT_loop_stride;
2763 if (info->array_index
2764 && !info->array_index->evaluate (possible_truths))
2765 hints |= INLINE_HINT_array_index;
2766 if (info->scc_no)
2767 hints |= INLINE_HINT_in_scc;
2768 if (DECL_DECLARED_INLINE_P (node->decl))
2769 hints |= INLINE_HINT_declared_inline;
2770
2771 size = RDIV (size, ipa_fn_summary::size_scale);
2772 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
2773
2774 if (dump_file && (dump_flags & TDF_DETAILS))
2775 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2776 time.to_double (), nonspecialized_time.to_double ());
2777 if (ret_time)
2778 *ret_time = time;
2779 if (ret_nonspecialized_time)
2780 *ret_nonspecialized_time = nonspecialized_time;
2781 if (ret_size)
2782 *ret_size = size;
2783 if (ret_min_size)
2784 *ret_min_size = min_size;
2785 if (ret_hints)
2786 *ret_hints = hints;
2787 return;
2788 }
2789
2790
2791 /* Estimate size and time needed to execute callee of EDGE assuming that
2792 parameters known to be constant at caller of EDGE are propagated.
2793 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2794 and types for parameters. */
2795
2796 void
2797 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2798 vec<tree> known_vals,
2799 vec<ipa_polymorphic_call_context>
2800 known_contexts,
2801 vec<ipa_agg_jump_function_p> known_aggs,
2802 int *ret_size, sreal *ret_time,
2803 sreal *ret_nonspec_time,
2804 ipa_hints *hints)
2805 {
2806 clause_t clause, nonspec_clause;
2807
2808 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2809 &clause, &nonspec_clause);
2810 estimate_node_size_and_time (node, clause, nonspec_clause,
2811 known_vals, known_contexts,
2812 known_aggs, ret_size, NULL, ret_time,
2813 ret_nonspec_time, hints, vNULL);
2814 }
2815
2816
2817 /* Update summary information of inline clones after inlining.
2818 Compute peak stack usage. */
2819
2820 static void
2821 inline_update_callee_summaries (struct cgraph_node *node, int depth)
2822 {
2823 struct cgraph_edge *e;
2824 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
2825 struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
2826 HOST_WIDE_INT peak;
2827
2828 callee_info->stack_frame_offset
2829 = caller_info->stack_frame_offset
2830 + caller_info->estimated_self_stack_size;
2831 peak = callee_info->stack_frame_offset
2832 + callee_info->estimated_self_stack_size;
2833 if (ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
2834 ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
2835 ipa_propagate_frequency (node);
2836 for (e = node->callees; e; e = e->next_callee)
2837 {
2838 if (!e->inline_failed)
2839 inline_update_callee_summaries (e->callee, depth);
2840 ipa_call_summaries->get (e)->loop_depth += depth;
2841 }
2842 for (e = node->indirect_calls; e; e = e->next_callee)
2843 ipa_call_summaries->get (e)->loop_depth += depth;
2844 }
2845
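/* Editor's illustration (not part of the original source): if A, with a
   32-byte frame, inlines B, with a 16-byte frame, which in turn inlines
   C, then C's stack_frame_offset becomes 32 + 16 == 48 and the peak is
   48 plus C's own frame size; A's estimated_stack_size is raised to
   that peak if it exceeds the previous value.  */
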
2846 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2847 When function A is inlined into B, and A calls C with a parameter
2848 that changes with probability PROB1, and that parameter is known to be
2849 a passthrough of an argument of B that changes with probability PROB2,
2850 the probability of change is now PROB1 * PROB2. */
2851
2852 static void
2853 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2854 struct cgraph_edge *edge)
2855 {
2856 if (ipa_node_params_sum)
2857 {
2858 int i;
2859 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2860 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2861 struct ipa_call_summary *inlined_es
2862 = ipa_call_summaries->get (inlined_edge);
2863
2864 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2865 {
2866 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2867 if (jfunc->type == IPA_JF_PASS_THROUGH
2868 || jfunc->type == IPA_JF_ANCESTOR)
2869 {
2870 int id = jfunc->type == IPA_JF_PASS_THROUGH
2871 ? ipa_get_jf_pass_through_formal_id (jfunc)
2872 : ipa_get_jf_ancestor_formal_id (jfunc);
2873 if (id < (int) inlined_es->param.length ())
2874 {
2875 int prob1 = es->param[i].change_prob;
2876 int prob2 = inlined_es->param[id].change_prob;
2877 int prob = combine_probabilities (prob1, prob2);
2878
2879 if (prob1 && prob2 && !prob)
2880 prob = 1;
2881
2882 es->param[i].change_prob = prob;
2883 }
2884 }
2885 }
2886 }
2887 }
2888
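/* Editor's worked example (not part of the original source): if both
   probabilities are REG_BR_PROB_BASE / 2, combine_probabilities yields
   REG_BR_PROB_BASE / 4; and when a nonzero product would round down to
   zero, the code above clamps it to 1 so a possible change is never
   lost entirely.  */
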
2889 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2890
2891 Remap predicates of callees of NODE. The rest of the arguments match
2892 remap_predicate.
2893
2894 Also update change probabilities. */
2895
2896 static void
2897 remap_edge_summaries (struct cgraph_edge *inlined_edge,
2898 struct cgraph_node *node,
2899 struct ipa_fn_summary *info,
2900 struct ipa_fn_summary *callee_info,
2901 vec<int> operand_map,
2902 vec<int> offset_map,
2903 clause_t possible_truths,
2904 predicate *toplev_predicate)
2905 {
2906 struct cgraph_edge *e, *next;
2907 for (e = node->callees; e; e = next)
2908 {
2909 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2910 predicate p;
2911 next = e->next_callee;
2912
2913 if (e->inline_failed)
2914 {
2915 remap_edge_change_prob (inlined_edge, e);
2916
2917 if (es->predicate)
2918 {
2919 p = es->predicate->remap_after_inlining
2920 (info, callee_info, operand_map,
2921 offset_map, possible_truths,
2922 *toplev_predicate);
2923 edge_set_predicate (e, &p);
2924 }
2925 else
2926 edge_set_predicate (e, toplev_predicate);
2927 }
2928 else
2929 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
2930 operand_map, offset_map, possible_truths,
2931 toplev_predicate);
2932 }
2933 for (e = node->indirect_calls; e; e = next)
2934 {
2935 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2936 predicate p;
2937 next = e->next_callee;
2938
2939 remap_edge_change_prob (inlined_edge, e);
2940 if (es->predicate)
2941 {
2942 p = es->predicate->remap_after_inlining
2943 (info, callee_info, operand_map, offset_map,
2944 possible_truths, *toplev_predicate);
2945 edge_set_predicate (e, &p);
2946 }
2947 else
2948 edge_set_predicate (e, toplev_predicate);
2949 }
2950 }
2951
2952 /* Same as remap_predicate, but set the result into hint *HINT. */
2953
2954 static void
2955 remap_hint_predicate (struct ipa_fn_summary *info,
2956 struct ipa_fn_summary *callee_info,
2957 predicate **hint,
2958 vec<int> operand_map,
2959 vec<int> offset_map,
2960 clause_t possible_truths,
2961 predicate *toplev_predicate)
2962 {
2963 predicate p;
2964
2965 if (!*hint)
2966 return;
2967 p = (*hint)->remap_after_inlining
2968 (info, callee_info,
2969 operand_map, offset_map,
2970 possible_truths, *toplev_predicate);
2971 if (p != false && p != true)
2972 {
2973 if (!*hint)
2974 set_hint_predicate (hint, p);
2975 else
2976 **hint &= p;
2977 }
2978 }
2979
2980 /* We inlined EDGE. Update summary of the function we inlined into. */
2981
2982 void
2983 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
2984 {
2985 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
2986 struct cgraph_node *to = (edge->caller->global.inlined_to
2987 ? edge->caller->global.inlined_to : edge->caller);
2988 struct ipa_fn_summary *info = ipa_fn_summaries->get (to);
2989 clause_t clause = 0; /* not_inline is known to be false. */
2990 size_time_entry *e;
2991 vec<int> operand_map = vNULL;
2992 vec<int> offset_map = vNULL;
2993 int i;
2994 predicate toplev_predicate;
2995 predicate true_p = true;
2996 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2997
2998 if (es->predicate)
2999 toplev_predicate = *es->predicate;
3000 else
3001 toplev_predicate = true;
3002
3003 info->fp_expressions |= callee_info->fp_expressions;
3004
3005 if (callee_info->conds)
3006 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3007 if (ipa_node_params_sum && callee_info->conds)
3008 {
3009 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3010 int count = ipa_get_cs_argument_count (args);
3011 int i;
3012
3013 if (count)
3014 {
3015 operand_map.safe_grow_cleared (count);
3016 offset_map.safe_grow_cleared (count);
3017 }
3018 for (i = 0; i < count; i++)
3019 {
3020 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3021 int map = -1;
3022
3023 /* TODO: handle non-NOPs when merging. */
3024 if (jfunc->type == IPA_JF_PASS_THROUGH)
3025 {
3026 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3027 map = ipa_get_jf_pass_through_formal_id (jfunc);
3028 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3029 offset_map[i] = -1;
3030 }
3031 else if (jfunc->type == IPA_JF_ANCESTOR)
3032 {
3033 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3034 if (offset >= 0 && offset < INT_MAX)
3035 {
3036 map = ipa_get_jf_ancestor_formal_id (jfunc);
3037 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3038 offset = -1;
3039 offset_map[i] = offset;
3040 }
3041 }
3042 operand_map[i] = map;
3043 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3044 }
3045 }
3046 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3047 {
3048 predicate p;
3049 p = e->exec_predicate.remap_after_inlining
3050 (info, callee_info, operand_map,
3051 offset_map, clause,
3052 toplev_predicate);
3053 predicate nonconstp;
3054 nonconstp = e->nonconst_predicate.remap_after_inlining
3055 (info, callee_info, operand_map,
3056 offset_map, clause,
3057 toplev_predicate);
3058 if (p != false && nonconstp != false)
3059 {
3060 sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
3061 int prob = e->nonconst_predicate.probability (callee_info->conds,
3062 clause, es->param);
3063 add_time = add_time * prob / REG_BR_PROB_BASE;
3064 if (prob != REG_BR_PROB_BASE
3065 && dump_file && (dump_flags & TDF_DETAILS))
3066 {
3067 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3068 (double) prob / REG_BR_PROB_BASE);
3069 }
3070 info->account_size_time (e->size, add_time, p, nonconstp);
3071 }
3072 }
3073 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3074 offset_map, clause, &toplev_predicate);
3075 remap_hint_predicate (info, callee_info,
3076 &callee_info->loop_iterations,
3077 operand_map, offset_map, clause, &toplev_predicate);
3078 remap_hint_predicate (info, callee_info,
3079 &callee_info->loop_stride,
3080 operand_map, offset_map, clause, &toplev_predicate);
3081 remap_hint_predicate (info, callee_info,
3082 &callee_info->array_index,
3083 operand_map, offset_map, clause, &toplev_predicate);
3084
3085 inline_update_callee_summaries (edge->callee,
3086 ipa_call_summaries->get (edge)->loop_depth);
3087
3088 /* We do not maintain predicates of inlined edges; free it. */
3089 edge_set_predicate (edge, &true_p);
3090 /* Similarly remove param summaries. */
3091 es->param.release ();
3092 operand_map.release ();
3093 offset_map.release ();
3094 }
3095
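/* Editor's illustration (not part of the original source): if the
   inlined call passes the caller's parameter 2 unchanged as the
   callee's parameter 0 (a NOP pass-through jump function), then
   operand_map[0] == 2 in the function above, and a callee predicate
   conditioned on parameter 0 is remapped to one conditioned on the
   caller's parameter 2.  */
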
3096 /* For performance reasons ipa_merge_fn_summary_after_inlining does not update
3097 the overall size and time. Recompute them here. */
3098
3099 void
3100 ipa_update_overall_fn_summary (struct cgraph_node *node)
3101 {
3102 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
3103 size_time_entry *e;
3104 int i;
3105
3106 info->size = 0;
3107 info->time = 0;
3108 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3109 {
3110 info->size += e->size;
3111 info->time += e->time;
3112 }
3113 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3114 &info->time, NULL,
3115 ~(clause_t) (1 << predicate::false_condition),
3116 vNULL, vNULL, vNULL);
3117 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
3118 }
3119
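/* Editor's note (not part of the original source): the final division
   above rounds to the nearest unit, since (size + size_scale / 2)
   / size_scale maps an accumulated 2.5 * size_scale to 3.  */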
3120
3121 /* This function performs intraprocedural analysis in NODE that is required to
3122 inline indirect calls. */
3123
3124 static void
3125 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3126 {
3127 ipa_analyze_node (node);
3128 if (dump_file && (dump_flags & TDF_DETAILS))
3129 {
3130 ipa_print_node_params (dump_file, node);
3131 ipa_print_node_jump_functions (dump_file, node);
3132 }
3133 }
3134
3135
3136 /* Analyze the function body of NODE and compute its summary. */
3137
3138 void
3139 inline_analyze_function (struct cgraph_node *node)
3140 {
3141 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3142
3143 if (dump_file)
3144 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3145 node->name (), node->order);
3146 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3147 inline_indirect_intraprocedural_analysis (node);
3148 compute_fn_summary (node, false);
3149 if (!optimize)
3150 {
3151 struct cgraph_edge *e;
3152 for (e = node->callees; e; e = e->next_callee)
3153 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3154 for (e = node->indirect_calls; e; e = e->next_callee)
3155 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3156 }
3157
3158 pop_cfun ();
3159 }
3160
3161
3162 /* Called when new function is inserted to callgraph late. */
3163
3164 void
3165 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
3166 {
3167 inline_analyze_function (node);
3168 }
3169
3170 /* Compute function summaries for all analyzed functions in the callgraph. */
3171
3172 static void
3173 ipa_fn_summary_generate (void)
3174 {
3175 struct cgraph_node *node;
3176
3177 FOR_EACH_DEFINED_FUNCTION (node)
3178 if (DECL_STRUCT_FUNCTION (node->decl))
3179 node->local.versionable = tree_versionable_function_p (node->decl);
3180
3181 ipa_fn_summary_alloc ();
3182
3183 ipa_fn_summaries->enable_insertion_hook ();
3184
3185 ipa_register_cgraph_hooks ();
3186
3187 FOR_EACH_DEFINED_FUNCTION (node)
3188 if (!node->alias
3189 && (flag_generate_lto || flag_generate_offload || flag_wpa
3190 || opt_for_fn (node->decl, optimize)))
3191 inline_analyze_function (node);
3192 }
3193
3194
3195 /* Read inline summary for edge E from IB. */
3196
3197 static void
3198 read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
3199 {
3200 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3201 predicate p;
3202 int length, i;
3203
3204 es->call_stmt_size = streamer_read_uhwi (ib);
3205 es->call_stmt_time = streamer_read_uhwi (ib);
3206 es->loop_depth = streamer_read_uhwi (ib);
3207 p.stream_in (ib);
3208 edge_set_predicate (e, &p);
3209 length = streamer_read_uhwi (ib);
3210 if (length)
3211 {
3212 es->param.safe_grow_cleared (length);
3213 for (i = 0; i < length; i++)
3214 es->param[i].change_prob = streamer_read_uhwi (ib);
3215 }
3216 }
3217
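/* Editor's note (not part of the original source): the fields above are
   read in exactly the order write_ipa_call_summary below emits them
   (size, time, loop depth, predicate, parameter change probabilities);
   the two functions must stay in sync for the LTO round-trip to
   work.  */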
3218
3219 /* Stream in inline summaries from the section. */
3220
3221 static void
3222 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3223 size_t len)
3224 {
3225 const struct lto_function_header *header =
3226 (const struct lto_function_header *) data;
3227 const int cfg_offset = sizeof (struct lto_function_header);
3228 const int main_offset = cfg_offset + header->cfg_size;
3229 const int string_offset = main_offset + header->main_size;
3230 struct data_in *data_in;
3231 unsigned int i, count2, j;
3232 unsigned int f_count;
3233
3234 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3235 file_data->mode_table);
3236
3237 data_in =
3238 lto_data_in_create (file_data, (const char *) data + string_offset,
3239 header->string_size, vNULL);
3240 f_count = streamer_read_uhwi (&ib);
3241 for (i = 0; i < f_count; i++)
3242 {
3243 unsigned int index;
3244 struct cgraph_node *node;
3245 struct ipa_fn_summary *info;
3246 lto_symtab_encoder_t encoder;
3247 struct bitpack_d bp;
3248 struct cgraph_edge *e;
3249 predicate p;
3250
3251 index = streamer_read_uhwi (&ib);
3252 encoder = file_data->symtab_node_encoder;
3253 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3254 index));
3255 info = ipa_fn_summaries->get (node);
3256
3257 info->estimated_stack_size
3258 = info->estimated_self_stack_size = streamer_read_uhwi (&ib);
3259 info->size = info->self_size = streamer_read_uhwi (&ib);
3260 info->time = sreal::stream_in (&ib);
3261
3262 bp = streamer_read_bitpack (&ib);
3263 info->inlinable = bp_unpack_value (&bp, 1);
3264 info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
3265 info->fp_expressions = bp_unpack_value (&bp, 1);
3266
3267 count2 = streamer_read_uhwi (&ib);
3268 gcc_assert (!info->conds);
3269 for (j = 0; j < count2; j++)
3270 {
3271 struct condition c;
3272 c.operand_num = streamer_read_uhwi (&ib);
3273 c.size = streamer_read_uhwi (&ib);
3274 c.code = (enum tree_code) streamer_read_uhwi (&ib);
3275 c.val = stream_read_tree (&ib, data_in);
3276 bp = streamer_read_bitpack (&ib);
3277 c.agg_contents = bp_unpack_value (&bp, 1);
3278 c.by_ref = bp_unpack_value (&bp, 1);
3279 if (c.agg_contents)
3280 c.offset = streamer_read_uhwi (&ib);
3281 vec_safe_push (info->conds, c);
3282 }
3283 count2 = streamer_read_uhwi (&ib);
3284 gcc_assert (!info->size_time_table);
3285 for (j = 0; j < count2; j++)
3286 {
3287 struct size_time_entry e;
3288
3289 e.size = streamer_read_uhwi (&ib);
3290 e.time = sreal::stream_in (&ib);
3291 e.exec_predicate.stream_in (&ib);
3292 e.nonconst_predicate.stream_in (&ib);
3293
3294 vec_safe_push (info->size_time_table, e);
3295 }
3296
3297 p.stream_in (&ib);
3298 set_hint_predicate (&info->loop_iterations, p);
3299 p.stream_in (&ib);
3300 set_hint_predicate (&info->loop_stride, p);
3301 p.stream_in (&ib);
3302 set_hint_predicate (&info->array_index, p);
3303 for (e = node->callees; e; e = e->next_callee)
3304 read_ipa_call_summary (&ib, e);
3305 for (e = node->indirect_calls; e; e = e->next_callee)
3306 read_ipa_call_summary (&ib, e);
3307 }
3308
3309 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
3310 len);
3311 lto_data_in_delete (data_in);
3312 }
3313
3314
3315 /* Read inline summary. Jump functions are shared among ipa-cp
3316 and inliner, so when ipa-cp is active, we don't need to read them
3317 again here. */
3318
3319 static void
3320 ipa_fn_summary_read (void)
3321 {
3322 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3323 struct lto_file_decl_data *file_data;
3324 unsigned int j = 0;
3325
3326 ipa_fn_summary_alloc ();
3327
3328 while ((file_data = file_data_vec[j++]))
3329 {
3330 size_t len;
3331 const char *data = lto_get_section_data (file_data,
3332 LTO_section_ipa_fn_summary,
3333 NULL, &len);
3334 if (data)
3335 inline_read_section (file_data, data, len);
3336 else
3337 /* Fatal error here. We do not want to support compiling ltrans units
3338 with a different version of the compiler or different flags than
3339 the WPA unit, so this should never happen. */
3340 fatal_error (input_location,
3341 "ipa inline summary is missing in input file");
3342 }
3343 ipa_register_cgraph_hooks ();
3344 if (!flag_ipa_cp)
3345 ipa_prop_read_jump_functions ();
3346
3347 gcc_assert (ipa_fn_summaries);
3348 ipa_fn_summaries->enable_insertion_hook ();
3349 }
3350
3351
3352 /* Write inline summary for edge E to OB. */
3353
3354 static void
3355 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
3356 {
3357 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3358 int i;
3359
3360 streamer_write_uhwi (ob, es->call_stmt_size);
3361 streamer_write_uhwi (ob, es->call_stmt_time);
3362 streamer_write_uhwi (ob, es->loop_depth);
3363 if (es->predicate)
3364 es->predicate->stream_out (ob);
3365 else
3366 streamer_write_uhwi (ob, 0);
3367 streamer_write_uhwi (ob, es->param.length ());
3368 for (i = 0; i < (int) es->param.length (); i++)
3369 streamer_write_uhwi (ob, es->param[i].change_prob);
3370 }
3371
3372
3373 /* Write inline summaries for nodes in SET.
3374 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3375 active, we don't need to write them twice. */
3376
3377 static void
3378 ipa_fn_summary_write (void)
3379 {
3380 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
3381 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3382 unsigned int count = 0;
3383 int i;
3384
3385 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3386 {
3387 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3388 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3389 if (cnode && cnode->definition && !cnode->alias)
3390 count++;
3391 }
3392 streamer_write_uhwi (ob, count);
3393
3394 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3395 {
3396 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3397 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3398 if (cnode && cnode->definition && !cnode->alias)
3399 {
3400 struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
3401 struct bitpack_d bp;
3402 struct cgraph_edge *edge;
3403 int i;
3404 size_time_entry *e;
3405 struct condition *c;
3406
3407 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3408 streamer_write_hwi (ob, info->estimated_self_stack_size);
3409 streamer_write_hwi (ob, info->self_size);
3410 info->time.stream_out (ob);
3411 bp = bitpack_create (ob->main_stream);
3412 bp_pack_value (&bp, info->inlinable, 1);
3413 bp_pack_value (&bp, info->contains_cilk_spawn, 1);
3414 bp_pack_value (&bp, info->fp_expressions, 1);
3415 streamer_write_bitpack (&bp);
3416 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3417 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3418 {
3419 streamer_write_uhwi (ob, c->operand_num);
3420 streamer_write_uhwi (ob, c->size);
3421 streamer_write_uhwi (ob, c->code);
3422 stream_write_tree (ob, c->val, true);
3423 bp = bitpack_create (ob->main_stream);
3424 bp_pack_value (&bp, c->agg_contents, 1);
3425 bp_pack_value (&bp, c->by_ref, 1);
3426 streamer_write_bitpack (&bp);
3427 if (c->agg_contents)
3428 streamer_write_uhwi (ob, c->offset);
3429 }
3430 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
3431 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3432 {
3433 streamer_write_uhwi (ob, e->size);
3434 e->time.stream_out (ob);
3435 e->exec_predicate.stream_out (ob);
3436 e->nonconst_predicate.stream_out (ob);
3437 }
3438 if (info->loop_iterations)
3439 info->loop_iterations->stream_out (ob);
3440 else
3441 streamer_write_uhwi (ob, 0);
3442 if (info->loop_stride)
3443 info->loop_stride->stream_out (ob);
3444 else
3445 streamer_write_uhwi (ob, 0);
3446 if (info->array_index)
3447 info->array_index->stream_out (ob);
3448 else
3449 streamer_write_uhwi (ob, 0);
3450 for (edge = cnode->callees; edge; edge = edge->next_callee)
3451 write_ipa_call_summary (ob, edge);
3452 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3453 write_ipa_call_summary (ob, edge);
3454 }
3455 }
3456 streamer_write_char_stream (ob->main_stream, 0);
3457 produce_asm (ob, NULL);
3458 destroy_output_block (ob);
3459
3460 if (!flag_ipa_cp)
3461 ipa_prop_write_jump_functions ();
3462 }
3463
3464
3465 /* Release inline summary. */
3466
3467 void
3468 ipa_free_fn_summary (void)
3469 {
3470 struct cgraph_node *node;
3471 if (!ipa_call_summaries)
3472 return;
3473 FOR_EACH_DEFINED_FUNCTION (node)
3474 if (!node->alias)
3475 ipa_fn_summaries->get (node)->reset (node);
3476 ipa_fn_summaries->release ();
3477 ipa_fn_summaries = NULL;
3478 ipa_call_summaries->release ();
3479 delete ipa_call_summaries;
3480 ipa_call_summaries = NULL;
3481 edge_predicate_pool.release ();
3482 }
3483
3484 namespace {
3485
3486 const pass_data pass_data_local_fn_summary =
3487 {
3488 GIMPLE_PASS, /* type */
3489 "local-fnsummary", /* name */
3490 OPTGROUP_INLINE, /* optinfo_flags */
3491 TV_INLINE_PARAMETERS, /* tv_id */
3492 0, /* properties_required */
3493 0, /* properties_provided */
3494 0, /* properties_destroyed */
3495 0, /* todo_flags_start */
3496 0, /* todo_flags_finish */
3497 };
3498
3499 class pass_local_fn_summary : public gimple_opt_pass
3500 {
3501 public:
3502 pass_local_fn_summary (gcc::context *ctxt)
3503 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
3504 {}
3505
3506 /* opt_pass methods: */
3507 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
3508 virtual unsigned int execute (function *)
3509 {
3510 return compute_fn_summary_for_current ();
3511 }
3512
3513 }; // class pass_local_fn_summary
3514
3515 } // anon namespace
3516
3517 gimple_opt_pass *
3518 make_pass_local_fn_summary (gcc::context *ctxt)
3519 {
3520 return new pass_local_fn_summary (ctxt);
3521 }
3522
3523
3524 /* Free inline summary. */
3525
3526 namespace {
3527
3528 const pass_data pass_data_ipa_free_fn_summary =
3529 {
3530 SIMPLE_IPA_PASS, /* type */
3531 "free-fnsummary", /* name */
3532 OPTGROUP_NONE, /* optinfo_flags */
3533 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
3534 0, /* properties_required */
3535 0, /* properties_provided */
3536 0, /* properties_destroyed */
3537 0, /* todo_flags_start */
3538 /* Early optimizations may make a function unreachable. We cannot
3539 remove unreachable functions as part of the early opts pass because
3540 TODOs are run before subpasses. Do it here. */
3541 ( TODO_remove_functions | TODO_dump_symtab ), /* todo_flags_finish */
3542 };
3543
3544 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
3545 {
3546 public:
3547 pass_ipa_free_fn_summary (gcc::context *ctxt)
3548 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt)
3549 {}
3550
3551 /* opt_pass methods: */
3552 virtual unsigned int execute (function *)
3553 {
3554 ipa_free_fn_summary ();
3555 return 0;
3556 }
3557
3558 }; // class pass_ipa_free_fn_summary
3559
3560 } // anon namespace
3561
3562 simple_ipa_opt_pass *
3563 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
3564 {
3565 return new pass_ipa_free_fn_summary (ctxt);
3566 }
3567
3568 namespace {
3569
3570 const pass_data pass_data_ipa_fn_summary =
3571 {
3572 IPA_PASS, /* type */
3573 "fnsummary", /* name */
3574 OPTGROUP_INLINE, /* optinfo_flags */
3575 TV_IPA_FNSUMMARY, /* tv_id */
3576 0, /* properties_required */
3577 0, /* properties_provided */
3578 0, /* properties_destroyed */
3579 0, /* todo_flags_start */
3580 ( TODO_dump_symtab ), /* todo_flags_finish */
3581 };
3582
3583 class pass_ipa_fn_summary : public ipa_opt_pass_d
3584 {
3585 public:
3586 pass_ipa_fn_summary (gcc::context *ctxt)
3587 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
3588 ipa_fn_summary_generate, /* generate_summary */
3589 ipa_fn_summary_write, /* write_summary */
3590 ipa_fn_summary_read, /* read_summary */
3591 NULL, /* write_optimization_summary */
3592 NULL, /* read_optimization_summary */
3593 NULL, /* stmt_fixup */
3594 0, /* function_transform_todo_flags_start */
3595 NULL, /* function_transform */
3596 NULL) /* variable_transform */
3597 {}
3598
3599 /* opt_pass methods: */
3600 virtual unsigned int execute (function *) { return 0; }
3601
3602 }; // class pass_ipa_fn_summary
3603
3604 } // anon namespace
3605
3606 ipa_opt_pass_d *
3607 make_pass_ipa_fn_summary (gcc::context *ctxt)
3608 {
3609 return new pass_ipa_fn_summary (ctxt);
3610 }