111
|
1 /* Function summary pass.
|
131
|
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
|
111
|
3 Contributed by Jan Hubicka
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it under
|
|
8 the terms of the GNU General Public License as published by the Free
|
|
9 Software Foundation; either version 3, or (at your option) any later
|
|
10 version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 /* Analysis of function bodies used by inter-procedural passes
|
|
22
|
|
23 We estimate for each function
|
|
24 - function body size and size after specializing into given context
|
|
25 - average function execution time in a given context
|
|
26 - function frame size
|
|
27 For each call
|
|
28 - call statement size, time and how often the parameters change
|
|
29
|
|
30 ipa_fn_summary data structures store above information locally (i.e.
|
|
31 parameters of the function itself) and globally (i.e. parameters of
|
|
32 the function created by applying all the inline decisions already
|
|
33 present in the callgraph).
|
|
34
|
|
35 We provide access to the ipa_fn_summary data structure and
|
|
36 basic logic updating the parameters when inlining is performed.
|
|
37
|
|
38 The summaries are context sensitive. Context means
|
|
39 1) partial assignment of known constant values of operands
|
|
40 2) whether function is inlined into the call or not.
|
|
41 It is easy to add more variants. To represent function size and time
|
|
42 that depends on context (i.e. it is known to be optimized away when
|
|
43 context is known either by inlining or from IP-CP and cloning),
|
|
44 we use predicates.
|
|
45
|
|
46 estimate_edge_size_and_time can be used to query
|
|
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
|
|
48 properties of caller and callee after inlining.
|
|
49
|
|
50 Finally pass_inline_parameters is exported. This is used to drive
|
|
51 computation of function parameters used by the early inliner. IPA
|
|
52 inlined performs analysis via its analyze_function method. */
|
|
53
|
|
54 #include "config.h"
|
|
55 #include "system.h"
|
|
56 #include "coretypes.h"
|
|
57 #include "backend.h"
|
|
58 #include "tree.h"
|
|
59 #include "gimple.h"
|
|
60 #include "alloc-pool.h"
|
|
61 #include "tree-pass.h"
|
|
62 #include "ssa.h"
|
|
63 #include "tree-streamer.h"
|
|
64 #include "cgraph.h"
|
|
65 #include "diagnostic.h"
|
|
66 #include "fold-const.h"
|
|
67 #include "print-tree.h"
|
|
68 #include "tree-inline.h"
|
|
69 #include "gimple-pretty-print.h"
|
|
70 #include "params.h"
|
|
71 #include "cfganal.h"
|
|
72 #include "gimple-iterator.h"
|
|
73 #include "tree-cfg.h"
|
|
74 #include "tree-ssa-loop-niter.h"
|
|
75 #include "tree-ssa-loop.h"
|
|
76 #include "symbol-summary.h"
|
|
77 #include "ipa-prop.h"
|
|
78 #include "ipa-fnsummary.h"
|
|
79 #include "cfgloop.h"
|
|
80 #include "tree-scalar-evolution.h"
|
|
81 #include "ipa-utils.h"
|
|
82 #include "cfgexpand.h"
|
|
83 #include "gimplify.h"
|
|
84 #include "stringpool.h"
|
|
85 #include "attribs.h"
|
|
86
|
|
87 /* Summaries. */
|
|
88 function_summary <ipa_fn_summary *> *ipa_fn_summaries;
|
|
89 call_summary <ipa_call_summary *> *ipa_call_summaries;
|
|
90
|
|
91 /* Edge predicates goes here. */
|
|
92 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
|
|
93
|
|
94
|
|
95 /* Dump IPA hints. */
|
|
96 void
|
|
97 ipa_dump_hints (FILE *f, ipa_hints hints)
|
|
98 {
|
|
99 if (!hints)
|
|
100 return;
|
|
101 fprintf (f, "IPA hints:");
|
|
102 if (hints & INLINE_HINT_indirect_call)
|
|
103 {
|
|
104 hints &= ~INLINE_HINT_indirect_call;
|
|
105 fprintf (f, " indirect_call");
|
|
106 }
|
|
107 if (hints & INLINE_HINT_loop_iterations)
|
|
108 {
|
|
109 hints &= ~INLINE_HINT_loop_iterations;
|
|
110 fprintf (f, " loop_iterations");
|
|
111 }
|
|
112 if (hints & INLINE_HINT_loop_stride)
|
|
113 {
|
|
114 hints &= ~INLINE_HINT_loop_stride;
|
|
115 fprintf (f, " loop_stride");
|
|
116 }
|
|
117 if (hints & INLINE_HINT_same_scc)
|
|
118 {
|
|
119 hints &= ~INLINE_HINT_same_scc;
|
|
120 fprintf (f, " same_scc");
|
|
121 }
|
|
122 if (hints & INLINE_HINT_in_scc)
|
|
123 {
|
|
124 hints &= ~INLINE_HINT_in_scc;
|
|
125 fprintf (f, " in_scc");
|
|
126 }
|
|
127 if (hints & INLINE_HINT_cross_module)
|
|
128 {
|
|
129 hints &= ~INLINE_HINT_cross_module;
|
|
130 fprintf (f, " cross_module");
|
|
131 }
|
|
132 if (hints & INLINE_HINT_declared_inline)
|
|
133 {
|
|
134 hints &= ~INLINE_HINT_declared_inline;
|
|
135 fprintf (f, " declared_inline");
|
|
136 }
|
|
137 if (hints & INLINE_HINT_array_index)
|
|
138 {
|
|
139 hints &= ~INLINE_HINT_array_index;
|
|
140 fprintf (f, " array_index");
|
|
141 }
|
|
142 if (hints & INLINE_HINT_known_hot)
|
|
143 {
|
|
144 hints &= ~INLINE_HINT_known_hot;
|
|
145 fprintf (f, " known_hot");
|
|
146 }
|
|
147 gcc_assert (!hints);
|
|
148 }
|
|
149
|
|
150
|
|
/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to constant and
   will get optimized out in specialized clones of the function.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const predicate &exec_pred,
				   const predicate &nonconst_pred_in)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  /* Code that never executes needs no accounting.  */
  if (exec_pred == false)
    return;

  /* Code can only be non-constant on paths where it executes at all.  */
  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && size_time_table)
    return;

  gcc_assert (time >= 0);

  /* Look for an existing entry with the same pair of predicates so the
     contribution can be merged into it.  */
  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  if (i == 256)
    {
      /* Cap the table at 256 entries; further contributions are folded
	 into the first entry, losing their predicate.  */
      i = 0;
      found = true;
      e = &(*size_time_table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      /* No matching entry; append a fresh one.  */
      struct size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      /* Accumulate into the matching (or overflow) entry.  */
      e->size += size;
      e->time += time;
    }
}
|
|
228
|
|
/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  /* If E was already inlined, remember the callee so its now-dead inline
     clone can be removed at the end.  */
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
		   (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  /* Retarget the edge depending on its current form: speculative,
     indirect, or plain direct.  */
  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->count = profile_count::zero ();
  /* The call will never execute; give it zero cost in the summary.  */
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}
|
|
253
|
|
/* Set predicate for edge E.  */

static void
edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
{
  /* If the edge is determined to be never executed, redirect it
     to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
     be optimized out.  */
  if (predicate && *predicate == false
      /* When handling speculative edges, we need to do the redirection
	 just once.  Do it always on the direct edge, so we do not
	 attempt to resolve speculation while duplicating the edge.  */
      && (!e->speculative || e->callee))
    e = redirect_to_unreachable (e);

  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  if (predicate && *predicate != true)
    {
      /* Store a nontrivial predicate, allocating pool storage on
	 first use.  */
      if (!es->predicate)
	es->predicate = edge_predicate_pool.allocate ();
      *es->predicate = *predicate;
    }
  else
    {
      /* A NULL or always-true predicate means the edge is unconditional;
	 release any previously stored predicate.  */
      if (es->predicate)
	edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
    }
}
|
|
283
|
|
284 /* Set predicate for hint *P. */
|
|
285
|
|
286 static void
|
|
287 set_hint_predicate (predicate **p, predicate new_predicate)
|
|
288 {
|
|
289 if (new_predicate == false || new_predicate == true)
|
|
290 {
|
|
291 if (*p)
|
|
292 edge_predicate_pool.remove (*p);
|
|
293 *p = NULL;
|
|
294 }
|
|
295 else
|
|
296 {
|
|
297 if (!*p)
|
|
298 *p = edge_predicate_pool.allocate ();
|
|
299 **p = new_predicate;
|
|
300 }
|
|
301 }
|
|
302
|
|
303
|
|
/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  Bit
   0 means that condition is known to be false, while bit 1 means that condition
   may or may not be true.  These differs - for example NOT_INLINED condition
   is always false in the second and also builtin_constant_p tests can not use
   the fact that parameter is indeed a constant.

   KNOWN_VALS is partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
   Return clause of possible truths.  When INLINE_P is true, assume that we are
   inlining.

   ERROR_MARK means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    vec<tree> known_vals,
				    vec<ipa_agg_jump_function_p>
				    known_aggs,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;

      /* We allow call stmt to have fewer arguments than the callee function
	 (especially for K&R style programs).  So bound check here (we assume
	 known_aggs vector, if non-NULL, has the same length as
	 known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
			   || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
	{
	  /* Nothing known about the argument: the condition may go
	     either way in both the specialized and generic copy.  */
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->agg_contents)
	{
	  /* The condition tests a value loaded from an aggregate passed
	     (by value or by reference) in the parameter.  */
	  struct ipa_agg_jump_function *agg;

	  if (c->code == predicate::changed
	      && !c->by_ref
	      && (known_vals[c->operand_num] == error_mark_node))
	    continue;

	  if (known_aggs.exists ())
	    {
	      agg = known_aggs[c->operand_num];
	      val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
						c->offset, c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else
	{
	  val = known_vals[c->operand_num];
	  /* ERROR_MARK marks a compile time invariant of unknown value;
	     it only satisfies the "changed" test.  */
	  if (val == error_mark_node && c->code != predicate::changed)
	    val = NULL_TREE;
	}

      if (!val)
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::changed)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* A value of mismatched size cannot satisfy the condition.  */
      if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::is_not_constant)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* Try to fold the comparison to a constant; only a provably false
	 result lets us drop the condition bits.  */
      val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
      res = val
	? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	: NULL;

      if (res && integer_zerop (res))
	continue;

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
|
|
417
|
|
418
|
|
/* Work out what conditions might be true at invocation of E.  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      vec<tree> *known_vals_ptr,
			      vec<ipa_polymorphic_call_context>
			      *known_contexts_ptr,
			      vec<ipa_agg_jump_function_p> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  struct ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  vec<ipa_agg_jump_function_p> known_aggs = vNULL;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
    {
      /* IPA analysis data is available; derive argument values from the
	 jump functions on the edge.  */
      struct ipa_node_params *caller_parms_info, *callee_pi;
      struct ipa_edge_args *args = IPA_EDGE_REF (e);
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      /* For inlined bodies the parameter info lives on the node the body
	 was inlined into.  */
      if (e->caller->global.inlined_to)
	caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
      else
	caller_parms_info = IPA_NODE_REF (e->caller);
      callee_pi = IPA_NODE_REF (e->callee);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
	known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
	known_contexts_ptr->safe_grow_cleared (count);

      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
	  tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
					   ipa_get_type (callee_pi, i));

	  /* Fall back to the call statement argument when the jump
	     function did not yield a constant.  */
	  if (!cst && e->call_stmt
	      && i < (int)gimple_call_num_args (e->call_stmt))
	    {
	      cst = gimple_call_arg (e->call_stmt, i);
	      if (!is_gimple_min_invariant (cst))
		cst = NULL;
	    }
	  if (cst)
	    {
	      gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
	      if (known_vals.exists ())
		known_vals[i] = cst;
	    }
	  else if (inline_p && !es->param[i].change_prob)
	    /* ERROR_MARK encodes "compile time invariant of unknown
	       value".  */
	    known_vals[i] = error_mark_node;

	  if (known_contexts_ptr)
	    (*known_contexts_ptr)[i]
	      = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
	  /* TODO: When IPA-CP starts propagating and merging aggregate jump
	     functions, use its knowledge of the caller too, just like the
	     scalar case above.  */
	  known_aggs[i] = &jf->agg;
	}
    }
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
	   && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      /* No IPA data available; use constant arguments of the call
	 statement directly.  */
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    known_vals[i] = cst;
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p,
				      known_vals, known_aggs, clause_ptr,
				      nonspec_clause_ptr);

  /* Hand the vectors over to the caller, or release them when the
     caller is not interested.  */
  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    known_aggs.release ();
}
|
|
526
|
|
527
|
|
528 /* Allocate the function summary. */
|
|
529
|
|
530 static void
|
|
531 ipa_fn_summary_alloc (void)
|
|
532 {
|
|
533 gcc_checking_assert (!ipa_fn_summaries);
|
|
534 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
|
|
535 ipa_call_summaries = new ipa_call_summary_t (symtab, false);
|
|
536 }
|
|
537
|
131
|
/* Release resources held by a call summary: the pool-allocated edge
   predicate (if any) and the per-parameter change-probability vector.  */
ipa_call_summary::~ipa_call_summary ()
{
  if (predicate)
    edge_predicate_pool.remove (predicate);

  param.release ();
}
|
|
545
|
131
|
/* Release resources held by a function summary: pool-allocated hint
   predicates and the condition and size/time vectors.  */
ipa_fn_summary::~ipa_fn_summary ()
{
  if (loop_iterations)
    edge_predicate_pool.remove (loop_iterations);
  if (loop_stride)
    edge_predicate_pool.remove (loop_stride);
  if (array_index)
    edge_predicate_pool.remove (array_index);
  vec_free (conds);
  vec_free (size_time_table);
}
|
111
|
557
|
|
/* Drop the call summaries of all outgoing edges of NODE, both direct
   and indirect.  */
void
ipa_fn_summary_t::remove_callees (cgraph_node *node)
{
  cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    ipa_call_summaries->remove (e);
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->remove (e);
}
|
|
567
|
|
/* Same as remap_predicate_after_duplication but handle hint predicate *P.
   Additionally care about allocating new memory slot for updated predicate
   and set it to NULL when it becomes true or false (and thus uninteresting).
 */

static void
remap_hint_predicate_after_duplication (predicate **p,
					clause_t possible_truths)
{
  predicate new_predicate;

  if (!*p)
    return;

  new_predicate = (*p)->remap_after_duplication (possible_truths);
  /* We do not want to free previous predicate; it is used by node origin.  */
  *p = NULL;
  /* set_hint_predicate allocates a fresh slot (or clears *P when the
     remapped predicate became trivial).  */
  set_hint_predicate (p, new_predicate);
}
|
|
587
|
|
588
|
|
/* Hook that is called by cgraph.c when a node is duplicated.  */
void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *,
			     ipa_fn_summary *info)
{
  new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      struct ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      /* Collect the replacement constant (if any) for each parameter.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
	    {
	      if (((!r->old_tree && r->parm_num == i)
		   || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
		  && r->replace_p && !r->ref_p)
		{
		  known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  known_vals,
					  vNULL,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);
      known_vals.release ();

      /* Seed the new table with the mandatory unconditional entry.  */
      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as an optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
	{
	  predicate new_exec_pred;
	  predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
			 (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
			 (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
	{
	  predicate new_predicate;
	  struct ipa_call_summary *es = ipa_call_summaries->get_create (edge);
	  /* Remember the successor: edge_set_predicate may redirect or
	     remove EDGE.  */
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  predicate new_predicate;
	  struct ipa_call_summary *es = ipa_call_summaries->get_create (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      remap_hint_predicate_after_duplication (&info->loop_iterations,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->array_index,
					      possible_truths);

      /* If inliner or someone after inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because size vectors already contains
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      info->size_time_table = vec_safe_copy (info->size_time_table);
      /* Hint predicates are pool-allocated; give the copy its own slots
	 instead of sharing them with SRC.  */
      if (info->loop_iterations)
	{
	  predicate p = *info->loop_iterations;
	  info->loop_iterations = NULL;
	  set_hint_predicate (&info->loop_iterations, p);
	}
      if (info->loop_stride)
	{
	  predicate p = *info->loop_stride;
	  info->loop_stride = NULL;
	  set_hint_predicate (&info->loop_stride, p);
	}
      if (info->array_index)
	{
	  predicate p = *info->array_index;
	  info->array_index = NULL;
	  set_hint_predicate (&info->array_index, p);
	}
    }
  if (!dst->global.inlined_to)
    ipa_update_overall_fn_summary (dst);
}
|
|
740
|
|
741
|
|
/* Hook that is called by cgraph.c when a node is duplicated.  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
			       struct cgraph_edge *dst,
			       struct ipa_call_summary *srcinfo,
			       struct ipa_call_summary *info)
{
  new (info) ipa_call_summary (*srcinfo);
  /* Do not share the pool-allocated predicate; edge_set_predicate makes
     a fresh copy for DST.  */
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  /* When an indirect call was resolved into a direct one, the call got
     cheaper; compensate the statically estimated size and time.  */
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
			       - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
			       - eni_time_weights.call_cost);
    }
}
|
|
762
|
|
/* Dump edge summaries associated to NODE and recursively to all clones.
   Indent by INDENT.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       struct ipa_fn_summary *info)
{
  struct cgraph_edge *edge;
  /* Direct calls first.  */
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
	       "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i",
	       indent, "", callee->name (), callee->order,
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
	       indent, "", es->loop_depth, edge->sreal_frequency ().to_double (),
	       es->call_stmt_size, es->call_stmt_time);

      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      if (s != NULL)
	fprintf (f, "callee size:%2i stack:%2i",
		 (int) (s->size / ipa_fn_summary::size_scale),
		 (int) s->estimated_stack_size);

      if (es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      if (es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	  }
      if (!edge->inline_failed)
	{
	  /* Recurse into inlined callees so the whole inline tree of
	     NODE is shown.  */
	  ipa_fn_summary *s = ipa_fn_summaries->get (callee);
	  fprintf (f, "%*sStack frame offset %i, callee self size %i,"
		   " callee size %i\n",
		   indent + 2, "",
		   (int) s->stack_frame_offset,
		   (int) s->estimated_self_stack_size,
		   (int) s->estimated_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  /* Then indirect calls.  */
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->sreal_frequency ().to_double (), es->call_stmt_size,
	       es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}
|
|
840
|
|
841
|
|
/* Dump the function summary of NODE (and the summaries of its calls)
   to F.  */
void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      struct ipa_fn_summary *s = ipa_fn_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  fprintf (f, "\n global time: %f\n", s->time.to_double ());
	  fprintf (f, " self size: %i\n", s->self_size);
	  fprintf (f, " global size: %i\n", s->size);
	  fprintf (f, " min size: %i\n", s->min_size);
	  fprintf (f, " self stack: %i\n",
		   (int) s->estimated_self_stack_size);
	  fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, " estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, " In SCC: %i\n", (int) s->scc_no);
	  /* Dump every size/time entry with its predicates.  */
	  for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	    {
	      fprintf (f, " size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ", executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ", nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  if (s->loop_iterations)
	    {
	      fprintf (f, " loop iterations:");
	      s->loop_iterations->dump (f, s->conds);
	    }
	  if (s->loop_stride)
	    {
	      fprintf (f, " loop stride:");
	      s->loop_stride->dump (f, s->conds);
	    }
	  if (s->array_index)
	    {
	      fprintf (f, " array index:");
	      s->array_index->dump (f, s->conds);
	    }
	  fprintf (f, " calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
|
|
910
|
|
/* Like ipa_dump_fn_summary but print to stderr; convenient to call from
   a debugger.  */
DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
|
|
916
|
|
/* Dump function summaries of all defined functions to file F.  */

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  /* Skip bodies already inlined into other functions; only toplevel
     (non-inlined) nodes are dumped.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      ipa_dump_fn_summary (f, node);
}
|
|
926
|
|
927 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
|
|
928 boolean variable pointed to by DATA. */
|
|
929
|
|
930 static bool
|
|
931 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
|
|
932 void *data)
|
|
933 {
|
|
934 bool *b = (bool *) data;
|
|
935 *b = true;
|
|
936 return true;
|
|
937 }
|
|
938
|
|
/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
   PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
        *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      /* Walk the virtual use-def chain from STMT; mark_modified sets
         MODIFIED if any aliasing store to the parameter is found.  */
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
                          NULL);
      if (!modified)
        {
          if (size_p)
            *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
          return op;
        }
    }
  return NULL_TREE;
}
|
|
973
|
|
974 /* If OP refers to value of function parameter, return the corresponding
|
|
975 parameter. Also traverse chains of SSA register assignments. If non-NULL,
|
|
976 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
|
|
977 stored to *SIZE_P in that case too. */
|
|
978
|
|
979 static tree
|
|
980 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
|
|
981 {
|
|
982 tree res = unmodified_parm_1 (stmt, op, size_p);
|
|
983 if (res)
|
|
984 return res;
|
|
985
|
|
986 if (TREE_CODE (op) == SSA_NAME
|
|
987 && !SSA_NAME_IS_DEFAULT_DEF (op)
|
|
988 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
|
|
989 return unmodified_parm (SSA_NAME_DEF_STMT (op),
|
|
990 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
|
|
991 size_p);
|
|
992 return NULL_TREE;
|
|
993 }
|
|
994
|
|
/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
                                  gimple *stmt, tree op, int *index_p,
                                  HOST_WIDE_INT *size_p,
                                  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      /* Direct (non-aggregate) use of a parameter.  */
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
        return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
        return false;
      /* Look through a single SSA copy; if the rhs is not a memory
         reference, recurse on it as a plain value.  */
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
        return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
                                                 aggpos);
    }

  /* Otherwise try to interpret OP as a load from an aggregate passed to
     a parameter.  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
                                 stmt, op, index_p, &aggpos->offset,
                                 size_p, &aggpos->by_ref);
}
|
|
1038
|
|
/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of statements goes away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
        return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
         and stores to return value or parameters are often free after
         inlining due to SRA and further combining.
         Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
          || rhs_code == VIEW_CONVERT_EXPR
          || rhs_code == ADDR_EXPR
          || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          tree lhs = gimple_assign_lhs (stmt);
          tree inner_rhs = get_base_address (rhs);
          tree inner_lhs = get_base_address (lhs);
          bool rhs_free = false;
          bool lhs_free = false;

          if (!inner_rhs)
            inner_rhs = rhs;
          if (!inner_lhs)
            inner_lhs = lhs;

          /* Reads of parameter are expected to be free.  */
          if (unmodified_parm (stmt, inner_rhs, NULL))
            rhs_free = true;
          /* Match expressions of form &this->field.  Those will most likely
             combine with something upstream after inlining.  */
          else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
            {
              tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
              if (TREE_CODE (op) == PARM_DECL)
                rhs_free = true;
              else if (TREE_CODE (op) == MEM_REF
                       && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
                rhs_free = true;
            }

          /* When parameter is not SSA register because its address is taken
             and it is just copied into one, the statement will be completely
             free after inlining (we will copy propagate backward).  */
          if (rhs_free && is_gimple_reg (lhs))
            return 2;

          /* Reads of parameters passed by reference
             expected to be free (i.e. optimized out after inlining).  */
          if (TREE_CODE (inner_rhs) == MEM_REF
              && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
            rhs_free = true;

          /* Copying parameter passed by reference into gimple register is
             probably also going to copy propagate, but we can't be quite
             sure.  */
          if (rhs_free && is_gimple_reg (lhs))
            lhs_free = true;

          /* Writes to parameters, parameters passed by value and return value
             (either directly or passed via invisible reference) are free.

             TODO: We ought to handle testcase like
             struct a {int a,b;};
             struct a
             retrurnsturct (void)
             {
               struct a a ={1,2};
               return a;
             }

             This translates into:

             retrurnsturct ()
             {
               int a$b;
               int a$a;
               struct a a;
               struct a D.2739;

               <bb 2>:
               D.2739.a = 1;
               D.2739.b = 2;
               return D.2739;

             }
             For that we either need to copy ipa-split logic detecting writes
             to return value.  */
          if (TREE_CODE (inner_lhs) == PARM_DECL
              || TREE_CODE (inner_lhs) == RESULT_DECL
              || (TREE_CODE (inner_lhs) == MEM_REF
                  && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
                      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
                          && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
                          && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
                                                      (inner_lhs,
                                                       0))) == RESULT_DECL))))
            lhs_free = true;
          if (lhs_free
              && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
            rhs_free = true;
          if (lhs_free && rhs_free)
            return 1;
        }
      return 0;
    default:
      return 0;
    }
}
|
|
1169
|
|
1170
|
|
/* If BB ends by a conditional we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
                                   struct ipa_fn_summary *summary,
                                   basic_block bb)
{
  gimple *last;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree op2;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  /* Only conditions comparing against an IP-invariant rhs can become
     predicate conditions.  */
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);
  /* TODO: handle conditionals like
     var = op0 < 4;
     if (var != 0).  */
  if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
                                      ? code : inverted_code);
          /* invert_tree_comparison will return ERROR_MARK on FP
             comparisons that are not EQ/NE instead of returning proper
             unordered one.  Be sure it is not confused with NON_CONSTANT.  */
          if (this_code != ERROR_MARK)
            {
              predicate p
                = add_condition (summary, index, size, &aggpos, this_code,
                                 unshare_expr_without_location
                                 (gimple_cond_rhs (last)));
              e->aux = edge_predicate_pool.allocate ();
              *(predicate *) e->aux = p;
            }
        }
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
                                         &aggpos))
    return;
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, index, size, &aggpos,
                                   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}
|
|
1254
|
|
1255
|
|
/* If BB ends by a switch we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
                                     struct ipa_fn_summary *summary,
                                     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    return;

  /* Initialize all outgoing edge predicates to false; each case label
     below ORs its range condition into the edge it targets (several
     labels may share an edge).  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }
  n = gimple_switch_num_labels (last);
  for (case_idx = 0; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min, max;
      predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);
      min = CASE_LOW (cl);
      max = CASE_HIGH (cl);

      /* For default we might want to construct predicate that none
         of cases is met, but it is bit hard to do not having negations
         of conditionals handy.  */
      if (!min && !max)
        p = true;
      else if (!max)
        p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
                           unshare_expr_without_location (min));
      else
        {
          /* A case range becomes min <= op && op <= max.  */
          predicate p1, p2;
          p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
                              unshare_expr_without_location (min));
          p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
                              unshare_expr_without_location (max));
          p = p1 & p2;
        }
      *(struct predicate *) e->aux
        = p.or_with (summary->conds, *(struct predicate *) e->aux);
    }
}
|
|
1319
|
|
1320
|
|
/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
                       struct cgraph_node *node,
                       struct ipa_fn_summary *summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  /* First attach predicates to individual CFG edges.  */
  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
        {
          /* A block executes when some predecessor executes and the edge
             from it is taken: OR over preds of (pred predicate AND edge
             predicate).  NULL aux means "not known executable yet".  */
          predicate p = false;
          edge e;
          edge_iterator ei;
          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              if (e->src->aux)
                {
                  predicate this_bb_predicate
                    = *(predicate *) e->src->aux;
                  if (e->aux)
                    this_bb_predicate &= (*(struct predicate *) e->aux);
                  p = p.or_with (summary->conds, this_bb_predicate);
                  if (p == true)
                    break;
                }
            }
          if (p == false)
            gcc_checking_assert (!bb->aux);
          else
            {
              if (!bb->aux)
                {
                  done = false;
                  bb->aux = edge_predicate_pool.allocate ();
                  *((predicate *) bb->aux) = p;
                }
              else if (p != *(predicate *) bb->aux)
                {
                  /* This OR operation is needed to ensure monotone data flow
                     in the case we hit the limit on number of clauses and the
                     and/or operations above give approximate answers.  */
                  p = p.or_with (summary->conds, *(predicate *)bb->aux);
                  if (p != *(predicate *) bb->aux)
                    {
                      done = false;
                      *((predicate *) bb->aux) = p;
                    }
                }
            }
        }
    }
}
|
|
1393
|
|
1394
|
|
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
                                    struct ipa_fn_summary *summary,
                                    tree expr,
                                    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;
  HOST_WIDE_INT size;

  /* Strip unary operations; they do not change whether the operand is
     a compile time constant.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (NULL, expr, &size);
  if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
    return add_condition (summary, index, size, NULL, predicate::changed,
                          NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      /* Nonconstant if either operand is nonconstant.  */
      predicate p1 = will_be_nonconstant_expr_predicate
        (info, summary, TREE_OPERAND (expr, 0),
         nonconstant_names);
      if (p1 == true)
        return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 1),
                                               nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      /* Nonconstant if the condition or either arm is nonconstant.  */
      predicate p1 = will_be_nonconstant_expr_predicate
        (info, summary, TREE_OPERAND (expr, 0),
         nonconstant_names);
      if (p1 == true)
        return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 1),
                                               nonconstant_names);
      if (p2 == true)
        return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 2),
                                               nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
|
|
1462
|
|
1463
|
|
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
                               struct ipa_fn_summary *summary,
                               gimple *stmt,
                               vec<predicate> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  predicate op_non_const;
  bool is_load;
  int base_index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
          || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op;
      gcc_assert (gimple_assign_single_p (stmt));
      op = gimple_assign_rhs1 (stmt);
      if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
                                             &aggpos))
        return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
        continue;
      if (TREE_CODE (use) != SSA_NAME)
        return p;
      /* If we know when operand is constant,
         we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
        continue;
      return p;
    }

  /* Seed the result: a load is nonconstant when the aggregate item it
     reads changes; otherwise start from false and OR in the operands.  */
  if (is_load)
    op_non_const =
      add_condition (summary, base_index, size, &aggpos, predicate::changed,
                     NULL);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      HOST_WIDE_INT size;
      tree parm = unmodified_parm (stmt, use, &size);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
        {
          if (index != base_index)
            p = add_condition (summary, index, size, NULL, predicate::changed,
                               NULL_TREE);
          else
            continue;
        }
      else
        p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Record the computed predicate for the SSA result so later statements
     can reuse it.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
|
|
1558
|
|
/* Bookkeeping passed from param_change_prob to the record_modified
   callback of walk_aliased_vdefs.  */

struct record_modified_bb_info
{
  tree op;        /* Parameter value being analyzed (used for dumping).  */
  bitmap bb_set;  /* Indices of basic blocks where the value may be set.  */
  gimple *stmt;   /* The statement whose own vdef is to be ignored.  */
};
|
|
1565
|
|
1566 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
|
|
1567 probability how often it changes between USE_BB.
|
131
|
1568 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
|
111
|
1569 is in different loop nest, we can do better.
|
|
1570 This is all just estimate. In theory we look for minimal cut separating
|
|
1571 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
|
|
1572 anyway. */
|
|
1573
|
|
1574 static basic_block
|
|
1575 get_minimal_bb (basic_block init_bb, basic_block use_bb)
|
|
1576 {
|
|
1577 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
|
131
|
1578 if (l && l->header->count < init_bb->count)
|
111
|
1579 return l->header;
|
|
1580 return init_bb;
|
|
1581 }
|
|
1582
|
|
/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
   set except for info->stmt.  Always returns false so the whole vdef chain
   is walked.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  /* The statement being analyzed does not invalidate its own operand.  */
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  /* Clobbers (end-of-life markers) are not real modifications.  */
  if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
    return false;
  bitmap_set_bit (info->bb_set,
                  SSA_NAME_IS_DEFAULT_DEF (vdef)
                  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
                  : get_minimal_bb
                         (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
                          gimple_bb (info->stmt))->index);
  if (dump_file)
    {
      fprintf (dump_file, " Param ");
      print_generic_expr (dump_file, info->op, TDF_SLIM);
      fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
               gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
               get_minimal_bb
                        (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
                         gimple_bb (info->stmt))->index);
      print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
    }
  return false;
}
|
|
1614
|
|
/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do a conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      if (!bb->count.nonzero_p ())
        return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
        init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
        init_count = get_minimal_bb
                        (gimple_bb (SSA_NAME_DEF_STMT (base)),
                         gimple_bb (stmt))->count;

      /* MAX with 1 so a nonzero-frequency change never rounds to the
         "invariant" value 0.  */
      if (init_count < bb->count)
        return MAX ((init_count.to_sreal_scale (bb->count)
                     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      if (init != error_mark_node)
        return 0;
      if (!bb->count.nonzero_p ())
        return REG_BR_PROB_BASE;
      if (dump_file)
        {
          fprintf (dump_file, " Analyzing param change probablity of ");
          print_generic_expr (dump_file, op, TDF_SLIM);
          fprintf (dump_file, "\n");
        }
      /* Collect the set of blocks that may modify the memory OP refers
         to; record_modified fills info.bb_set.  */
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
                          NULL);
      if (bitmap_bit_p (info.bb_set, bb->index))
        {
          if (dump_file)
            fprintf (dump_file, " Set in same BB as used.\n");
          BITMAP_FREE (info.bb_set);
          return REG_BR_PROB_BASE;
        }

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
         it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
        max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
        {
          fprintf (dump_file, " Set with count ");
          max.dump (dump_file);
          fprintf (dump_file, " and used with count ");
          bb->count.dump (dump_file);
          fprintf (dump_file, " freq %f\n",
                   max.to_sreal_scale (bb->count).to_double ());
        }

      BITMAP_FREE (info.bb_set);
      if (max < bb->count)
        return MAX ((max.to_sreal_scale (bb->count)
                     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
|
|
1716
|
|
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the pointer the predicate in *P.  */

static bool
phi_result_unknown_predicate (struct ipa_node_params *info,
                              ipa_fn_summary *summary, basic_block bb,
                              predicate *p,
                              vec<predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  /* With a single predecessor there is no branch to predicate on.  */
  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  /* Check that all predecessors either are the branching block itself or
     are forwarder-like blocks whose single predecessor is that same
     block (the diamond's top, FIRST_BB).  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
        {
          if (!single_pred_p (e->src))
            return false;
          if (!first_bb)
            first_bb = single_pred (e->src);
          else if (single_pred (e->src) != first_bb)
            return false;
        }
      else
        {
          if (!first_bb)
            first_bb = e->src;
          else if (e->src != first_bb)
            return false;
        }
    }

  if (!first_bb)
    return false;

  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (info, summary,
                                           gimple_cond_lhs (stmt),
                                           nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}
|
|
1775
|
|
/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  */

static void
predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi,
                          predicate *p,
                          vec<predicate> nonconstant_names)
{
  unsigned i;

  /* OR in the nonconstant predicates of all non-invariant arguments;
     bail out early once the result degenerates to "always".  */
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
        {
          gcc_assert (TREE_CODE (arg) == SSA_NAME);
          *p = p->or_with (summary->conds,
                           nonconstant_names[SSA_NAME_VERSION (arg)]);
          if (*p == true)
            return;
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}
|
|
1808
|
|
1809 /* Return predicate specifying when array index in access OP becomes non-constant. */
|
|
1810
|
|
1811 static predicate
|
|
1812 array_index_predicate (ipa_fn_summary *info,
|
|
1813 vec< predicate> nonconstant_names, tree op)
|
|
1814 {
|
|
1815 predicate p = false;
|
|
1816 while (handled_component_p (op))
|
|
1817 {
|
|
1818 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
|
|
1819 {
|
|
1820 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
|
|
1821 p = p.or_with (info->conds,
|
|
1822 nonconstant_names[SSA_NAME_VERSION
|
|
1823 (TREE_OPERAND (op, 1))]);
|
|
1824 }
|
|
1825 op = TREE_OPERAND (op, 0);
|
|
1826 }
|
|
1827 return p;
|
|
1828 }
|
|
1829
|
|
1830 /* For a typical usage of __builtin_expect (a<b, 1), we
|
|
1831 may introduce an extra relation stmt:
|
|
1832 With the builtin, we have
|
|
1833 t1 = a <= b;
|
|
1834 t2 = (long int) t1;
|
|
1835 t3 = __builtin_expect (t2, 1);
|
|
1836 if (t3 != 0)
|
|
1837 goto ...
|
|
1838 Without the builtin, we have
|
|
1839 if (a<=b)
|
|
1840 goto...
|
|
1841 This affects the size/time estimation and may have
|
|
1842 an impact on the earlier inlining.
|
|
1843 Here find this pattern and fix it up later. */
|
|
1844
|
|
static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
	{
	  tree var = gimple_call_lhs (stmt);
	  tree arg = gimple_call_arg (stmt, 0);
	  use_operand_p use_p;
	  gimple *use_stmt;
	  /* MATCH is set once ARG is traced back to a comparison;
	     DONE stops the walk on the first non-conversion definition.  */
	  bool match = false;
	  bool done = false;

	  if (!var || !arg)
	    continue;
	  gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Walk the use-def chain of the first argument, looking through
	     conversions, until a comparison (the "t1 = a <= b" from the
	     pattern in the comment above) or anything else is found.  */
	  while (TREE_CODE (arg) == SSA_NAME)
	    {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
	      if (!is_gimple_assign (stmt_tmp))
		break;
	      switch (gimple_assign_rhs_code (stmt_tmp))
		{
		case LT_EXPR:
		case LE_EXPR:
		case GT_EXPR:
		case GE_EXPR:
		case EQ_EXPR:
		case NE_EXPR:
		  /* Found the comparison feeding the expectation.  */
		  match = true;
		  done = true;
		  break;
		CASE_CONVERT:
		  /* Look through casts such as "t2 = (long int) t1".  */
		  break;
		default:
		  done = true;
		  break;
		}
	      if (done)
		break;
	      arg = gimple_assign_rhs1 (stmt_tmp);
	    }

	  /* Only report the pattern when the builtin's result is used
	     exactly once, by a conditional jump — that is the GIMPLE_COND
	     that folding will later simplify.  */
	  if (match && single_imm_use (var, &use_p, &use_stmt)
	      && gimple_code (use_stmt) == GIMPLE_COND)
	    return use_stmt;
	}
    }
  return NULL;
}
|
|
1902
|
|
/* Return true when the basic blocks contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too.  When it is false, the RESX is not necessary
   on the end of basic block.  */
|
|
1911
|
|
static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  /* Scan the block backwards, starting from the last statement.  */
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The top-level call requires the block to end in a RESX.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Everything before the RESX (or the whole block in the recursive case)
     must be debug statements, clobbers, or a leading label.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      /* Any other statement disqualifies the block.  */
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
|
|
1950
|
|
1951 /* Return true if STMT compute a floating point expression that may be affected
|
|
1952 by -ffast-math and similar flags. */
|
|
1953
|
|
1954 static bool
|
|
1955 fp_expression_p (gimple *stmt)
|
|
1956 {
|
|
1957 ssa_op_iter i;
|
|
1958 tree op;
|
|
1959
|
|
1960 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
|
|
1961 if (FLOAT_TYPE_P (TREE_TYPE (op)))
|
|
1962 return true;
|
|
1963 return false;
|
|
1964 }
|
|
1965
|
|
1966 /* Analyze function body for NODE.
|
|
1967 EARLY indicates run from early optimization pipeline. */
|
|
1968
|
|
static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  sreal time = 0;
  /* Estimate static overhead for function prologue/epilogue and alignment.  */
  int size = 2;
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  sreal freq;
  struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  predicate array_index = true;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  memset(&fbi, 0, sizeof(fbi));
  /* Start the summary from scratch; conds and the size/time table are
     rebuilt below.  */
  info->conds = NULL;
  info->size_time_table = NULL;

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = IPA_NODE_REF (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
	  fbi.param_count = count_formal_params(node->decl);
	  /* One predicate slot per SSA name version.  */
	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length ());
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list.  */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  /* Account the fixed not-inlined overhead (prologue/epilogue).  */
  bb_predicate = predicate::not_inlined ();
  info->account_size_time (2 * ipa_fn_summary::size_scale, 0, bb_predicate,
			   bb_predicate);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info);
  /* Walk the CFG in reverse post order as produced by
     pre_and_rev_post_order_compute.  */
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      /* Relative execution frequency of BB w.r.t. the function entry.  */
      freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(predicate *) bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      /* Compute nonconstant predicates for PHI results.  */
      if (fbi.info && nonconstant_names.exists ())
	{
	  predicate phi_predicate;
	  bool first_phi = true;

	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      /* phi_result_unknown_predicate is evaluated once per BB; if
		 it fails, all PHIs in this BB are left unknown.  */
	      if (first_phi
		  && !phi_result_unknown_predicate (fbi.info, info, bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     builtin_expect call.  Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       freq.to_double (), this_size,
		       this_time);
	    }

	  /* Accumulate the predicate under which some array index in a
	     load or store becomes non-constant; used for the
	     array_index inline hint below.  */
	  if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
	    {
	      predicate this_array_index;
	      this_array_index =
		array_index_predicate (info, nonconstant_names,
				       gimple_assign_rhs1 (stmt));
	      if (this_array_index != false)
		array_index &= this_array_index;
	    }
	  if (gimple_store_p (stmt) && nonconstant_names.exists ())
	    {
	      predicate this_array_index;
	      this_array_index =
		array_index_predicate (info, nonconstant_names,
				       gimple_get_lhs (stmt));
	      if (this_array_index != false)
		array_index &= this_array_index;
	    }


	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      /* Record per-argument probability that the value changes
		 between invocations; used to estimate benefits of cloning
		 and inlining for a known context.  */
	      if (ipa_node_params_sum)
		{
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		    }
		}

	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      sreal final_time = (sreal)this_time * freq;

	      prob = eliminated_by_inlining_prob (stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      struct predicate p = bb_predicate & will_be_nonconstant;

	      /* We can ignore statement when we proved it is never going
		 to happen, but we can not do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += final_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  /* PROB is in range <0,2>; account the fraction eliminated
		     by inlining under the not-inlined predicate and the rest
		     unconditionally.  */
		  if (prob)
		    {
		      predicate ip = bb_predicate & predicate::not_inlined ();
		      info->account_size_time (this_size * prob,
					       (this_time * prob) / 2, ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (this_time * (2 - prob) / 2),
					     bb_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, "   fp_expression set\n");
		}

	      gcc_assert (time >= 0);
	      gcc_assert (size >= 0);
	    }
	}
    }
  set_hint_predicate (&ipa_fn_summaries->get_create (node)->array_index,
		      array_index);
  free (order);

  /* With SSA-name predicates available at IPA time, derive loop iteration
     and stride hints from scalar evolution analysis.  */
  if (nonconstant_names.exists () && !early)
    {
      struct loop *loop;
      predicate loop_iterations = true;
      predicate loop_stride = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      FOR_EACH_LOOP (loop, 0)
	{
	  vec<edge> exits;
	  edge ex;
	  unsigned int j;
	  struct tree_niter_desc niter_desc;
	  bb_predicate = *(predicate *) loop->header->aux;

	  /* Collect predicates under which the number of iterations of
	     some exit becomes non-constant.  */
	  exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	    {
	      predicate will_be_nonconstant
		= will_be_nonconstant_expr_predicate (fbi.info, info,
						      niter_desc.niter,
						      nonconstant_names);
	      if (will_be_nonconstant != true)
		will_be_nonconstant = bb_predicate & will_be_nonconstant;
	      if (will_be_nonconstant != true
		  && will_be_nonconstant != false)
		/* This is slightly imprecise.  We may want to represent each
		   loop with independent predicate.  */
		loop_iterations &= will_be_nonconstant;
	    }
	  exits.release ();
	}

      /* To avoid quadratic behavior we analyze stride predicates only
	 with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  basic_block *body = get_loop_body (loop);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      bb_predicate = *(predicate *) body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  /* Only induction variables with non-invariant step are
		     interesting: a constant stride can never become "more
		     constant" by specialization.  */
		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (fbi.info, info,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    /* This is slightly imprecise.  We may want to represent
		       each loop with independent predicate.  */
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  free (body);
	}
      ipa_fn_summary *s = ipa_fn_summaries->get (node);
      set_hint_predicate (&s->loop_iterations, loop_iterations);
      set_hint_predicate (&s->loop_stride, loop_stride);
      scev_finalize ();
    }
  /* Release the per-BB and per-edge predicates stored in the aux fields
     by compute_bb_predicates.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((predicate *) e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summary *s = ipa_fn_summaries->get (node);
  s->time = time;
  s->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
|
|
2371
|
|
2372
|
|
2373 /* Compute function summary.
|
|
2374 EARLY is true when we compute parameters during early opts. */
|
|
2375
|
|
void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;
  struct ipa_fn_summary *info;

  gcc_assert (!node->global.inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  Drop any stale summary (and its callee
     summaries) before recomputing from scratch.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  info = ipa_fn_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk.thunk_p
		    ? estimated_stack_frame_size (node) : 0;
  info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;
  info->stack_frame_offset = 0;

  if (node->thunk.thunk_p)
    {
      /* Thunks have no analyzable body; synthesize a summary consisting of
	 the call to the thunk target plus fixed overhead.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      predicate t = true;

      node->local.can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale * 2, 2, t, t);
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      info->self_size = info->size;
      /* We can not inline instrumentation clones.  */
      if (node->thunk.add_pointer_bounds_args)
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_CHKP;
	}
      else if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
       /* Even is_gimple_min_invariant rely on current_function_decl.  */
       push_cfun (DECL_STRUCT_FUNCTION (node->decl));

       /* Can this function be inlined at all?  */
       if (!opt_for_fn (node->decl, optimize)
	   && !lookup_attribute ("always_inline",
				 DECL_ATTRIBUTES (node->decl)))
	 info->inlinable = false;
       else
	 info->inlinable = tree_inlinable_function_p (node->decl);

       /* Type attributes can use parameter indices to describe them.  */
       if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
	     /* Likewise for #pragma omp declare simd functions or functions
		with simd attribute.  */
	   || lookup_attribute ("omp declare simd",
				DECL_ATTRIBUTES (node->decl)))
	 node->local.can_change_signature = false;
       else
	 {
	   /* Otherwise, inlinable functions always can change signature.  */
	   if (info->inlinable)
	     node->local.can_change_signature = true;
	   else
	     {
	       /* Functions calling builtin_apply can not change signature.  */
	       for (e = node->callees; e; e = e->next_callee)
		 {
		   tree cdecl = e->callee->decl;
		   if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		       || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		     break;
		 }
	       /* E is non-NULL iff the loop above found such a call.  */
	       node->local.can_change_signature = !e;
	     }
	 }
       /* Functions called by instrumentation thunk can't change signature
	  because instrumentation thunk modification is not supported.  */
       if (node->local.can_change_signature)
	 for (e = node->callers; e; e = e->next_caller)
	   if (e->caller->thunk.thunk_p
	       && e->caller->thunk.add_pointer_bounds_args)
	     {
	       node->local.can_change_signature = false;
	       break;
	     }
       analyze_function_body (node, early);
       pop_cfun ();
     }
  for (e = node->callees; e; e = e->next_callee)
    if (e->callee->comdat_local_p ())
      break;
  node->calls_comdat_local = (e != NULL);

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  info->size = info->self_size;
  info->stack_frame_offset = 0;
  info->estimated_stack_size = info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary but because computation happens in
     different order the roundoff errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  gcc_assert (info->size == info->self_size);
}
|
|
2494
|
|
2495
|
|
2496 /* Compute parameters of functions used by inliner using
|
|
2497 current_function_decl. */
|
|
2498
|
|
2499 static unsigned int
|
|
2500 compute_fn_summary_for_current (void)
|
|
2501 {
|
|
2502 compute_fn_summary (cgraph_node::get (current_function_decl), true);
|
|
2503 return 0;
|
|
2504 }
|
|
2505
|
|
2506 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
|
|
2507 KNOWN_CONTEXTS and KNOWN_AGGS. */
|
|
2508
|
|
static bool
estimate_edge_devirt_benefit (struct cgraph_edge *ie,
			      int *size, int *time,
			      vec<tree> known_vals,
			      vec<ipa_polymorphic_call_context> known_contexts,
			      vec<ipa_agg_jump_function_p> known_aggs)
{
  tree target;
  struct cgraph_node *callee;
  struct ipa_fn_summary *isummary;
  enum availability avail;
  bool speculative;

  /* Without any context information there is nothing to devirtualize by.  */
  if (!known_vals.exists () && !known_contexts.exists ())
    return false;
  if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
    return false;

  /* Try to resolve the indirect call to a single target in this context;
     speculative targets are not a sure benefit, so bail out on them.  */
  target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
					 known_aggs, &speculative);
  if (!target || speculative)
    return false;

  /* Account for difference in cost between indirect and direct calls.  */
  *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
  *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
  gcc_checking_assert (*time >= 0);
  gcc_checking_assert (*size >= 0);

  /* Report a devirtualization benefit (return true) only when the resolved
     callee has an available, inlinable body.  */
  callee = cgraph_node::get (target);
  if (!callee || !callee->definition)
    return false;
  callee = callee->function_symbol (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  isummary = ipa_fn_summaries->get (callee);
  return isummary->inlinable;
}
|
|
2547
|
|
2548 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
|
|
2549 handle edge E with probability PROB.
|
|
2550 Set HINTS if edge may be devirtualized.
|
|
2551 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
|
|
2552 site. */
|
|
2553
|
|
2554 static inline void
|
|
2555 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
|
|
2556 sreal *time,
|
|
2557 int prob,
|
|
2558 vec<tree> known_vals,
|
|
2559 vec<ipa_polymorphic_call_context> known_contexts,
|
|
2560 vec<ipa_agg_jump_function_p> known_aggs,
|
|
2561 ipa_hints *hints)
|
|
2562 {
|
|
2563 struct ipa_call_summary *es = ipa_call_summaries->get (e);
|
|
2564 int call_size = es->call_stmt_size;
|
|
2565 int call_time = es->call_stmt_time;
|
|
2566 int cur_size;
|
|
2567 if (!e->callee
|
|
2568 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
|
|
2569 known_vals, known_contexts, known_aggs)
|
|
2570 && hints && e->maybe_hot_p ())
|
|
2571 *hints |= INLINE_HINT_indirect_call;
|
|
2572 cur_size = call_size * ipa_fn_summary::size_scale;
|
|
2573 *size += cur_size;
|
|
2574 if (min_size)
|
|
2575 *min_size += cur_size;
|
|
2576 if (prob == REG_BR_PROB_BASE)
|
131
|
2577 *time += ((sreal)call_time) * e->sreal_frequency ();
|
111
|
2578 else
|
131
|
2579 *time += ((sreal)call_time * prob) * e->sreal_frequency ();
|
111
|
2580 }
|
|
2581
|
|
2582
|
|
2583
|
|
2584 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
|
|
2585 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
|
|
2586 describe context of the call site. */
|
|
2587
|
|
static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
			      int *min_size, sreal *time,
			      ipa_hints *hints,
			      clause_t possible_truths,
			      vec<tree> known_vals,
			      vec<ipa_polymorphic_call_context> known_contexts,
			      vec<ipa_agg_jump_function_p> known_aggs)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get_create (e);

      /* Do not care about zero sized builtins.  */
      if (e->inline_failed && !es->call_stmt_size)
	{
	  gcc_checking_assert (!es->call_stmt_time);
	  continue;
	}
      /* Account the edge only when its predicate can still be true under
	 the known clause (or when it has no predicate at all).  */
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	{
	  if (e->inline_failed)
	    {
	      /* Predicates of calls shall not use NOT_CHANGED codes,
		 so we do not need to compute probabilities.  */
	      estimate_edge_size_and_time (e, size,
					   es->predicate ? NULL : min_size,
					   time, REG_BR_PROB_BASE,
					   known_vals, known_contexts,
					   known_aggs, hints);
	    }
	  else
	    /* The edge was already inlined; recurse into the inlined body
	       so its own calls are accounted too.  */
	    estimate_calls_size_and_time (e->callee, size, min_size, time,
					  hints,
					  possible_truths,
					  known_vals, known_contexts,
					  known_aggs);
	}
    }
  /* Indirect edges are accounted the same way, except they can not have
     been inlined and thus never recurse.  */
  for (e = node->indirect_calls; e; e = e->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get_create (e);
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	estimate_edge_size_and_time (e, size,
				     es->predicate ? NULL : min_size,
				     time, REG_BR_PROB_BASE,
				     known_vals, known_contexts, known_aggs,
				     hints);
    }
}
|
|
2641
|
|
2642
|
|
/* Estimate size and time needed to execute NODE assuming
   POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
   information about NODE's arguments.  If non-NULL use also probability
   information present in INLINE_PARAM_SUMMARY vector.
   Additionally determine hints determined by the context.  Finally compute
   minimal size needed for the call that is independent on the call context and
   can be used for fast estimates.  Return the values in RET_SIZE,
   RET_MIN_SIZE, RET_TIME and RET_HINTS.  */
|
|
2651
|
|
void
estimate_node_size_and_time (struct cgraph_node *node,
			     clause_t possible_truths,
			     clause_t nonspec_possible_truths,
			     vec<tree> known_vals,
			     vec<ipa_polymorphic_call_context> known_contexts,
			     vec<ipa_agg_jump_function_p> known_aggs,
			     int *ret_size, int *ret_min_size,
			     sreal *ret_time,
			     sreal *ret_nonspecialized_time,
			     ipa_hints *ret_hints,
			     vec<inline_param_summary>
			     inline_param_summary)
{
  struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  size_time_entry *e;
  int size = 0;
  sreal time = 0;
  int min_size = 0;
  ipa_hints hints = 0;
  int i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bool found = false;
      fprintf (dump_file, "   Estimating body: %s/%i\n"
	       "   Known to be false: ", node->name (),
	       node->order);

      /* Dump conditions the clause proves false in this context.  */
      for (i = predicate::not_inlined_condition;
	   i < (predicate::first_dynamic_condition
		+ (int) vec_safe_length (info->conds)); i++)
	if (!(possible_truths & (1 << i)))
	  {
	    if (found)
	      fprintf (dump_file, ", ");
	    found = true;
	    dump_condition (dump_file, info->conds, i);
	  }
    }

  /* Start from the cost of all surviving calls; body statements are added
     from the size/time table below.  */
  estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
				known_vals, known_contexts, known_aggs);
  sreal nonspecialized_time = time;

  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
    {
      bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);

      /* Because predicates are conservative, it can happen that nonconst is 1
	 but exec is 0.  */
      if (exec)
	{
	  bool nonconst = e->nonconst_predicate.evaluate (possible_truths);

	  gcc_checking_assert (e->time >= 0);
	  gcc_checking_assert (time >= 0);

	  /* We compute specialized size only because size of nonspecialized
	     copy is context independent.

	     The difference between nonspecialized execution and specialized is
	     that nonspecialized is not going to have optimized out computations
	     known to be constant in a specialized setting.  */
	  if (nonconst)
	    size += e->size;
	  nonspecialized_time += e->time;
	  if (!nonconst)
	    ;
	  else if (!inline_param_summary.exists ())
	    {
	      /* No per-parameter probabilities: count the entry fully.  */
	      if (nonconst)
		time += e->time;
	    }
	  else
	    {
	      /* Weight the entry's time by the probability (scaled by
		 REG_BR_PROB_BASE) that its nonconst predicate holds.  */
	      int prob = e->nonconst_predicate.probability
					       (info->conds, possible_truths,
						inline_param_summary);
	      gcc_checking_assert (prob >= 0);
	      gcc_checking_assert (prob <= REG_BR_PROB_BASE);
	      time += e->time * prob / REG_BR_PROB_BASE;
	    }
	  gcc_checking_assert (time >= 0);
	}
     }
  /* Entry 0 of the table is the always-true overhead entry; its size is by
     construction the context-independent minimum.  */
  gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
  gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
  min_size = (*info->size_time_table)[0].size;
  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);
  /* nonspecialized_time should be always bigger than specialized time.
     Roundoff issues however may get into the way.  */
  gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);

  /* Roundoff issues may make specialized time bigger than nonspecialized
     time.  We do not really want that to happen because some heuristics
     may get confused by seeing negative speedups.  */
  if (time > nonspecialized_time)
    time = nonspecialized_time;

  /* Hints fire when the corresponding predicate is not proven true under
     the known clause, i.e. when specialization may still help.  */
  if (info->loop_iterations
      && !info->loop_iterations->evaluate (possible_truths))
    hints |= INLINE_HINT_loop_iterations;
  if (info->loop_stride
      && !info->loop_stride->evaluate (possible_truths))
    hints |= INLINE_HINT_loop_stride;
  if (info->array_index
      && !info->array_index->evaluate (possible_truths))
    hints |= INLINE_HINT_array_index;
  if (info->scc_no)
    hints |= INLINE_HINT_in_scc;
  if (DECL_DECLARED_INLINE_P (node->decl))
    hints |= INLINE_HINT_declared_inline;

  /* Sizes were accumulated in ipa_fn_summary::size_scale units; scale back.  */
  size = RDIV (size, ipa_fn_summary::size_scale);
  min_size = RDIV (min_size, ipa_fn_summary::size_scale);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n   size:%i time:%f nonspec time:%f\n", (int) size,
	     time.to_double (), nonspecialized_time.to_double ());
  if (ret_time)
    *ret_time = time;
  if (ret_nonspecialized_time)
    *ret_nonspecialized_time = nonspecialized_time;
  if (ret_size)
    *ret_size = size;
  if (ret_min_size)
    *ret_min_size = min_size;
  if (ret_hints)
    *ret_hints = hints;
  return;
}
|
|
2785
|
|
2786
|
|
/* Estimate size and time needed to execute callee of EDGE assuming that
   parameters known to be constant at caller of EDGE are propagated.
   KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
   and types for parameters.  Results are returned via the RET_* output
   pointers (each may be NULL when the caller is not interested).  */

void
estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
				   vec<tree> known_vals,
				   vec<ipa_polymorphic_call_context>
				   known_contexts,
				   vec<ipa_agg_jump_function_p> known_aggs,
				   int *ret_size, sreal *ret_time,
				   sreal *ret_nonspec_time,
				   ipa_hints *hints)
{
  clause_t clause, nonspec_clause;

  /* Derive the clause of conditions satisfied by the known constant
     arguments (CLAUSE) and the clause ignoring them (NONSPEC_CLAUSE).  */
  evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
				      &clause, &nonspec_clause);
  /* The final vNULL means no inline parameter summary is available, so
     times are not scaled by per-parameter change probabilities.  */
  estimate_node_size_and_time (node, clause, nonspec_clause,
			       known_vals, known_contexts,
			       known_aggs, ret_size, NULL, ret_time,
			       ret_nonspec_time, hints, vNULL);
}
|
|
2811
|
|
2812
|
|
2813 /* Update summary information of inline clones after inlining.
|
|
2814 Compute peak stack usage. */
|
|
2815
|
|
2816 static void
|
|
2817 inline_update_callee_summaries (struct cgraph_node *node, int depth)
|
|
2818 {
|
|
2819 struct cgraph_edge *e;
|
131
|
2820 ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
|
|
2821 ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
|
111
|
2822 HOST_WIDE_INT peak;
|
|
2823
|
|
2824 callee_info->stack_frame_offset
|
|
2825 = caller_info->stack_frame_offset
|
|
2826 + caller_info->estimated_self_stack_size;
|
|
2827 peak = callee_info->stack_frame_offset
|
|
2828 + callee_info->estimated_self_stack_size;
|
131
|
2829
|
|
2830 ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to);
|
|
2831 if (s->estimated_stack_size < peak)
|
|
2832 s->estimated_stack_size = peak;
|
111
|
2833 ipa_propagate_frequency (node);
|
|
2834 for (e = node->callees; e; e = e->next_callee)
|
|
2835 {
|
|
2836 if (!e->inline_failed)
|
|
2837 inline_update_callee_summaries (e->callee, depth);
|
|
2838 ipa_call_summaries->get (e)->loop_depth += depth;
|
|
2839 }
|
|
2840 for (e = node->indirect_calls; e; e = e->next_callee)
|
|
2841 ipa_call_summaries->get (e)->loop_depth += depth;
|
|
2842 }
|
|
2843
|
|
/* Update change_prob of EDGE after INLINED_EDGE has been inlined.
   When function A is inlined in B and A calls C with parameter that
   changes with probability PROB1 and C is known to be pass-through
   of argument of B that changes with probability PROB2, the probability
   of change is now PROB1*PROB2.  */

static void
remap_edge_change_prob (struct cgraph_edge *inlined_edge,
			struct cgraph_edge *edge)
{
  /* Jump functions exist only when IPA parameter analysis has run.  */
  if (ipa_node_params_sum)
    {
      int i;
      struct ipa_edge_args *args = IPA_EDGE_REF (edge);
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct ipa_call_summary *inlined_es
	= ipa_call_summaries->get (inlined_edge);

      for (i = 0; i < ipa_get_cs_argument_count (args); i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  if (jfunc->type == IPA_JF_PASS_THROUGH
	      || jfunc->type == IPA_JF_ANCESTOR)
	    {
	      /* ID is the formal parameter of the enclosing (now merged)
		 function that argument I is derived from.  */
	      int id = jfunc->type == IPA_JF_PASS_THROUGH
		       ? ipa_get_jf_pass_through_formal_id (jfunc)
		       : ipa_get_jf_ancestor_formal_id (jfunc);
	      if (id < (int) inlined_es->param.length ())
		{
		  int prob1 = es->param[i].change_prob;
		  int prob2 = inlined_es->param[id].change_prob;
		  /* Combined probability, scaled by REG_BR_PROB_BASE.  */
		  int prob = combine_probabilities (prob1, prob2);

		  /* Do not round a nonzero probability down to zero:
		     zero would claim the value is known not to change.  */
		  if (prob1 && prob2 && !prob)
		    prob = 1;

		  es->param[i].change_prob = prob;
		}
	    }
	}
    }
}
|
|
2886
|
|
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      struct ipa_fn_summary *info,
		      struct ipa_fn_summary *callee_info,
		      vec<int> operand_map,
		      vec<int> offset_map,
		      clause_t possible_truths,
		      predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  for (e = node->callees; e; e = next)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      predicate p;
      /* Cache the successor before E's predicate is changed below.  */
      next = e->next_callee;

      if (e->inline_failed)
	{
	  /* E is a real (not yet inlined) call; fix its parameter change
	     probabilities and remap its guarding predicate.  */
	  remap_edge_change_prob (inlined_edge, e);

	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				   (info, callee_info, operand_map,
				    offset_map, possible_truths,
				    *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    /* Edge had no predicate of its own; it inherits the predicate
	       of the call site being merged.  */
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	/* E was already inlined; recurse into its callee instead.  */
	remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  /* Indirect calls are never inlined, so no recursion is needed here.  */
  for (e = node->indirect_calls; e; e = next)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      predicate p;
      next = e->next_callee;

      remap_edge_change_prob (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
			       (info, callee_info, operand_map, offset_map,
				possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
|
|
2949
|
|
2950 /* Same as remap_predicate, but set result into hint *HINT. */
|
|
2951
|
|
2952 static void
|
|
2953 remap_hint_predicate (struct ipa_fn_summary *info,
|
|
2954 struct ipa_fn_summary *callee_info,
|
|
2955 predicate **hint,
|
|
2956 vec<int> operand_map,
|
|
2957 vec<int> offset_map,
|
|
2958 clause_t possible_truths,
|
|
2959 predicate *toplev_predicate)
|
|
2960 {
|
|
2961 predicate p;
|
|
2962
|
|
2963 if (!*hint)
|
|
2964 return;
|
|
2965 p = (*hint)->remap_after_inlining
|
|
2966 (info, callee_info,
|
|
2967 operand_map, offset_map,
|
|
2968 possible_truths, *toplev_predicate);
|
|
2969 if (p != false && p != true)
|
|
2970 {
|
|
2971 if (!*hint)
|
|
2972 set_hint_predicate (hint, p);
|
|
2973 else
|
|
2974 **hint &= p;
|
|
2975 }
|
|
2976 }
|
|
2977
|
|
/* We inlined EDGE.  Update summary of the function we inlined into
   (the outermost function when EDGE's caller is itself inlined).  */

void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  /* TO is the function whose summary accumulates the callee's entries.  */
  struct cgraph_node *to = (edge->caller->global.inlined_to
			    ? edge->caller->global.inlined_to : edge->caller);
  struct ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  /* OPERAND_MAP[i] is the formal id in TO that argument I of EDGE is a
     pass-through/ancestor of (-1 when unknown); OFFSET_MAP[i] is the
     ancestor offset, or -1 when aggregate contents are not preserved.  */
  vec<int> operand_map = vNULL;
  vec<int> offset_map = vNULL;
  int i;
  predicate toplev_predicate;
  predicate true_p = true;
  struct ipa_call_summary *es = ipa_call_summaries->get (edge);

  /* Everything merged in is guarded by the predicate of the inlined
     call site itself.  */
  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;

  /* Compute which of the callee's conditions are known at this call.  */
  if (callee_info->conds)
    evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
  if (ipa_node_params_sum && callee_info->conds)
    {
      struct ipa_edge_args *args = IPA_EDGE_REF (edge);
      int count = ipa_get_cs_argument_count (args);
      int i;

      if (count)
	{
	  operand_map.safe_grow_cleared (count);
	  offset_map.safe_grow_cleared (count);
	}
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
	}
    }
  /* Merge the callee's size/time entries, remapping their predicates to
     the caller's condition space; entries whose predicates become false
     are dropped.  */
  for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
    {
      predicate p;
      p = e->exec_predicate.remap_after_inlining
			     (info, callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
		     (info, callee_info, operand_map,
		      offset_map, clause,
		      toplev_predicate);
      if (p != false && nonconstp != false)
	{
	  /* Callee time is relative; scale by how often the edge runs.  */
	  sreal add_time = ((sreal)e->time * edge->sreal_frequency ());
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, es->param);
	  add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  /* Remap predicates of the callee's outgoing edges and hints into the
     caller's summary.  */
  remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->loop_iterations,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->loop_stride,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->array_index,
			operand_map, offset_map, clause, &toplev_predicate);

  /* Propagate stack usage and loop depths into the inlined body.  */
  ipa_call_summary *s = ipa_call_summaries->get (edge);
  inline_update_callee_summaries (edge->callee, s->loop_depth);

  /* We do not maintain predicates of inlined edges, free it.  */
  edge_set_predicate (edge, &true_p);
  /* Similarly remove param summaries.  */
  es->param.release ();
  operand_map.release ();
  offset_map.release ();
}
|
|
3093
|
|
3094 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
|
|
3095 and time. Recompute it. */
|
|
3096
|
|
3097 void
|
|
3098 ipa_update_overall_fn_summary (struct cgraph_node *node)
|
|
3099 {
|
131
|
3100 struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
|
111
|
3101 size_time_entry *e;
|
|
3102 int i;
|
|
3103
|
|
3104 info->size = 0;
|
|
3105 info->time = 0;
|
|
3106 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
|
|
3107 {
|
|
3108 info->size += e->size;
|
|
3109 info->time += e->time;
|
|
3110 }
|
|
3111 estimate_calls_size_and_time (node, &info->size, &info->min_size,
|
|
3112 &info->time, NULL,
|
|
3113 ~(clause_t) (1 << predicate::false_condition),
|
|
3114 vNULL, vNULL, vNULL);
|
|
3115 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
|
|
3116 }
|
|
3117
|
|
3118
|
|
3119 /* This function performs intraprocedural analysis in NODE that is required to
|
|
3120 inline indirect calls. */
|
|
3121
|
|
3122 static void
|
|
3123 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
|
|
3124 {
|
|
3125 ipa_analyze_node (node);
|
|
3126 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3127 {
|
|
3128 ipa_print_node_params (dump_file, node);
|
|
3129 ipa_print_node_jump_functions (dump_file, node);
|
|
3130 }
|
|
3131 }
|
|
3132
|
|
3133
|
|
3134 /* Note function body size. */
|
|
3135
|
|
3136 void
|
|
3137 inline_analyze_function (struct cgraph_node *node)
|
|
3138 {
|
|
3139 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
|
|
3140
|
|
3141 if (dump_file)
|
|
3142 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
|
|
3143 node->name (), node->order);
|
|
3144 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
|
|
3145 inline_indirect_intraprocedural_analysis (node);
|
|
3146 compute_fn_summary (node, false);
|
|
3147 if (!optimize)
|
|
3148 {
|
|
3149 struct cgraph_edge *e;
|
|
3150 for (e = node->callees; e; e = e->next_callee)
|
|
3151 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
|
|
3152 for (e = node->indirect_calls; e; e = e->next_callee)
|
|
3153 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
|
|
3154 }
|
|
3155
|
|
3156 pop_cfun ();
|
|
3157 }
|
|
3158
|
|
3159
|
|
/* Called when new function is inserted to callgraph late.  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  /* The freshly allocated summary argument is ignored; the analysis
     recomputes everything from the function body.  */
  inline_analyze_function (node);
}
|
|
3167
|
|
3168 /* Note function body size. */
|
|
3169
|
|
3170 static void
|
|
3171 ipa_fn_summary_generate (void)
|
|
3172 {
|
|
3173 struct cgraph_node *node;
|
|
3174
|
|
3175 FOR_EACH_DEFINED_FUNCTION (node)
|
|
3176 if (DECL_STRUCT_FUNCTION (node->decl))
|
|
3177 node->local.versionable = tree_versionable_function_p (node->decl);
|
|
3178
|
|
3179 ipa_fn_summary_alloc ();
|
|
3180
|
|
3181 ipa_fn_summaries->enable_insertion_hook ();
|
|
3182
|
|
3183 ipa_register_cgraph_hooks ();
|
|
3184
|
|
3185 FOR_EACH_DEFINED_FUNCTION (node)
|
|
3186 if (!node->alias
|
|
3187 && (flag_generate_lto || flag_generate_offload|| flag_wpa
|
|
3188 || opt_for_fn (node->decl, optimize)))
|
|
3189 inline_analyze_function (node);
|
|
3190 }
|
|
3191
|
|
3192
|
|
/* Read summary information for edge E from IB.  Must mirror
   write_ipa_call_summary.  */

static void
read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
{
  struct ipa_call_summary *es = ipa_call_summaries->get_create (e);
  predicate p;
  int length, i;

  es->call_stmt_size = streamer_read_uhwi (ib);
  es->call_stmt_time = streamer_read_uhwi (ib);
  es->loop_depth = streamer_read_uhwi (ib);

  bitpack_d bp = streamer_read_bitpack (ib);
  es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);

  /* The writer emits either the predicate or a 0 marker; stream_in
     decodes both forms (see write_ipa_call_summary).  */
  p.stream_in (ib);
  edge_set_predicate (e, &p);
  /* Per-parameter change probabilities.  */
  length = streamer_read_uhwi (ib);
  if (length)
    {
      es->param.safe_grow_cleared (length);
      for (i = 0; i < length; i++)
	es->param[i].change_prob = streamer_read_uhwi (ib);
    }
}
|
|
3219
|
|
3220
|
|
/* Stream in inline summaries from the section.  The layout must mirror
   ipa_fn_summary_write exactly.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  /* Number of function records in this section.  */
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      struct ipa_fn_summary *info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      info = ipa_fn_summaries->get_create (node);

      /* Only the "self" values are streamed; at stream time the global
	 values equal them (no inlining applied yet).  */
      info->estimated_stack_size
	= info->estimated_self_stack_size = streamer_read_uhwi (&ib);
      info->size = info->self_size = streamer_read_uhwi (&ib);
      info->time = sreal::stream_in (&ib);

      bp = streamer_read_bitpack (&ib);
      info->inlinable = bp_unpack_value (&bp, 1);
      info->fp_expressions = bp_unpack_value (&bp, 1);

      /* Conditions referenced by the predicates below.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->conds);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.size = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  /* Offset is present only for aggregate conditions.  */
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  vec_safe_push (info->conds, c);
	}
      /* Size/time table entries with their guarding predicates.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->size_time_table);
      for (j = 0; j < count2; j++)
	{
	  struct size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  vec_safe_push (info->size_time_table, e);
	}

      /* Hint predicates (loop iterations/stride, array index).  */
      p.stream_in (&ib);
      set_hint_predicate (&info->loop_iterations, p);
      p.stream_in (&ib);
      set_hint_predicate (&info->loop_stride, p);
      p.stream_in (&ib);
      set_hint_predicate (&info->array_index, p);
      /* Edge summaries follow in callee order, then indirect-call order —
	 the same order the writer walks them.  */
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
|
|
3314
|
|
3315
|
|
/* Read inline summary.  Jump functions are shared among ipa-cp
   and inliner, so when ipa-cp is active, we don't need to write them
   twice.  */

static void
ipa_fn_summary_read (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  ipa_fn_summary_alloc ();

  /* Every LTO input file must carry a summary section.  */
  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data = lto_get_section_data (file_data,
					       LTO_section_ipa_fn_summary,
					       NULL, &len);
      if (data)
	inline_read_section (file_data, data, len);
      else
	/* Fatal error here.  We do not want to support compiling ltrans units
	   with different version of compiler or different flags than the WPA
	   unit, so this should never happen.  */
	fatal_error (input_location,
		     "ipa inline summary is missing in input file");
    }
  ipa_register_cgraph_hooks ();
  /* When ipa-cp is disabled the jump functions were streamed separately
     and must be read here.  */
  if (!flag_ipa_cp)
    ipa_prop_read_jump_functions ();

  gcc_assert (ipa_fn_summaries);
  ipa_fn_summaries->enable_insertion_hook ();
}
|
|
3351
|
|
3352
|
|
3353 /* Write inline summary for edge E to OB. */
|
|
3354
|
|
3355 static void
|
|
3356 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
|
|
3357 {
|
|
3358 struct ipa_call_summary *es = ipa_call_summaries->get (e);
|
|
3359 int i;
|
|
3360
|
|
3361 streamer_write_uhwi (ob, es->call_stmt_size);
|
|
3362 streamer_write_uhwi (ob, es->call_stmt_time);
|
|
3363 streamer_write_uhwi (ob, es->loop_depth);
|
131
|
3364
|
|
3365 bitpack_d bp = bitpack_create (ob->main_stream);
|
|
3366 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
|
|
3367 streamer_write_bitpack (&bp);
|
|
3368
|
111
|
3369 if (es->predicate)
|
|
3370 es->predicate->stream_out (ob);
|
|
3371 else
|
|
3372 streamer_write_uhwi (ob, 0);
|
|
3373 streamer_write_uhwi (ob, es->param.length ());
|
|
3374 for (i = 0; i < (int) es->param.length (); i++)
|
|
3375 streamer_write_uhwi (ob, es->param[i].change_prob);
|
|
3376 }
|
|
3377
|
|
3378
|
|
3379 /* Write inline summary for node in SET.
|
|
3380 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
|
|
3381 active, we don't need to write them twice. */
|
|
3382
|
|
3383 static void
|
|
3384 ipa_fn_summary_write (void)
|
|
3385 {
|
|
3386 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
|
|
3387 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
|
|
3388 unsigned int count = 0;
|
|
3389 int i;
|
|
3390
|
|
3391 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
|
|
3392 {
|
|
3393 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
|
|
3394 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
|
|
3395 if (cnode && cnode->definition && !cnode->alias)
|
|
3396 count++;
|
|
3397 }
|
|
3398 streamer_write_uhwi (ob, count);
|
|
3399
|
|
3400 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
|
|
3401 {
|
|
3402 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
|
|
3403 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
|
|
3404 if (cnode && cnode->definition && !cnode->alias)
|
|
3405 {
|
|
3406 struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
|
|
3407 struct bitpack_d bp;
|
|
3408 struct cgraph_edge *edge;
|
|
3409 int i;
|
|
3410 size_time_entry *e;
|
|
3411 struct condition *c;
|
|
3412
|
|
3413 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
|
|
3414 streamer_write_hwi (ob, info->estimated_self_stack_size);
|
|
3415 streamer_write_hwi (ob, info->self_size);
|
|
3416 info->time.stream_out (ob);
|
|
3417 bp = bitpack_create (ob->main_stream);
|
|
3418 bp_pack_value (&bp, info->inlinable, 1);
|
131
|
3419 bp_pack_value (&bp, false, 1);
|
111
|
3420 bp_pack_value (&bp, info->fp_expressions, 1);
|
|
3421 streamer_write_bitpack (&bp);
|
|
3422 streamer_write_uhwi (ob, vec_safe_length (info->conds));
|
|
3423 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
|
|
3424 {
|
|
3425 streamer_write_uhwi (ob, c->operand_num);
|
|
3426 streamer_write_uhwi (ob, c->size);
|
|
3427 streamer_write_uhwi (ob, c->code);
|
|
3428 stream_write_tree (ob, c->val, true);
|
|
3429 bp = bitpack_create (ob->main_stream);
|
|
3430 bp_pack_value (&bp, c->agg_contents, 1);
|
|
3431 bp_pack_value (&bp, c->by_ref, 1);
|
|
3432 streamer_write_bitpack (&bp);
|
|
3433 if (c->agg_contents)
|
|
3434 streamer_write_uhwi (ob, c->offset);
|
|
3435 }
|
|
3436 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
|
|
3437 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
|
|
3438 {
|
|
3439 streamer_write_uhwi (ob, e->size);
|
|
3440 e->time.stream_out (ob);
|
|
3441 e->exec_predicate.stream_out (ob);
|
|
3442 e->nonconst_predicate.stream_out (ob);
|
|
3443 }
|
|
3444 if (info->loop_iterations)
|
|
3445 info->loop_iterations->stream_out (ob);
|
|
3446 else
|
|
3447 streamer_write_uhwi (ob, 0);
|
|
3448 if (info->loop_stride)
|
|
3449 info->loop_stride->stream_out (ob);
|
|
3450 else
|
|
3451 streamer_write_uhwi (ob, 0);
|
|
3452 if (info->array_index)
|
|
3453 info->array_index->stream_out (ob);
|
|
3454 else
|
|
3455 streamer_write_uhwi (ob, 0);
|
|
3456 for (edge = cnode->callees; edge; edge = edge->next_callee)
|
|
3457 write_ipa_call_summary (ob, edge);
|
|
3458 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
|
|
3459 write_ipa_call_summary (ob, edge);
|
|
3460 }
|
|
3461 }
|
|
3462 streamer_write_char_stream (ob->main_stream, 0);
|
|
3463 produce_asm (ob, NULL);
|
|
3464 destroy_output_block (ob);
|
|
3465
|
|
3466 if (!flag_ipa_cp)
|
|
3467 ipa_prop_write_jump_functions ();
|
|
3468 }
|
|
3469
|
|
3470
|
|
/* Release inline summary.  */

void
ipa_free_fn_summary (void)
{
  struct cgraph_node *node;
  /* ipa_call_summaries doubles as the "summaries are allocated" flag.
     NOTE(review): ipa_fn_summaries is dereferenced below without its own
     null check — presumably both tables are always allocated and freed
     in lockstep; confirm against ipa_fn_summary_alloc.  */
  if (!ipa_call_summaries)
    return;
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->alias)
      ipa_fn_summaries->remove (node);
  ipa_fn_summaries->release ();
  ipa_fn_summaries = NULL;
  ipa_call_summaries->release ();
  delete ipa_call_summaries;
  ipa_call_summaries = NULL;
  /* Edge predicates were allocated from this pool; drop it too.  */
  edge_predicate_pool.release ();
}
|
|
3489
|
|
namespace {

/* Pass metadata for the GIMPLE-level local function summary pass.  */
const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Per-function pass computing the local part of the function summary
   for the current function.  */
class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
  virtual unsigned int execute (function *)
  {
    return compute_fn_summary_for_current ();
  }

}; // class pass_local_fn_summary

} // anon namespace
|
|
3522
|
|
/* Factory used by the pass manager (passes.def) to instantiate the
   local function summary pass.  */

gimple_opt_pass *
make_pass_local_fn_summary (gcc::context *ctxt)
{
  return new pass_local_fn_summary (ctxt);
}
|
|
3528
|
|
3529
|
|
/* Free inline summary.  */

namespace {

/* Pass metadata for the summary-releasing simple IPA pass.  */
const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Simple IPA pass that releases function summaries.  A pass parameter
   selects the "small" variant (small_p), which additionally removes
   functions made unreachable by early optimizations; the other variant
   is gated off during WPA, where the summaries are still needed.  */
class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
      small_p (false)
  {}

  /* opt_pass methods: */
  opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
  /* Receive the single pass parameter choosing the variant.  */
  void set_pass_param (unsigned int n, bool param)
  {
    gcc_assert (n == 0);
    small_p = param;
  }
  virtual bool gate (function *) { return small_p || !flag_wpa; }
  virtual unsigned int execute (function *)
  {
    ipa_free_fn_summary ();
    /* Early optimizations may make function unreachable.  We can not
       remove unreachable functions as part of the early opts pass because
       TODOs are run before subpasses.  Do it here.  */
    return small_p ? TODO_remove_functions | TODO_dump_symtab : 0;
  }

private:
  bool small_p;
}; // class pass_ipa_free_fn_summary

} // anon namespace
|
|
3577
|
|
/* Factory used by the pass manager (passes.def) to instantiate the
   summary-releasing pass; the variant is chosen via set_pass_param.  */

simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_free_fn_summary (ctxt);
}
|
|
3583
|
|
namespace {

/* Pass metadata for the full IPA function summary pass.  */
const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

/* Full IPA pass: all work is done through the generate/write/read
   summary hooks passed to ipa_opt_pass_d below; execute is a no-op.  */
class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace
|
|
3621
|
|
/* Factory used by the pass manager (passes.def) to instantiate the
   IPA function summary pass.  */

ipa_opt_pass_d *
make_pass_ipa_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_fn_summary (ctxt);
}
|
131
|
3627
|
|
/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_fnsummary_c_finalize (void)
{
  /* Releasing the summaries also drops the edge predicate allocation
     pool (see ipa_free_fn_summary).  */
  ipa_free_fn_summary ();
}
|