111
|
1 /* Function summary pass.
|
|
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
|
|
3 Contributed by Jan Hubicka
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it under
|
|
8 the terms of the GNU General Public License as published by the Free
|
|
9 Software Foundation; either version 3, or (at your option) any later
|
|
10 version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 /* Analysis of function bodies used by inter-procedural passes
|
|
22
|
|
23 We estimate for each function
|
|
24 - function body size and size after specializing into given context
|
|
25 - average function execution time in a given context
|
|
26 - function frame size
|
|
27 For each call
|
|
28 - call statement size, time and how often the parameters change
|
|
29
|
|
30 ipa_fn_summary data structures store above information locally (i.e.
|
|
31 parameters of the function itself) and globally (i.e. parameters of
|
|
32 the function created by applying all the inline decisions already
|
|
33 present in the callgraph).
|
|
34
|
|
35 We provide access to the ipa_fn_summary data structure and
|
|
36 basic logic updating the parameters when inlining is performed.
|
|
37
|
|
38 The summaries are context sensitive. Context means
|
|
39 1) partial assignment of known constant values of operands
|
|
40 2) whether function is inlined into the call or not.
|
|
41 It is easy to add more variants. To represent function size and time
|
|
42 that depends on context (i.e. it is known to be optimized away when
|
|
43 context is known either by inlining or from IP-CP and cloning),
|
|
44 we use predicates.
|
|
45
|
|
46 estimate_edge_size_and_time can be used to query
|
|
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
|
|
48 properties of caller and callee after inlining.
|
|
49
|
|
50 Finally pass_inline_parameters is exported. This is used to drive
|
|
51 computation of function parameters used by the early inliner. IPA
|
|
52 inlined performs analysis via its analyze_function method. */
|
|
53
|
|
54 #include "config.h"
|
|
55 #include "system.h"
|
|
56 #include "coretypes.h"
|
|
57 #include "backend.h"
|
|
58 #include "tree.h"
|
|
59 #include "gimple.h"
|
|
60 #include "alloc-pool.h"
|
|
61 #include "tree-pass.h"
|
|
62 #include "ssa.h"
|
|
63 #include "tree-streamer.h"
|
|
64 #include "cgraph.h"
|
|
65 #include "diagnostic.h"
|
|
66 #include "fold-const.h"
|
|
67 #include "print-tree.h"
|
|
68 #include "tree-inline.h"
|
|
69 #include "gimple-pretty-print.h"
|
|
70 #include "params.h"
|
|
71 #include "cfganal.h"
|
|
72 #include "gimple-iterator.h"
|
|
73 #include "tree-cfg.h"
|
|
74 #include "tree-ssa-loop-niter.h"
|
|
75 #include "tree-ssa-loop.h"
|
|
76 #include "symbol-summary.h"
|
|
77 #include "ipa-prop.h"
|
|
78 #include "ipa-fnsummary.h"
|
|
79 #include "cfgloop.h"
|
|
80 #include "tree-scalar-evolution.h"
|
|
81 #include "ipa-utils.h"
|
|
82 #include "cilk.h"
|
|
83 #include "cfgexpand.h"
|
|
84 #include "gimplify.h"
|
|
85 #include "stringpool.h"
|
|
86 #include "attribs.h"
|
|
87
|
|
/* Summaries.  */
/* Per-function summary data, lazily created, indexed by cgraph node.  */
function_summary <ipa_fn_summary *> *ipa_fn_summaries;
/* Per-call-graph-edge summary data.  */
call_summary <ipa_call_summary *> *ipa_call_summaries;

/* Pool from which edge and hint predicates are allocated; predicates are
   returned to the pool when they degenerate to trivially true/false.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");
|
|
94
|
|
95
|
|
96 /* Dump IPA hints. */
|
|
97 void
|
|
98 ipa_dump_hints (FILE *f, ipa_hints hints)
|
|
99 {
|
|
100 if (!hints)
|
|
101 return;
|
|
102 fprintf (f, "IPA hints:");
|
|
103 if (hints & INLINE_HINT_indirect_call)
|
|
104 {
|
|
105 hints &= ~INLINE_HINT_indirect_call;
|
|
106 fprintf (f, " indirect_call");
|
|
107 }
|
|
108 if (hints & INLINE_HINT_loop_iterations)
|
|
109 {
|
|
110 hints &= ~INLINE_HINT_loop_iterations;
|
|
111 fprintf (f, " loop_iterations");
|
|
112 }
|
|
113 if (hints & INLINE_HINT_loop_stride)
|
|
114 {
|
|
115 hints &= ~INLINE_HINT_loop_stride;
|
|
116 fprintf (f, " loop_stride");
|
|
117 }
|
|
118 if (hints & INLINE_HINT_same_scc)
|
|
119 {
|
|
120 hints &= ~INLINE_HINT_same_scc;
|
|
121 fprintf (f, " same_scc");
|
|
122 }
|
|
123 if (hints & INLINE_HINT_in_scc)
|
|
124 {
|
|
125 hints &= ~INLINE_HINT_in_scc;
|
|
126 fprintf (f, " in_scc");
|
|
127 }
|
|
128 if (hints & INLINE_HINT_cross_module)
|
|
129 {
|
|
130 hints &= ~INLINE_HINT_cross_module;
|
|
131 fprintf (f, " cross_module");
|
|
132 }
|
|
133 if (hints & INLINE_HINT_declared_inline)
|
|
134 {
|
|
135 hints &= ~INLINE_HINT_declared_inline;
|
|
136 fprintf (f, " declared_inline");
|
|
137 }
|
|
138 if (hints & INLINE_HINT_array_index)
|
|
139 {
|
|
140 hints &= ~INLINE_HINT_array_index;
|
|
141 fprintf (f, " array_index");
|
|
142 }
|
|
143 if (hints & INLINE_HINT_known_hot)
|
|
144 {
|
|
145 hints &= ~INLINE_HINT_known_hot;
|
|
146 fprintf (f, " known_hot");
|
|
147 }
|
|
148 gcc_assert (!hints);
|
|
149 }
|
|
150
|
|
151
|
|
/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to constant and
   will get optimized out in specialized clones of the function.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const predicate &exec_pred,
				   const predicate &nonconst_pred_in)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  /* Code that never executes contributes nothing.  */
  if (exec_pred == false)
    return;

  /* Code can only be non-constant when it is executed at all.  */
  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && size_time_table)
    return;

  gcc_assert (time >= 0);

  /* Merge with an existing entry carrying the same predicate pair,
     if there is one.  */
  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  /* Cap the table at 256 entries; beyond that, fold everything into the
     first (unconditional) entry, conservatively dropping the predicate.  */
  if (i == 256)
    {
      i = 0;
      found = true;
      e = &(*size_time_table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      /* No matching entry; append a new one.  */
      struct size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      /* Accumulate into the matching (or overflow) entry.  */
      e->size += size;
      e->time += time;
    }
}
|
|
229
|
|
/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  /* If the edge was already inlined, remember the inline clone so it can
     be removed after the redirection.  */
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
		  (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  /* The call is never executed: mark it uninlinable and zero out its
     profile and summary costs.  */
  e->inline_failed = CIF_UNREACHABLE;
  e->frequency = 0;
  e->count = profile_count::zero ();
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}
|
|
255
|
|
256 /* Set predicate for edge E. */
|
|
257
|
|
258 static void
|
|
259 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
|
|
260 {
|
|
261 /* If the edge is determined to be never executed, redirect it
|
|
262 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
|
|
263 be optimized out. */
|
|
264 if (predicate && *predicate == false
|
|
265 /* When handling speculative edges, we need to do the redirection
|
|
266 just once. Do it always on the direct edge, so we do not
|
|
267 attempt to resolve speculation while duplicating the edge. */
|
|
268 && (!e->speculative || e->callee))
|
|
269 e = redirect_to_unreachable (e);
|
|
270
|
|
271 struct ipa_call_summary *es = ipa_call_summaries->get (e);
|
|
272 if (predicate && *predicate != true)
|
|
273 {
|
|
274 if (!es->predicate)
|
|
275 es->predicate = edge_predicate_pool.allocate ();
|
|
276 *es->predicate = *predicate;
|
|
277 }
|
|
278 else
|
|
279 {
|
|
280 if (es->predicate)
|
|
281 edge_predicate_pool.remove (es->predicate);
|
|
282 es->predicate = NULL;
|
|
283 }
|
|
284 }
|
|
285
|
|
286 /* Set predicate for hint *P. */
|
|
287
|
|
288 static void
|
|
289 set_hint_predicate (predicate **p, predicate new_predicate)
|
|
290 {
|
|
291 if (new_predicate == false || new_predicate == true)
|
|
292 {
|
|
293 if (*p)
|
|
294 edge_predicate_pool.remove (*p);
|
|
295 *p = NULL;
|
|
296 }
|
|
297 else
|
|
298 {
|
|
299 if (!*p)
|
|
300 *p = edge_predicate_pool.allocate ();
|
|
301 **p = new_predicate;
|
|
302 }
|
|
303 }
|
|
304
|
|
305
|
|
/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  Bit
   0 means that condition is known to be false, while bit 1 means that condition
   may or may not be true.  These differ - for example NOT_INLINED condition
   is always false in the second and also builtin_constant_p tests cannot use
   the fact that parameter is indeed a constant.

   KNOWN_VALS is partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
   Return clause of possible truths.  When INLINE_P is true, assume that we are
   inlining.

   ERROR_MARK means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    vec<tree> known_vals,
				    vec<ipa_agg_jump_function_p>
				    known_aggs,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;

      /* We allow call stmt to have fewer arguments than the callee function
	 (especially for K&R style programs).  So bound check here (we assume
	 known_aggs vector, if non-NULL, has the same length as
	 known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
			   || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
	{
	  /* Nothing known about the operand; the condition may hold.  */
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->agg_contents)
	{
	  /* The condition tests a value loaded from an aggregate passed
	     (by value or by reference) in the operand.  */
	  struct ipa_agg_jump_function *agg;

	  if (c->code == predicate::changed
	      && !c->by_ref
	      && (known_vals[c->operand_num] == error_mark_node))
	    continue;

	  if (known_aggs.exists ())
	    {
	      agg = known_aggs[c->operand_num];
	      val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
						c->offset, c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else
	{
	  /* The condition tests the scalar parameter value directly.
	     ERROR_MARK means "compile time invariant but unknown", which
	     is only meaningful for the CHANGED test.  */
	  val = known_vals[c->operand_num];
	  if (val == error_mark_node && c->code != predicate::changed)
	    val = NULL_TREE;
	}

      if (!val)
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::changed)
	{
	  /* Known value => parameter did not change in the specialized
	     copy, but may in a nonspecialized one.  */
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* A size mismatch means the condition cannot be evaluated on VAL.  */
      if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::is_not_constant)
	{
	  /* builtin_constant_p cannot exploit the known constant in the
	     nonspecialized copy.  */
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* Fold the comparison against the known constant; a result of zero
	 proves the condition false in both clauses.  */
      val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
      res = val
	? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	: NULL;

      if (res && integer_zerop (res))
	continue;

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
|
|
419
|
|
420
|
|
/* Work out what conditions might be true at invocation of E.
   Fills in, for each non-NULL output pointer, the clause of possibly-true
   conditions (CLAUSE_PTR / NONSPEC_CLAUSE_PTR) and vectors of known scalar
   values, polymorphic contexts and aggregate jump functions of the call
   arguments.  Ownership of the returned vectors passes to the caller.  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      vec<tree> *known_vals_ptr,
			      vec<ipa_polymorphic_call_context>
			      *known_contexts_ptr,
			      vec<ipa_agg_jump_function_p> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  struct ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  vec<ipa_agg_jump_function_p> known_aggs = vNULL;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  /* Preferred path: use IPA jump functions when available.  */
  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
    {
      struct ipa_node_params *parms_info;
      struct ipa_edge_args *args = IPA_EDGE_REF (e);
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      /* Evaluate jump functions in the context of the outermost function
	 the caller was inlined into, if any.  */
      if (e->caller->global.inlined_to)
	parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
      else
	parms_info = IPA_NODE_REF (e->caller);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
	known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
	known_contexts_ptr->safe_grow_cleared (count);

      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
	  tree cst = ipa_value_from_jfunc (parms_info, jf);

	  /* Fall back to an invariant argument on the call stmt itself.  */
	  if (!cst && e->call_stmt
	      && i < (int)gimple_call_num_args (e->call_stmt))
	    {
	      cst = gimple_call_arg (e->call_stmt, i);
	      if (!is_gimple_min_invariant (cst))
		cst = NULL;
	    }
	  if (cst)
	    {
	      gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
	      if (known_vals.exists ())
		known_vals[i] = cst;
	    }
	  else if (inline_p && !es->param[i].change_prob)
	    /* ERROR_MARK marks an unknown but compile-time-invariant
	       argument (it never changes across calls).  */
	    known_vals[i] = error_mark_node;

	  if (known_contexts_ptr)
	    (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
							       i, jf);
	  /* TODO: When IPA-CP starts propagating and merging aggregate jump
	     functions, use its knowledge of the caller too, just like the
	     scalar case above.  */
	  known_aggs[i] = &jf->agg;
	}
    }
  /* Fallback: no IPA information; read invariants off the call stmt.  */
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
	   && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    known_vals[i] = cst;
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p,
				      known_vals, known_aggs, clause_ptr,
				      nonspec_clause_ptr);

  /* Hand the vectors to the caller or release them.  */
  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    known_aggs.release ();
}
|
|
526
|
|
527
|
|
/* Allocate the function summary.  Creates both the per-function and the
   per-edge summary containers; must be called exactly once.  */

static void
ipa_fn_summary_alloc (void)
{
  gcc_checking_assert (!ipa_fn_summaries);
  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
  ipa_call_summaries = new ipa_call_summary_t (symtab, false);
}
|
|
537
|
|
538 /* We are called multiple time for given function; clear
|
|
539 data from previous run so they are not cumulated. */
|
|
540
|
|
541 void
|
|
542 ipa_call_summary::reset ()
|
|
543 {
|
|
544 call_stmt_size = call_stmt_time = 0;
|
|
545 if (predicate)
|
|
546 edge_predicate_pool.remove (predicate);
|
|
547 predicate = NULL;
|
|
548 param.release ();
|
|
549 }
|
|
550
|
|
/* We are called multiple times for given function; clear
   data from previous run so they are not cumulated.  */

void
ipa_fn_summary::reset (struct cgraph_node *node)
{
  struct cgraph_edge *e;

  /* Zero all scalar estimates.  */
  self_size = 0;
  estimated_stack_size = 0;
  estimated_self_stack_size = 0;
  stack_frame_offset = 0;
  size = 0;
  time = 0;
  growth = 0;
  scc_no = 0;
  /* Return hint predicates to the pool and clear the slots.  */
  if (loop_iterations)
    {
      edge_predicate_pool.remove (loop_iterations);
      loop_iterations = NULL;
    }
  if (loop_stride)
    {
      edge_predicate_pool.remove (loop_stride);
      loop_stride = NULL;
    }
  if (array_index)
    {
      edge_predicate_pool.remove (array_index);
      array_index = NULL;
    }
  vec_free (conds);
  vec_free (size_time_table);
  /* Also reset summaries of all outgoing (direct and indirect) edges.  */
  for (e = node->callees; e; e = e->next_callee)
    ipa_call_summaries->get (e)->reset ();
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->get (e)->reset ();
  fp_expressions = false;
}
|
|
590
|
|
/* Hook that is called by cgraph.c when a node is removed.
   Releases all summary data attached to NODE (including the summaries of
   its outgoing edges).  */

void
ipa_fn_summary_t::remove (cgraph_node *node, ipa_fn_summary *info)
{
  info->reset (node);
}
|
|
598
|
|
599 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
|
|
600 Additionally care about allocating new memory slot for updated predicate
|
|
601 and set it to NULL when it becomes true or false (and thus uninteresting).
|
|
602 */
|
|
603
|
|
604 static void
|
|
605 remap_hint_predicate_after_duplication (predicate **p,
|
|
606 clause_t possible_truths)
|
|
607 {
|
|
608 predicate new_predicate;
|
|
609
|
|
610 if (!*p)
|
|
611 return;
|
|
612
|
|
613 new_predicate = (*p)->remap_after_duplication (possible_truths);
|
|
614 /* We do not want to free previous predicate; it is used by node origin. */
|
|
615 *p = NULL;
|
|
616 set_hint_predicate (p, new_predicate);
|
|
617 }
|
|
618
|
|
619
|
|
/* Hook that is called by cgraph.c when a node is duplicated.
   Copies the summary of SRC into INFO (the summary of DST) and, when DST is
   a specialized clone with parameter replacements, re-evaluates predicates
   so that code proven dead in the clone is dropped from the tables.  */
void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *,
			     ipa_fn_summary *info)
{
  memcpy (info, ipa_fn_summaries->get (src), sizeof (ipa_fn_summary));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      struct ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      /* Collect the replacement constants for each parameter from the
	 clone's tree map.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
	    {
	      if (((!r->old_tree && r->parm_num == i)
		   || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
		  && r->replace_p && !r->ref_p)
		{
		  known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  known_vals,
					  vNULL,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);
      known_vals.release ();

      /* Re-seed the table with the mandatory unconditional entry.  */
      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as an optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
	{
	  predicate new_exec_pred;
	  predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
			 (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
			 (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
	{
	  predicate new_predicate;
	  struct ipa_call_summary *es = ipa_call_summaries->get (edge);
	  /* edge_set_predicate may remove the edge; fetch the successor
	     up front.  */
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  predicate new_predicate;
	  struct ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      remap_hint_predicate_after_duplication (&info->loop_iterations,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->array_index,
					      possible_truths);

      /* If inliner or someone after inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because size vectors already contain
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      /* Plain copy: duplicate the tables and give the clone its own
	 pool-allocated hint predicates (they must not be shared).  */
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
	{
	  predicate p = *info->loop_iterations;
	  info->loop_iterations = NULL;
	  set_hint_predicate (&info->loop_iterations, p);
	}
      if (info->loop_stride)
	{
	  predicate p = *info->loop_stride;
	  info->loop_stride = NULL;
	  set_hint_predicate (&info->loop_stride, p);
	}
      if (info->array_index)
	{
	  predicate p = *info->array_index;
	  info->array_index = NULL;
	  set_hint_predicate (&info->array_index, p);
	}
    }
  if (!dst->global.inlined_to)
    ipa_update_overall_fn_summary (dst);
}
|
|
771
|
|
772
|
|
/* Hook that is called by cgraph.c when a node is duplicated.
   Copies SRCINFO into INFO for the duplicated edge DST, giving DST its own
   predicate allocation and parameter vector.  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
			       struct cgraph_edge *dst,
			       struct ipa_call_summary *srcinfo,
			       struct ipa_call_summary *info)
{
  *info = *srcinfo;
  /* The predicate pointer must not be shared with SRC; clear it and let
     edge_set_predicate install a fresh pool-allocated copy.  */
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  /* When an indirect edge was turned into a direct call, the call became
     cheaper; compensate by the difference of indirect and direct call
     costs.  */
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
			       - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
			       - eni_time_weights.call_cost);
    }
}
|
|
793
|
|
794
|
|
/* Keep edge cache consistent across edge removal.
   Releases SUM's predicate and parameter data when its edge is removed.  */

void
ipa_call_summary_t::remove (struct cgraph_edge *,
			    struct ipa_call_summary *sum)
{
  sum->reset ();
}
|
|
803
|
|
804
|
|
/* Dump edge summaries associated to NODE and recursively to all clones.
   Indent by INDENT.  INFO is the summary of the outermost function; its
   conditions are used to pretty-print edge predicates.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       struct ipa_fn_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
	       "%*s%s/%i %s\n%*s  loop depth:%2i freq:%4i size:%2i"
	       " time: %2i callee size:%2i stack:%2i",
	       indent, "", callee->name (), callee->order,
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
	       indent, "", es->loop_depth, edge->frequency,
	       es->call_stmt_size, es->call_stmt_time,
	       (int) ipa_fn_summaries->get (callee)->size / ipa_fn_summary::size_scale,
	       (int) ipa_fn_summaries->get (callee)->estimated_stack_size);

      if (es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      /* Dump per-parameter change probabilities when recorded.  */
      if (es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	  }
      /* Recurse into inlined callees.  */
      if (!edge->inline_failed)
	{
	  fprintf (f, "%*sStack frame offset %i, callee self size %i,"
		   " callee size %i\n",
		   indent + 2, "",
		   (int) ipa_fn_summaries->get (callee)->stack_frame_offset,
		   (int) ipa_fn_summaries->get (callee)->estimated_self_stack_size,
		   (int) ipa_fn_summaries->get (callee)->estimated_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->frequency, es->call_stmt_size, es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}
|
|
877
|
|
878
|
|
/* Dump the function summary of NODE to F: flags, size/time estimates,
   the size_time table with its predicates, hint predicates, and the
   summaries of all outgoing calls.  */

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      struct ipa_fn_summary *s = ipa_fn_summaries->get (node);
      size_time_entry *e;
      int i;
      fprintf (f, "IPA function summary for %s/%i", node->name (),
	       node->order);
      if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	fprintf (f, " always_inline");
      if (s->inlinable)
	fprintf (f, " inlinable");
      if (s->contains_cilk_spawn)
	fprintf (f, " contains_cilk_spawn");
      if (s->fp_expressions)
	fprintf (f, " fp_expression");
      fprintf (f, "\n  global time:     %f\n", s->time.to_double ());
      fprintf (f, "  self size:       %i\n", s->self_size);
      fprintf (f, "  global size:     %i\n", s->size);
      fprintf (f, "  min size:       %i\n", s->min_size);
      fprintf (f, "  self stack:      %i\n",
	       (int) s->estimated_self_stack_size);
      fprintf (f, "  global stack:    %i\n", (int) s->estimated_stack_size);
      if (s->growth)
	fprintf (f, "  estimated growth:%i\n", (int) s->growth);
      if (s->scc_no)
	fprintf (f, "  In SCC:          %i\n", (int) s->scc_no);
      /* Dump each size/time entry with its predicates.  */
      for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	{
	  fprintf (f, "    size:%f, time:%f",
		   (double) e->size / ipa_fn_summary::size_scale,
		   e->time.to_double ());
	  if (e->exec_predicate != true)
	    {
	      fprintf (f, ",  executed if:");
	      e->exec_predicate.dump (f, s->conds, 0);
	    }
	  if (e->exec_predicate != e->nonconst_predicate)
	    {
	      fprintf (f, ",  nonconst if:");
	      e->nonconst_predicate.dump (f, s->conds, 0);
	    }
	  fprintf (f, "\n");
	}
      if (s->loop_iterations)
	{
	  fprintf (f, "  loop iterations:");
	  s->loop_iterations->dump (f, s->conds);
	}
      if (s->loop_stride)
	{
	  fprintf (f, "  loop stride:");
	  s->loop_stride->dump (f, s->conds);
	}
      if (s->array_index)
	{
	  fprintf (f, "  array index:");
	  s->array_index->dump (f, s->conds);
	}
      fprintf (f, "  calls:\n");
      dump_ipa_call_summary (f, 4, node, s);
      fprintf (f, "\n");
    }
}
|
|
945
|
|
/* Convenience entry point for use from the debugger: dump the function
   summary of NODE to stderr.  */

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
|
|
951
|
|
/* Dump IPA function summaries of all defined functions to file F.  Nodes
   already inlined into a caller are skipped; their data has been folded
   into the global summary of the function they were inlined to.  */

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      ipa_dump_fn_summary (f, node);
}
|
|
961
|
|
962 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
|
|
963 boolean variable pointed to by DATA. */
|
|
964
|
|
965 static bool
|
|
966 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
|
|
967 void *data)
|
|
968 {
|
|
969 bool *b = (bool *) data;
|
|
970 *b = true;
|
|
971 return true;
|
|
972 }
|
|
973
|
|
974 /* If OP refers to value of function parameter, return the corresponding
|
|
975 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
|
|
976 PARM_DECL) will be stored to *SIZE_P in that case too. */
|
|
977
|
|
978 static tree
|
|
979 unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
|
|
980 {
|
|
981 /* SSA_NAME referring to parm default def? */
|
|
982 if (TREE_CODE (op) == SSA_NAME
|
|
983 && SSA_NAME_IS_DEFAULT_DEF (op)
|
|
984 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
|
|
985 {
|
|
986 if (size_p)
|
|
987 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
|
|
988 return SSA_NAME_VAR (op);
|
|
989 }
|
|
990 /* Non-SSA parm reference? */
|
|
991 if (TREE_CODE (op) == PARM_DECL)
|
|
992 {
|
|
993 bool modified = false;
|
|
994
|
|
995 ao_ref refd;
|
|
996 ao_ref_init (&refd, op);
|
|
997 walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
|
|
998 NULL);
|
|
999 if (!modified)
|
|
1000 {
|
|
1001 if (size_p)
|
|
1002 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
|
|
1003 return op;
|
|
1004 }
|
|
1005 }
|
|
1006 return NULL_TREE;
|
|
1007 }
|
|
1008
|
|
1009 /* If OP refers to value of function parameter, return the corresponding
|
|
1010 parameter. Also traverse chains of SSA register assignments. If non-NULL,
|
|
1011 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
|
|
1012 stored to *SIZE_P in that case too. */
|
|
1013
|
|
1014 static tree
|
|
1015 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
|
|
1016 {
|
|
1017 tree res = unmodified_parm_1 (stmt, op, size_p);
|
|
1018 if (res)
|
|
1019 return res;
|
|
1020
|
|
1021 if (TREE_CODE (op) == SSA_NAME
|
|
1022 && !SSA_NAME_IS_DEFAULT_DEF (op)
|
|
1023 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
|
|
1024 return unmodified_parm (SSA_NAME_DEF_STMT (op),
|
|
1025 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
|
|
1026 size_p);
|
|
1027 return NULL_TREE;
|
|
1028 }
|
|
1029
|
|
/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  HOST_WIDE_INT *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);

  gcc_checking_assert (aggpos);
  /* Direct (non-aggregate) parameter reference?  */
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  /* Follow a single SSA copy; a non-reference rhs is handled by recursing,
     a reference rhs falls through to the aggregate-load check below.  */
  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  /* Otherwise try to interpret OP as a load from an aggregate passed to
     some parameter.  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
|
|
1073
|
|
/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of statements goes away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      /* Return statements always vanish when the body is inlined.  */
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  /* get_base_address may return NULL for bare decls/SSA names;
	     fall back to the operand itself in that case.  */
	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle testcase like
	     struct a {int a,b;};
	     struct a
	     retrurnsturct (void)
	     {
	       struct a a ={1,2};
	       return a;
	     }

	     This translate into:

	     retrurnsturct ()
	     {
	       int a$b;
	       int a$a;
	       struct a a;
	       struct a D.2739;

	       <bb 2>:
	       D.2739.a = 1;
	       D.2739.b = 2;
	       return D.2739;

	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  /* A stored value that is a register or invariant is free when the
	     destination itself is free.  */
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
|
|
1204
|
|
1205
|
|
/* If BB ends by a conditional we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   struct ipa_fn_summary *summary,
				   basic_block bb)
{
  gimple *last;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree op2;

  /* Only GIMPLE_COND statements comparing against an interprocedural
     invariant can be turned into predicates.  */
  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);
  /* TODO: handle conditionals like
     var = op0 < 4;
     if (var != 0).  */
  if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      /* The true edge gets the condition itself; the false edge gets its
	 inversion.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.  */
	  if (this_code != ERROR_MARK)
	    {
	      predicate p
		= add_condition (summary, index, size, &aggpos, this_code,
				 unshare_expr_without_location
				 (gimple_cond_rhs (last)));
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
					 &aggpos))
    return;
  /* Only the false (else) edge of the builtin_constant_p test is
     predicated with is_not_constant.  */
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, index, size, &aggpos,
				   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}
|
|
1289
|
|
1290
|
|
/* If BB ends by a switch we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     struct ipa_fn_summary *summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  /* Predicates are only useful when the switch index is a parameter (or
     an aggregate item of one).  */
  if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    return;

  /* Start each outgoing edge at false; case labels OR their range
     predicates in below (several labels may share one edge).  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }
  n = gimple_switch_num_labels (last);
  for (case_idx = 0; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min, max;
      predicate p;

      e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
      min = CASE_LOW (cl);
      max = CASE_HIGH (cl);

      /* For default we might want to construct predicate that none
	 of cases is met, but it is bit hard to do not having negations
	 of conditionals handy.  */
      if (!min && !max)
	p = true;
      else if (!max)
	/* Single-value label: index == min.  */
	p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
			   unshare_expr_without_location (min));
      else
	{
	  /* Range label: min <= index && index <= max.  */
	  predicate p1, p2;
	  p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
			      unshare_expr_without_location (min));
	  p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
			      unshare_expr_without_location (max));
	  p = p1 & p2;
	}
      *(struct predicate *) e->aux
	= p.or_with (summary->conds, *(struct predicate *) e->aux);
    }
}
|
|
1354
|
|
1355
|
|
/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       struct ipa_fn_summary *summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  /* First attach predicates to the outgoing edges of conditional and
     switch statements.  */
  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  /* A block is executable when any predecessor is, conjoined with
	     the predicate on the connecting edge (if any).  */
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(struct predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  /* Once the predicate is trivially true it cannot grow;
		     stop scanning predecessors.  */
		  if (p == true)
		    break;
		}
	    }
	  if (p == false)
	    gcc_checking_assert (!bb->aux);
	  else
	    {
	      if (!bb->aux)
		{
		  /* First time the block became reachable; iterate again
		     so successors see the new predicate.  */
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotone data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
		  if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}
	    }
	}
    }
}
|
|
1428
|
|
1429
|
|
/* Return predicate specifying when the expression EXPR might have result that
   is not a compile time constant.  INFO describes the function parameters,
   SUMMARY collects the conditions, NONCONSTANT_NAMES maps SSA name versions
   to previously computed predicates.  Recurses into binary, comparison and
   COND_EXPR operands; aborts on any other expression class.  */

static predicate
will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
				    struct ipa_fn_summary *summary,
				    tree expr,
				    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;
  HOST_WIDE_INT size;

  /* Unary operations do not change constancy of their operand.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  /* A parameter is nonconstant exactly when it changed.  */
  parm = unmodified_parm (NULL, expr, &size);
  if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
    return add_condition (summary, index, size, NULL, predicate::changed,
			  NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      /* Nonconstant when either operand is; short-circuit once the
	 first operand is trivially nonconstant.  */
      predicate p1 = will_be_nonconstant_expr_predicate
	(info, summary, TREE_OPERAND (expr, 0),
	 nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 1),
					       nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      /* Same OR-combination over condition and both arms.  */
      predicate p1 = will_be_nonconstant_expr_predicate
	(info, summary, TREE_OPERAND (expr, 0),
	 nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 1),
					       nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
|
|
1495
|
|
1496
|
|
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  Also records the resulting predicate into
   NONCONSTANT_NAMES for the statement's SSA lhs, if any.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       struct ipa_fn_summary *summary,
			       gimple *stmt,
			       vec<predicate> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  predicate op_non_const;
  bool is_load;
  int base_index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op;
      gcc_assert (gimple_assign_single_p (stmt));
      op = gimple_assign_rhs1 (stmt);
      if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
					     &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  /* Start from the "loaded value changed" condition (or false for
     non-loads) and OR in a condition for every operand.  */
  if (is_load)
    op_non_const =
      add_condition (summary, base_index, size, &aggpos, predicate::changed,
		     NULL);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      HOST_WIDE_INT size;
      tree parm = unmodified_parm (stmt, use, &size);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  /* The load base parameter is already covered by op_non_const.  */
	  if (index != base_index)
	    p = add_condition (summary, index, size, NULL, predicate::changed,
			       NULL_TREE);
	  else
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Remember the result for the SSA lhs so later statements can reuse it.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
|
|
1591
|
|
/* Data shared with the record_modified callback: the set of basic blocks
   where the tracked value may be set, and the statement whose own vdef
   must be ignored during the walk.  */
struct record_modified_bb_info
{
  bitmap bb_set;	/* Indices of blocks that may modify the value.  */
  gimple *stmt;		/* Statement being analyzed; its vdef is skipped.  */
};
|
|
1597
|
|
1598 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
|
|
1599 probability how often it changes between USE_BB.
|
|
1600 INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
|
|
1601 is in different loop nest, we can do better.
|
|
1602 This is all just estimate. In theory we look for minimal cut separating
|
|
1603 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
|
|
1604 anyway. */
|
|
1605
|
|
1606 static basic_block
|
|
1607 get_minimal_bb (basic_block init_bb, basic_block use_bb)
|
|
1608 {
|
|
1609 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
|
|
1610 if (l && l->header->frequency < init_bb->frequency)
|
|
1611 return l->header;
|
|
1612 return init_bb;
|
|
1613 }
|
|
1614
|
|
1615 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
|
|
1616 set except for info->stmt. */
|
|
1617
|
|
1618 static bool
|
|
1619 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
|
|
1620 {
|
|
1621 struct record_modified_bb_info *info =
|
|
1622 (struct record_modified_bb_info *) data;
|
|
1623 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
|
|
1624 return false;
|
|
1625 bitmap_set_bit (info->bb_set,
|
|
1626 SSA_NAME_IS_DEFAULT_DEF (vdef)
|
|
1627 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
|
|
1628 : get_minimal_bb
|
|
1629 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
|
|
1630 gimple_bb (info->stmt))->index);
|
|
1631 return false;
|
|
1632 }
|
|
1633
|
|
/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do a conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      int init_freq;

      /* Without frequency information assume the worst.  */
      if (!bb->frequency)
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
	init_freq = get_minimal_bb
	  (gimple_bb (SSA_NAME_DEF_STMT (base)),
	   gimple_bb (stmt))->frequency;

      if (!init_freq)
	init_freq = 1;
      if (init_freq < bb->frequency)
	return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
      else
	return REG_BR_PROB_BASE;
    }
  else
    {
      /* Memory operand: find all blocks where the memory may be set and
	 take the cheapest one as the initialization point.  */
      ao_ref refd;
      int max;
      struct record_modified_bb_info info;
      bitmap_iterator bi;
      unsigned index;
      tree init = ctor_for_folding (base);

      /* A known constructor means the value never changes.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->frequency)
	return REG_BR_PROB_BASE;
      ao_ref_init (&refd, op);
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			  NULL);
      /* A modification in the call's own block means it may change on
	 every invocation.  */
      if (bitmap_bit_p (info.bb_set, bb->index))
	{
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      /* Assume that every memory is initialized at entry.
	 TODO: Can we easily determine if value is always defined
	 and thus we may skip entry block?  */
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
	max = 1;

      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);

      BITMAP_FREE (info.bb_set);
      if (max < bb->frequency)
	return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
      else
	return REG_BR_PROB_BASE;
    }
}
|
|
1725
|
|
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the pointer the predicate in *P.  */

static bool
phi_result_unknown_predicate (struct ipa_node_params *info,
			      ipa_fn_summary *summary, basic_block bb,
			      predicate *p,
			      vec<predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  /* A single-predecessor block has no merging PHIs worth predicating;
     the always-false predicate is trivially correct.  */
  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  /* Verify that every predecessor is either the condition block itself
     (half diamond) or a forwarder reached directly from one common
     condition block (full diamond).  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  /* The shared condition must be a conditional jump comparing against an
     interprocedural invariant.  */
  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (info, summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  /* A trivially-true predicate carries no information.  */
  if (*p == true)
    return false;
  else
    return true;
}
|
|
1784
|
|
/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  */

static void
predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi,
			  predicate *p,
			  vec<predicate> nonconstant_names)
{
  unsigned i;

  /* OR in the nonconstancy predicate of every non-invariant argument;
     stop early once the combined predicate is trivially true.  */
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
	{
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  *p = p->or_with (summary->conds,
			   nonconstant_names[SSA_NAME_VERSION (arg)]);
	  if (*p == true)
	    return;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}
|
|
1817
|
|
1818 /* Return predicate specifying when array index in access OP becomes non-constant. */
|
|
1819
|
|
1820 static predicate
|
|
1821 array_index_predicate (ipa_fn_summary *info,
|
|
1822 vec< predicate> nonconstant_names, tree op)
|
|
1823 {
|
|
1824 predicate p = false;
|
|
1825 while (handled_component_p (op))
|
|
1826 {
|
|
1827 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
|
|
1828 {
|
|
1829 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
|
|
1830 p = p.or_with (info->conds,
|
|
1831 nonconstant_names[SSA_NAME_VERSION
|
|
1832 (TREE_OPERAND (op, 1))]);
|
|
1833 }
|
|
1834 op = TREE_OPERAND (op, 0);
|
|
1835 }
|
|
1836 return p;
|
|
1837 }
|
|
1838
|
|
/* For a typical usage of __builtin_expect (a<b, 1), we
   may introduce an extra relation stmt:
   With the builtin, we have
     t1 = a <= b;
     t2 = (long int) t1;
     t3 = __builtin_expect (t2, 1);
     if (t3 != 0)
       goto ...
   Without the builtin, we have
     if (a<=b)
       goto...
   This affects the size/time estimation and may have
   an impact on the earlier inlining.
   Here find this pattern and fix it up later.

   Returns the GIMPLE_COND consuming the builtin's result when BB contains
   the pattern, NULL otherwise.  */

static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
	{
	  tree var = gimple_call_lhs (stmt);
	  tree arg = gimple_call_arg (stmt, 0);
	  use_operand_p use_p;
	  gimple *use_stmt;
	  bool match = false;
	  bool done = false;

	  if (!var || !arg)
	    continue;
	  gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Walk backward through conversions feeding the builtin's
	     argument until a comparison (match) or anything else (no
	     match) is found.  */
	  while (TREE_CODE (arg) == SSA_NAME)
	    {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
	      if (!is_gimple_assign (stmt_tmp))
		break;
	      switch (gimple_assign_rhs_code (stmt_tmp))
		{
		case LT_EXPR:
		case LE_EXPR:
		case GT_EXPR:
		case GE_EXPR:
		case EQ_EXPR:
		case NE_EXPR:
		  match = true;
		  done = true;
		  break;
		CASE_CONVERT:
		  break;
		default:
		  done = true;
		  break;
		}
	      if (done)
		break;
	      arg = gimple_assign_rhs1 (stmt_tmp);
	    }

	  /* The builtin's result must feed exactly one GIMPLE_COND.  */
	  if (match && single_imm_use (var, &use_p, &use_stmt)
	      && gimple_code (use_stmt) == GIMPLE_COND)
	    return use_stmt;
	}
    }
  return NULL;
}
|
|
1910
|
|
/* Return true when the basic block contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too.  When it is false, the RESX is not necessary
   on the end of basic block.  */

static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The top-level query requires BB to end with a resume stmt.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Walk backwards through the block; anything other than debug stmts,
     clobbers and a leading label disqualifies it.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
|
|
1958
|
|
1959 /* Return true if STMT compute a floating point expression that may be affected
|
|
1960 by -ffast-math and similar flags. */
|
|
1961
|
|
1962 static bool
|
|
1963 fp_expression_p (gimple *stmt)
|
|
1964 {
|
|
1965 ssa_op_iter i;
|
|
1966 tree op;
|
|
1967
|
|
1968 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
|
|
1969 if (FLOAT_TYPE_P (TREE_TYPE (op)))
|
|
1970 return true;
|
|
1971 return false;
|
|
1972 }
|
|
1973
|
|
/* Analyze function body for NODE.
   EARLY indicates run from early optimization pipeline.

   Walks all statements of NODE's body, accumulating the size/time
   summary (info->size_time_table), per-call-edge costs, and the
   array_index/loop_iterations/loop_stride inline hints.  */

static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  sreal time = 0;
  /* Estimate static overhead for function prologue/epilogue and alignment. */
  int size = 2;
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  int freq;
  struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
  predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  predicate array_index = true;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  memset(&fbi, 0, sizeof(fbi));
  info->conds = NULL;
  info->size_time_table = NULL;

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      if (!early)
        loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = IPA_NODE_REF (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
	  fbi.param_count = count_formal_params(node->decl);
	  /* One predicate slot per SSA name version.  */
	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length ());
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list. */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  /* Account the not-inlined call overhead of the function itself.  */
  bb_predicate = predicate::not_inlined ();
  info->account_size_time (2 * ipa_fn_summary::size_scale, 0, bb_predicate,
			   bb_predicate);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info);
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      freq = compute_call_stmt_bb_frequency (node->decl, bb);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(predicate *) bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      if (fbi.info && nonconstant_names.exists ())
	{
	  predicate phi_predicate;
	  bool first_phi = true;

	  /* All PHIs in a BB share the same controlling conditions, so
	     the predicate is computed once, for the first PHI only.  */
	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      if (first_phi
		  && !phi_result_unknown_predicate (fbi.info, info, bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     builtin_expect call.  Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       ((double) freq) / CGRAPH_FREQ_BASE, this_size,
		       this_time);
	    }

	  /* Track SSA-name array indices on both loaded and stored
	     accesses; they feed the array_index inline hint.  */
	  if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
	    {
	      predicate this_array_index;
	      this_array_index =
		array_index_predicate (info, nonconstant_names,
				       gimple_assign_rhs1 (stmt));
	      if (this_array_index != false)
		array_index &= this_array_index;
	    }
	  if (gimple_store_p (stmt) && nonconstant_names.exists ())
	    {
	      predicate this_array_index;
	      this_array_index =
		array_index_predicate (info, nonconstant_names,
				       gimple_get_lhs (stmt));
	      if (this_array_index != false)
		array_index &= this_array_index;
	    }


	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      struct ipa_call_summary *es = ipa_call_summaries->get (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
	         resolved as constant.  We however don't want to optimize
	         out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      if (ipa_node_params_sum)
		{
		  /* Record how likely each actual argument is to change
		     between invocations of the call.  */
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		    }
		}

	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      this_time *= freq;

	      prob = eliminated_by_inlining_prob (stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      struct predicate p = bb_predicate & will_be_nonconstant;

	      /* We can ignore statement when we proved it is never going
		 to happen, but we can not do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += this_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
	         size/time info attached to cgraph edges.  This is necessary
	         in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  if (prob)
		    {
		      /* PROB/2 of the cost survives only when the stmt is
			 not eliminated, i.e. when not inlined.  */
		      predicate ip = bb_predicate & predicate::not_inlined ();
		      info->account_size_time (this_size * prob,
					       (sreal)(this_time * prob)
					       / (CGRAPH_FREQ_BASE * 2), ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (sreal)(this_time * (2 - prob))
					     / (CGRAPH_FREQ_BASE * 2),
					     bb_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, "   fp_expression set\n");
		}

	      gcc_assert (time >= 0);
	      gcc_assert (size >= 0);
	    }
	}
    }
  set_hint_predicate (&ipa_fn_summaries->get (node)->array_index, array_index);
  time = time / CGRAPH_FREQ_BASE;
  free (order);

  if (nonconstant_names.exists () && !early)
    {
      struct loop *loop;
      predicate loop_iterations = true;
      predicate loop_stride = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      /* Compute the loop_iterations hint: when some exit count is known
	 only symbolically, record the predicate under which it stays
	 non-constant.  */
      FOR_EACH_LOOP (loop, 0)
	{
	  vec<edge> exits;
	  edge ex;
	  unsigned int j;
	  struct tree_niter_desc niter_desc;
	  bb_predicate = *(predicate *) loop->header->aux;

	  exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	      {
		predicate will_be_nonconstant
		  = will_be_nonconstant_expr_predicate (fbi.info, info,
							niter_desc.niter,
							nonconstant_names);
		if (will_be_nonconstant != true)
		  will_be_nonconstant = bb_predicate & will_be_nonconstant;
		if (will_be_nonconstant != true
		    && will_be_nonconstant != false)
		  /* This is slightly imprecise.  We may want to represent
		     each loop with independent predicate.  */
		  loop_iterations &= will_be_nonconstant;
	      }
	  exits.release ();
	}

      /* To avoid quadratic behavior we analyze stride predicates only
         with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  basic_block *body = get_loop_body (loop);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      bb_predicate = *(predicate *) body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  /* Only induction variables with a non-invariant step
		     are interesting for the loop_stride hint.  */
		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (fbi.info, info,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    /* This is slightly imprecise.  We may want to represent
		       each loop with independent predicate.  */
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  free (body);
	}
      set_hint_predicate (&ipa_fn_summaries->get (node)->loop_iterations,
			  loop_iterations);
      set_hint_predicate (&ipa_fn_summaries->get (node)->loop_stride,
			  loop_stride);
      scev_finalize ();
    }
  /* Release the per-BB and per-edge predicates computed above.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((predicate *) e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summaries->get (node)->time = time;
  ipa_fn_summaries->get (node)->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
|
|
2381
|
|
2382
|
|
/* Compute function summary.
   EARLY is true when we compute parameters during early opts.

   Resets and recomputes NODE's ipa_fn_summary: stack size estimate,
   inlinability, can_change_signature, and (via analyze_function_body)
   the size/time tables.  Thunks get a synthetic summary instead of a
   body walk.  */

void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;
  struct ipa_fn_summary *info;

  gcc_assert (!node->global.inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  info = ipa_fn_summaries->get (node);
  info->reset (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk.thunk_p
		    ? estimated_stack_frame_size (node) : 0;
  info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;
  info->stack_frame_offset = 0;

  if (node->thunk.thunk_p)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (node->callees);
      predicate t = true;

      node->local.can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale * 2, 2, t, t);
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      info->self_size = info->size;
      /* We can not inline instrumentation clones.  */
      if (node->thunk.add_pointer_bounds_args)
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_CHKP;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant relies on current_function_decl,
	 so the function must be entered before analyzing its body.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);

      /* Type attributes can use parameter indices to describe them.  */
      if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
	node->local.can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->local.can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply can not change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (DECL_BUILT_IN (cdecl)
		      && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
		      && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
			  || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
		    break;
		}
	      node->local.can_change_signature = !e;
	    }
	}
      /* Functions called by instrumentation thunk can't change signature
	 because instrumentation thunk modification is not supported.  */
      if (node->local.can_change_signature)
	for (e = node->callers; e; e = e->next_caller)
	  if (e->caller->thunk.thunk_p
	      && e->caller->thunk.add_pointer_bounds_args)
	    {
	      node->local.can_change_signature = false;
	      break;
	    }
      analyze_function_body (node, early);
      pop_cfun ();
    }
  for (e = node->callees; e; e = e->next_callee)
    if (e->callee->comdat_local_p ())
      break;
  node->calls_comdat_local = (e != NULL);

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  info->size = info->self_size;
  info->stack_frame_offset = 0;
  info->estimated_stack_size = info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary but because computation happens in
     different order the roundoff errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  gcc_assert (info->size == info->self_size);
}
|
|
2497
|
|
2498
|
|
2499 /* Compute parameters of functions used by inliner using
|
|
2500 current_function_decl. */
|
|
2501
|
|
2502 static unsigned int
|
|
2503 compute_fn_summary_for_current (void)
|
|
2504 {
|
|
2505 compute_fn_summary (cgraph_node::get (current_function_decl), true);
|
|
2506 return 0;
|
|
2507 }
|
|
2508
|
|
/* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
   KNOWN_CONTEXTS and KNOWN_AGGS.

   Return true when the call can be turned into a direct call to an
   available, inlinable target; in that case *SIZE and *TIME are reduced
   by the indirect-vs-direct call cost difference.  */

static bool
estimate_edge_devirt_benefit (struct cgraph_edge *ie,
			      int *size, int *time,
			      vec<tree> known_vals,
			      vec<ipa_polymorphic_call_context> known_contexts,
			      vec<ipa_agg_jump_function_p> known_aggs)
{
  tree target;
  struct cgraph_node *callee;
  struct ipa_fn_summary *isummary;
  enum availability avail;
  bool speculative;

  /* Without any context information there is nothing to resolve.  */
  if (!known_vals.exists () && !known_contexts.exists ())
    return false;
  if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
    return false;

  target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
					 known_aggs, &speculative);
  if (!target || speculative)
    return false;

  /* Account for difference in cost between indirect and direct calls.  */
  *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
  *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
  gcc_checking_assert (*time >= 0);
  gcc_checking_assert (*size >= 0);

  /* The size/time discount above applies regardless; the return value
     additionally requires the resolved target to be inlinable.  */
  callee = cgraph_node::get (target);
  if (!callee || !callee->definition)
    return false;
  callee = callee->function_symbol (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  isummary = ipa_fn_summaries->get (callee);
  return isummary->inlinable;
}
|
|
2550
|
|
/* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
   handle edge E with probability PROB.
   Set HINTS if edge may be devirtualized.
   KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
   site.  */

static inline void
estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
			     sreal *time,
			     int prob,
			     vec<tree> known_vals,
			     vec<ipa_polymorphic_call_context> known_contexts,
			     vec<ipa_agg_jump_function_p> known_aggs,
			     ipa_hints *hints)
{
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  int call_size = es->call_stmt_size;
  int call_time = es->call_stmt_time;
  int cur_size;
  /* Indirect edges may become direct given the known context; that
     both cheapens the call and sets the devirtualization hint.  */
  if (!e->callee
      && estimate_edge_devirt_benefit (e, &call_size, &call_time,
				       known_vals, known_contexts, known_aggs)
      && hints && e->maybe_hot_p ())
    *hints |= INLINE_HINT_indirect_call;
  cur_size = call_size * ipa_fn_summary::size_scale;
  *size += cur_size;
  if (min_size)
    *min_size += cur_size;
  /* Scale the call time by edge frequency and, unless the edge is
     taken unconditionally, by PROB.  */
  if (prob == REG_BR_PROB_BASE)
    *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
  else
    *time += ((sreal)call_time) * (prob * e->frequency)
	     / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
}
|
|
2585
|
|
2586
|
|
2587
|
|
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
   describe context of the call site.

   Recurses into already-inlined callees so their calls are accounted
   as part of NODE.  */

static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
			      int *min_size, sreal *time,
			      ipa_hints *hints,
			      clause_t possible_truths,
			      vec<tree> known_vals,
			      vec<ipa_polymorphic_call_context> known_contexts,
			      vec<ipa_agg_jump_function_p> known_aggs)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (e);

      /* Do not care about zero sized builtins.  */
      if (e->inline_failed && !es->call_stmt_size)
	{
	  gcc_checking_assert (!es->call_stmt_time);
	  continue;
	}
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	{
	  if (e->inline_failed)
	    {
	      /* Predicates of calls shall not use NOT_CHANGED codes,
		 so we do not need to compute probabilities.  */
	      estimate_edge_size_and_time (e, size,
					   es->predicate ? NULL : min_size,
					   time, REG_BR_PROB_BASE,
					   known_vals, known_contexts,
					   known_aggs, hints);
	    }
	  else
	    /* The callee is inlined; account its own calls instead.  */
	    estimate_calls_size_and_time (e->callee, size, min_size, time,
					  hints,
					  possible_truths,
					  known_vals, known_contexts,
					  known_aggs);
	}
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	estimate_edge_size_and_time (e, size,
				     es->predicate ? NULL : min_size,
				     time, REG_BR_PROB_BASE,
				     known_vals, known_contexts, known_aggs,
				     hints);
    }
}
|
|
2645
|
|
2646
|
|
/* Estimate size and time needed to execute NODE assuming
   POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
   information about NODE's arguments.  If non-NULL use also probability
   information present in INLINE_PARAM_SUMMARY vector.
   Additionally determine hints determined by the context.  Finally compute
   minimal size needed for the call that is independent on the call context and
   can be used for fast estimates.  Return the values in RET_SIZE,
   RET_MIN_SIZE, RET_TIME and RET_HINTS.  */

void
estimate_node_size_and_time (struct cgraph_node *node,
			     clause_t possible_truths,
			     clause_t nonspec_possible_truths,
			     vec<tree> known_vals,
			     vec<ipa_polymorphic_call_context> known_contexts,
			     vec<ipa_agg_jump_function_p> known_aggs,
			     int *ret_size, int *ret_min_size,
			     sreal *ret_time,
			     sreal *ret_nonspecialized_time,
			     ipa_hints *ret_hints,
			     vec<inline_param_summary>
			     inline_param_summary)
{
  struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
  size_time_entry *e;
  int size = 0;
  sreal time = 0;
  int min_size = 0;
  ipa_hints hints = 0;
  int i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bool found = false;
      fprintf (dump_file, "   Estimating body: %s/%i\n"
	       "   Known to be false: ", node->name (),
	       node->order);

      for (i = predicate::not_inlined_condition;
	   i < (predicate::first_dynamic_condition
		+ (int) vec_safe_length (info->conds)); i++)
	if (!(possible_truths & (1 << i)))
	  {
	    if (found)
	      fprintf (dump_file, ", ");
	    found = true;
	    dump_condition (dump_file, info->conds, i);
	  }
    }

  estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
				known_vals, known_contexts, known_aggs);
  sreal nonspecialized_time = time;

  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
    {
      bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);

      /* Because predicates are conservative, it can happen that nonconst is 1
	 but exec is 0.  */
      if (exec)
        {
          bool nonconst = e->nonconst_predicate.evaluate (possible_truths);

	  gcc_checking_assert (e->time >= 0);
	  gcc_checking_assert (time >= 0);

	  /* We compute specialized size only because size of nonspecialized
	     copy is context independent.

	     The difference between nonspecialized execution and specialized is
	     that nonspecialized is not going to have optimized out computations
	     known to be constant in a specialized setting.  */
	  if (nonconst)
	    size += e->size;
	  nonspecialized_time += e->time;
	  if (!nonconst)
	    ;
	  else if (!inline_param_summary.exists ())
	    {
	      /* NONCONST is known true here; add the entry's full time.  */
	      if (nonconst)
	        time += e->time;
	    }
	  else
	    {
	      /* Scale the entry's time by the probability that its
		 nonconst predicate actually holds given the per-param
		 change probabilities.  */
	      int prob = e->nonconst_predicate.probability
			 (info->conds, possible_truths,
			  inline_param_summary);
	      gcc_checking_assert (prob >= 0);
	      gcc_checking_assert (prob <= REG_BR_PROB_BASE);
	      time += e->time * prob / REG_BR_PROB_BASE;
	    }
	  gcc_checking_assert (time >= 0);
        }
     }
  /* The first size_time_table entry is always the unconditional one
     (set up by analyze_function_body), so it serves as the minimum.  */
  gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
  gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
  min_size = (*info->size_time_table)[0].size;
  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);
  /* nonspecialized_time should be always bigger than specialized time.
     Roundoff issues however may get into the way.  */
  gcc_checking_assert ((nonspecialized_time - time) >= -1);

  /* Roundoff issues may make specialized time bigger than nonspecialized
     time.  We do not really want that to happen because some heuristics
     may get confused by seeing negative speedups.  */
  if (time > nonspecialized_time)
    time = nonspecialized_time;

  if (info->loop_iterations
      && !info->loop_iterations->evaluate (possible_truths))
    hints |= INLINE_HINT_loop_iterations;
  if (info->loop_stride
      && !info->loop_stride->evaluate (possible_truths))
    hints |= INLINE_HINT_loop_stride;
  if (info->array_index
      && !info->array_index->evaluate (possible_truths))
    hints |= INLINE_HINT_array_index;
  if (info->scc_no)
    hints |= INLINE_HINT_in_scc;
  if (DECL_DECLARED_INLINE_P (node->decl))
    hints |= INLINE_HINT_declared_inline;

  size = RDIV (size, ipa_fn_summary::size_scale);
  min_size = RDIV (min_size, ipa_fn_summary::size_scale);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n   size:%i time:%f nonspec time:%f\n", (int) size,
	     time.to_double (), nonspecialized_time.to_double ());
  if (ret_time)
    *ret_time = time;
  if (ret_nonspecialized_time)
    *ret_nonspecialized_time = nonspecialized_time;
  if (ret_size)
    *ret_size = size;
  if (ret_min_size)
    *ret_min_size = min_size;
  if (ret_hints)
    *ret_hints = hints;
  return;
}
|
|
2789
|
|
2790
|
|
2791 /* Estimate size and time needed to execute callee of EDGE assuming that
|
|
2792 parameters known to be constant at caller of EDGE are propagated.
|
|
2793 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
|
|
2794 and types for parameters. */
|
|
2795
|
|
2796 void
|
|
2797 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
|
|
2798 vec<tree> known_vals,
|
|
2799 vec<ipa_polymorphic_call_context>
|
|
2800 known_contexts,
|
|
2801 vec<ipa_agg_jump_function_p> known_aggs,
|
|
2802 int *ret_size, sreal *ret_time,
|
|
2803 sreal *ret_nonspec_time,
|
|
2804 ipa_hints *hints)
|
|
2805 {
|
|
2806 clause_t clause, nonspec_clause;
|
|
2807
|
|
2808 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
|
|
2809 &clause, &nonspec_clause);
|
|
2810 estimate_node_size_and_time (node, clause, nonspec_clause,
|
|
2811 known_vals, known_contexts,
|
|
2812 known_aggs, ret_size, NULL, ret_time,
|
|
2813 ret_nonspec_time, hints, vNULL);
|
|
2814 }
|
|
2815
|
|
2816
|
|
/* Update summary information of inline clones after inlining.
   Compute peak stack usage.  */

static void
inline_update_callee_summaries (struct cgraph_node *node, int depth)
{
  struct cgraph_edge *e;
  struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
  struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
  HOST_WIDE_INT peak;

  /* The inlined body's frame sits just after the caller's frame.  */
  callee_info->stack_frame_offset
    = caller_info->stack_frame_offset
      + caller_info->estimated_self_stack_size;
  peak = callee_info->stack_frame_offset
	 + callee_info->estimated_self_stack_size;
  /* Grow the peak stack usage recorded on the function everything was
     ultimately inlined into, if this clone's frame extends past it.  */
  if (ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
    ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
  ipa_propagate_frequency (node);
  /* Recurse into already-inlined callees (inline_failed is clear) and shift
     every outgoing edge's loop depth by the depth at the inlined call.  */
  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->inline_failed)
	inline_update_callee_summaries (e->callee, depth);
      ipa_call_summaries->get (e)->loop_depth += depth;
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->get (e)->loop_depth += depth;
}
|
|
2845
|
|
/* Update change_prob of EDGE after INLINED_EDGE has been inlined.
   When function A is inlined in B and A calls C with parameter that
   changes with probability PROB1 and C is known to be passthrough
   of argument of B that changes with probability PROB2, the probability
   of change is now PROB1*PROB2.  */

static void
remap_edge_change_prob (struct cgraph_edge *inlined_edge,
			struct cgraph_edge *edge)
{
  if (ipa_node_params_sum)
    {
      int i;
      struct ipa_edge_args *args = IPA_EDGE_REF (edge);
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct ipa_call_summary *inlined_es
	= ipa_call_summaries->get (inlined_edge);

      for (i = 0; i < ipa_get_cs_argument_count (args); i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  /* Only pass-through and ancestor jump functions forward a formal
	     parameter of the (now inlined) caller.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH
	      || jfunc->type == IPA_JF_ANCESTOR)
	    {
	      int id = jfunc->type == IPA_JF_PASS_THROUGH
		       ? ipa_get_jf_pass_through_formal_id (jfunc)
		       : ipa_get_jf_ancestor_formal_id (jfunc);
	      if (id < (int) inlined_es->param.length ())
		{
		  int prob1 = es->param[i].change_prob;
		  int prob2 = inlined_es->param[id].change_prob;
		  int prob = combine_probabilities (prob1, prob2);

		  /* Do not let rounding collapse two nonzero
		     probabilities to an impossible zero.  */
		  if (prob1 && prob2 && !prob)
		    prob = 1;

		  es->param[i].change_prob = prob;
		}
	    }
	}
    }
}
|
|
2888
|
|
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      struct ipa_fn_summary *info,
		      struct ipa_fn_summary *callee_info,
		      vec<int> operand_map,
		      vec<int> offset_map,
		      clause_t possible_truths,
		      predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  /* NEXT is cached up front because edge_set_predicate may remove E
     (a "false" predicate kills speculative edges).  */
  for (e = node->callees; e; e = next)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      predicate p;
      next = e->next_callee;

      if (e->inline_failed)
	{
	  /* A real (not yet inlined) call: rescale its parameter change
	     probabilities and translate its guarding predicate into the
	     caller's context.  */
	  remap_edge_change_prob (inlined_edge, e);

	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				     (info, callee_info, operand_map,
				      offset_map, possible_truths,
				      *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    /* Unconditional call: it is guarded only by the predicate of
	       the inlined call site itself.  */
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	/* Edge already inlined earlier: descend into its callee's edges.  */
	remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  /* Indirect calls are never inlined, so only probabilities and
     predicates need updating.  */
  for (e = node->indirect_calls; e; e = next)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      predicate p;
      next = e->next_callee;

      remap_edge_change_prob (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
				 (info, callee_info, operand_map, offset_map,
				  possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
|
|
2951
|
|
2952 /* Same as remap_predicate, but set result into hint *HINT. */
|
|
2953
|
|
2954 static void
|
|
2955 remap_hint_predicate (struct ipa_fn_summary *info,
|
|
2956 struct ipa_fn_summary *callee_info,
|
|
2957 predicate **hint,
|
|
2958 vec<int> operand_map,
|
|
2959 vec<int> offset_map,
|
|
2960 clause_t possible_truths,
|
|
2961 predicate *toplev_predicate)
|
|
2962 {
|
|
2963 predicate p;
|
|
2964
|
|
2965 if (!*hint)
|
|
2966 return;
|
|
2967 p = (*hint)->remap_after_inlining
|
|
2968 (info, callee_info,
|
|
2969 operand_map, offset_map,
|
|
2970 possible_truths, *toplev_predicate);
|
|
2971 if (p != false && p != true)
|
|
2972 {
|
|
2973 if (!*hint)
|
|
2974 set_hint_predicate (hint, p);
|
|
2975 else
|
|
2976 **hint &= p;
|
|
2977 }
|
|
2978 }
|
|
2979
|
|
/* We inlined EDGE.  Update summary of the function we inlined into.  */

void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  /* TO is the function whose body actually grows: the outermost node the
     caller was already inlined into, or the caller itself.  */
  struct cgraph_node *to = (edge->caller->global.inlined_to
			    ? edge->caller->global.inlined_to : edge->caller);
  struct ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  /* Maps from the callee's formal parameter indices to the caller's;
     -1 means "not a simple pass-through".  */
  vec<int> operand_map = vNULL;
  vec<int> offset_map = vNULL;
  int i;
  /* Predicate guarding the inlined call site itself; every remapped
     predicate is conjoined with it.  */
  predicate toplev_predicate;
  predicate true_p = true;
  struct ipa_call_summary *es = ipa_call_summaries->get (edge);

  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;

  /* Work out which of the callee's conditions are already decided by the
     arguments passed at this call site.  */
  if (callee_info->conds)
    evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
  if (ipa_node_params_sum && callee_info->conds)
    {
      struct ipa_edge_args *args = IPA_EDGE_REF (edge);
      int count = ipa_get_cs_argument_count (args);
      int i;

      if (count)
	{
	  operand_map.safe_grow_cleared (count);
	  offset_map.safe_grow_cleared (count);
	}
      /* Build the parameter translation maps from the call's jump
	 functions.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
	}
    }
  /* Fold each of the callee's size/time entries into TO's table, with
     predicates translated into TO's context.  */
  for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
    {
      predicate p;
      p = e->exec_predicate.remap_after_inlining
			     (info, callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
			     (info, callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      /* Entries whose predicates became false are optimized away after
	 inlining; do not account them.  */
      if (p != false && nonconstp != false)
	{
	  /* Scale the callee's time by the relative execution frequency of
	     this call and by the probability the entry is not constant.  */
	  sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, es->param);
	  add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  /* Translate summaries of edges that now hang off TO, and the callee's
     hint predicates.  */
  remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->loop_iterations,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->loop_stride,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->array_index,
			operand_map, offset_map, clause, &toplev_predicate);

  inline_update_callee_summaries (edge->callee,
				  ipa_call_summaries->get (edge)->loop_depth);

  /* We do not maintain predicates of inlined edges, free it.  */
  edge_set_predicate (edge, &true_p);
  /* Similarly remove param summaries.  */
  es->param.release ();
  operand_map.release ();
  offset_map.release ();
}
|
|
3095
|
|
3096 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
|
|
3097 and time. Recompute it. */
|
|
3098
|
|
3099 void
|
|
3100 ipa_update_overall_fn_summary (struct cgraph_node *node)
|
|
3101 {
|
|
3102 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
|
|
3103 size_time_entry *e;
|
|
3104 int i;
|
|
3105
|
|
3106 info->size = 0;
|
|
3107 info->time = 0;
|
|
3108 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
|
|
3109 {
|
|
3110 info->size += e->size;
|
|
3111 info->time += e->time;
|
|
3112 }
|
|
3113 estimate_calls_size_and_time (node, &info->size, &info->min_size,
|
|
3114 &info->time, NULL,
|
|
3115 ~(clause_t) (1 << predicate::false_condition),
|
|
3116 vNULL, vNULL, vNULL);
|
|
3117 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
|
|
3118 }
|
|
3119
|
|
3120
|
|
3121 /* This function performs intraprocedural analysis in NODE that is required to
|
|
3122 inline indirect calls. */
|
|
3123
|
|
3124 static void
|
|
3125 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
|
|
3126 {
|
|
3127 ipa_analyze_node (node);
|
|
3128 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3129 {
|
|
3130 ipa_print_node_params (dump_file, node);
|
|
3131 ipa_print_node_jump_functions (dump_file, node);
|
|
3132 }
|
|
3133 }
|
|
3134
|
|
3135
|
|
/* Compute function summary of NODE: analyze indirect calls where relevant
   and note function body size.  */

void
inline_analyze_function (struct cgraph_node *node)
{
  push_cfun (DECL_STRUCT_FUNCTION (node->decl));

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
	     node->name (), node->order);
  /* Jump-function analysis makes no sense for thunks or when the
     function is not being optimized.  */
  if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
    inline_indirect_intraprocedural_analysis (node);
  compute_fn_summary (node, false);
  /* When not optimizing, forbid inlining of all the function's calls.  */
  if (!optimize)
    {
      struct cgraph_edge *e;
      for (e = node->callees; e; e = e->next_callee)
	e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      for (e = node->indirect_calls; e; e = e->next_callee)
	e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
    }

  pop_cfun ();
}
|
|
3160
|
|
3161
|
|
/* Called when new function is inserted to callgraph late.  Analyze it so
   it gets a summary like every other function.  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  inline_analyze_function (node);
}
|
|
3169
|
|
/* Generate function summaries for all defined functions (the
   generate_summary hook of the fnsummary IPA pass).  */

static void
ipa_fn_summary_generate (void)
{
  struct cgraph_node *node;

  /* Record versionability up front; cloning machinery queries it later.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    if (DECL_STRUCT_FUNCTION (node->decl))
      node->local.versionable = tree_versionable_function_p (node->decl);

  ipa_fn_summary_alloc ();

  /* Make sure functions added later also get summaries.  */
  ipa_fn_summaries->enable_insertion_hook ();

  ipa_register_cgraph_hooks ();

  /* Aliases share the summary of their target; unoptimized bodies only
     need summaries when they may be streamed for LTO/offloading.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->alias
	&& (flag_generate_lto || flag_generate_offload|| flag_wpa
	    || opt_for_fn (node->decl, optimize)))
      inline_analyze_function (node);
}
|
|
3193
|
|
3194
|
|
/* Read inline summary for edge E from IB.  Must mirror
   write_ipa_call_summary exactly.  */

static void
read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
{
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  predicate p;
  int length, i;

  es->call_stmt_size = streamer_read_uhwi (ib);
  es->call_stmt_time = streamer_read_uhwi (ib);
  es->loop_depth = streamer_read_uhwi (ib);
  p.stream_in (ib);
  edge_set_predicate (e, &p);
  /* Per-parameter change probabilities; zero length means the writer had
     no param summary for this edge.  */
  length = streamer_read_uhwi (ib);
  if (length)
    {
      es->param.safe_grow_cleared (length);
      for (i = 0; i < length; i++)
	es->param[i].change_prob = streamer_read_uhwi (ib);
    }
}
|
|
3217
|
|
3218
|
|
/* Stream in inline summaries from the section.  DATA of length LEN is the
   raw LTO section belonging to FILE_DATA; the layout must mirror
   ipa_fn_summary_write.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  /* Section layout: header, cfg, main stream, string table.  */
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      struct ipa_fn_summary *info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      /* Resolve the node this record belongs to via the symtab encoder.  */
      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      info = ipa_fn_summaries->get (node);

      /* Scalar fields; self_* and the aggregate values start out equal
	 since nothing has been inlined yet.  */
      info->estimated_stack_size
	= info->estimated_self_stack_size = streamer_read_uhwi (&ib);
      info->size = info->self_size = streamer_read_uhwi (&ib);
      info->time = sreal::stream_in (&ib);

      bp = streamer_read_bitpack (&ib);
      info->inlinable = bp_unpack_value (&bp, 1);
      info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
      info->fp_expressions = bp_unpack_value (&bp, 1);

      /* Conditions referenced by the predicates below.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->conds);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.size = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  /* Offset is streamed only for conditions on aggregate
	     contents.  */
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  vec_safe_push (info->conds, c);
	}
      /* Size/time table entries.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->size_time_table);
      for (j = 0; j < count2; j++)
	{
	  struct size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  vec_safe_push (info->size_time_table, e);
	}

      /* Hint predicates, in the same order the writer emits them.  */
      p.stream_in (&ib);
      set_hint_predicate (&info->loop_iterations, p);
      p.stream_in (&ib);
      set_hint_predicate (&info->loop_stride, p);
      p.stream_in (&ib);
      set_hint_predicate (&info->array_index, p);
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
|
|
3313
|
|
3314
|
|
/* Read inline summary.  Jump functions are shared among ipa-cp
   and inliner, so when ipa-cp is active, we don't need to write them
   twice.  */

static void
ipa_fn_summary_read (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  ipa_fn_summary_alloc ();

  /* Process the summary section of every LTO input file.  */
  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data = lto_get_section_data (file_data,
					       LTO_section_ipa_fn_summary,
					       NULL, &len);
      if (data)
	inline_read_section (file_data, data, len);
      else
	/* Fatal error here.  We do not want to support compiling ltrans units
	   with different version of compiler or different flags than the WPA
	   unit, so this should never happen.  */
	fatal_error (input_location,
		     "ipa inline summary is missing in input file");
    }
  ipa_register_cgraph_hooks ();
  /* When ipa-cp is active it streams the jump functions itself.  */
  if (!flag_ipa_cp)
    ipa_prop_read_jump_functions ();

  gcc_assert (ipa_fn_summaries);
  /* Ensure functions materialized later still get summaries.  */
  ipa_fn_summaries->enable_insertion_hook ();
}
|
|
3350
|
|
3351
|
|
/* Write inline summary for edge E to OB.  Must mirror
   read_ipa_call_summary exactly.  */

static void
write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
{
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  int i;

  streamer_write_uhwi (ob, es->call_stmt_size);
  streamer_write_uhwi (ob, es->call_stmt_time);
  streamer_write_uhwi (ob, es->loop_depth);
  /* A missing predicate is encoded as 0 (predicate::stream_out emits a
     nonzero clause count for real predicates).  */
  if (es->predicate)
    es->predicate->stream_out (ob);
  else
    streamer_write_uhwi (ob, 0);
  streamer_write_uhwi (ob, es->param.length ());
  for (i = 0; i < (int) es->param.length (); i++)
    streamer_write_uhwi (ob, es->param[i].change_prob);
}
|
|
3371
|
|
3372
|
|
/* Write inline summary for node in SET.
   Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
   active, we don't need to write them twice.  */

static void
ipa_fn_summary_write (void)
{
  struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  unsigned int count = 0;
  int i;

  /* First pass: count records so the reader knows how many to expect.  */
  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	count++;
    }
  streamer_write_uhwi (ob, count);

  /* Second pass: emit one record per defined non-alias function, in the
     exact order inline_read_section consumes it.  */
  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	{
	  struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
	  struct bitpack_d bp;
	  struct cgraph_edge *edge;
	  int i;
	  size_time_entry *e;
	  struct condition *c;

	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
	  /* Only the self_* values are streamed; the reader rebuilds the
	     aggregate values from them.  */
	  streamer_write_hwi (ob, info->estimated_self_stack_size);
	  streamer_write_hwi (ob, info->self_size);
	  info->time.stream_out (ob);
	  bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, info->inlinable, 1);
	  bp_pack_value (&bp, info->contains_cilk_spawn, 1);
	  bp_pack_value (&bp, info->fp_expressions, 1);
	  streamer_write_bitpack (&bp);
	  /* Conditions referenced by the predicates below.  */
	  streamer_write_uhwi (ob, vec_safe_length (info->conds));
	  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
	    {
	      streamer_write_uhwi (ob, c->operand_num);
	      streamer_write_uhwi (ob, c->size);
	      streamer_write_uhwi (ob, c->code);
	      stream_write_tree (ob, c->val, true);
	      bp = bitpack_create (ob->main_stream);
	      bp_pack_value (&bp, c->agg_contents, 1);
	      bp_pack_value (&bp, c->by_ref, 1);
	      streamer_write_bitpack (&bp);
	      /* Offset is meaningful only for aggregate conditions.  */
	      if (c->agg_contents)
		streamer_write_uhwi (ob, c->offset);
	    }
	  /* Size/time table.  */
	  streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
	  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
	    {
	      streamer_write_uhwi (ob, e->size);
	      e->time.stream_out (ob);
	      e->exec_predicate.stream_out (ob);
	      e->nonconst_predicate.stream_out (ob);
	    }
	  /* Hint predicates; a missing one is encoded as 0.  */
	  if (info->loop_iterations)
	    info->loop_iterations->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  if (info->loop_stride)
	    info->loop_stride->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  if (info->array_index)
	    info->array_index->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  for (edge = cnode->callees; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	}
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);

  /* When ipa-cp is active it streams the jump functions itself.  */
  if (!flag_ipa_cp)
    ipa_prop_write_jump_functions ();
}
|
|
3463
|
|
3464
|
|
/* Release inline summary.  Frees both the function and call summaries and
   the edge predicate pool; safe to call when summaries were never
   allocated.  */

void
ipa_free_fn_summary (void)
{
  struct cgraph_node *node;
  /* Summaries are allocated together; if call summaries are gone the
     function summaries are too.  */
  if (!ipa_call_summaries)
    return;
  /* Aliases share the summary of their target, so reset only once.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->alias)
      ipa_fn_summaries->get (node)->reset (node);
  ipa_fn_summaries->release ();
  ipa_fn_summaries = NULL;
  ipa_call_summaries->release ();
  delete ipa_call_summaries;
  ipa_call_summaries = NULL;
  edge_predicate_pool.release ();
}
|
|
3483
|
|
namespace {

/* Descriptor of the GIMPLE pass computing the local (per-function)
   part of the summary.  */
const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Per-function pass that computes the function summary for the current
   function only.  */
class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return compute_fn_summary_for_current ();
    }

}; // class pass_local_fn_summary

} // anon namespace

/* Factory used by the pass manager to instantiate the pass.  */
gimple_opt_pass *
make_pass_local_fn_summary (gcc::context *ctxt)
{
  return new pass_local_fn_summary (ctxt);
}
|
|
3522
|
|
3523
|
|
/* Free inline summary.  */

namespace {

/* Descriptor of the simple IPA pass releasing function summaries.  */
const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  /* Early optimizations may make function unreachable.  We cannot
     remove unreachable functions as part of the early opts pass because
     TODOs are run before subpasses.  Do it here.  */
  ( TODO_remove_functions | TODO_dump_symtab ), /* todo_flags_finish */
};

/* Pass whose only job is to call ipa_free_fn_summary.  */
class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      ipa_free_fn_summary ();
      return 0;
    }

}; // class pass_ipa_free_fn_summary

} // anon namespace

/* Factory used by the pass manager to instantiate the pass.  */
simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_free_fn_summary (ctxt);
}
|
|
3567
|
|
namespace {

/* Descriptor of the IPA pass that generates and streams function
   summaries.  */
const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

/* The pass does all its work through the summary generate/write/read
   hooks; its execute method is a no-op.  */
class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace

/* Factory used by the pass manager to instantiate the pass.  */
ipa_opt_pass_d *
make_pass_ipa_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_fn_summary (ctxt);
}
|