Mercurial > hg > CbC > CbC_gcc
comparison gcc/ipa-fnsummary.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Function summary pass. | 1 /* Function summary pass. |
2 Copyright (C) 2003-2018 Free Software Foundation, Inc. | 2 Copyright (C) 2003-2020 Free Software Foundation, Inc. |
3 Contributed by Jan Hubicka | 3 Contributed by Jan Hubicka |
4 | 4 |
5 This file is part of GCC. | 5 This file is part of GCC. |
6 | 6 |
7 GCC is free software; you can redistribute it and/or modify it under | 7 GCC is free software; you can redistribute it and/or modify it under |
65 #include "diagnostic.h" | 65 #include "diagnostic.h" |
66 #include "fold-const.h" | 66 #include "fold-const.h" |
67 #include "print-tree.h" | 67 #include "print-tree.h" |
68 #include "tree-inline.h" | 68 #include "tree-inline.h" |
69 #include "gimple-pretty-print.h" | 69 #include "gimple-pretty-print.h" |
70 #include "params.h" | |
71 #include "cfganal.h" | 70 #include "cfganal.h" |
72 #include "gimple-iterator.h" | 71 #include "gimple-iterator.h" |
73 #include "tree-cfg.h" | 72 #include "tree-cfg.h" |
74 #include "tree-ssa-loop-niter.h" | 73 #include "tree-ssa-loop-niter.h" |
75 #include "tree-ssa-loop.h" | 74 #include "tree-ssa-loop.h" |
81 #include "ipa-utils.h" | 80 #include "ipa-utils.h" |
82 #include "cfgexpand.h" | 81 #include "cfgexpand.h" |
83 #include "gimplify.h" | 82 #include "gimplify.h" |
84 #include "stringpool.h" | 83 #include "stringpool.h" |
85 #include "attribs.h" | 84 #include "attribs.h" |
85 #include "tree-into-ssa.h" | |
86 | 86 |
87 /* Summaries. */ | 87 /* Summaries. */ |
88 function_summary <ipa_fn_summary *> *ipa_fn_summaries; | 88 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries; |
89 call_summary <ipa_call_summary *> *ipa_call_summaries; | 89 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries; |
90 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries; | |
90 | 91 |
91 /* Edge predicates goes here. */ | 92 /* Edge predicates goes here. */ |
92 static object_allocator<predicate> edge_predicate_pool ("edge predicates"); | 93 static object_allocator<predicate> edge_predicate_pool ("edge predicates"); |
93 | 94 |
94 | 95 |
132 if (hints & INLINE_HINT_declared_inline) | 133 if (hints & INLINE_HINT_declared_inline) |
133 { | 134 { |
134 hints &= ~INLINE_HINT_declared_inline; | 135 hints &= ~INLINE_HINT_declared_inline; |
135 fprintf (f, " declared_inline"); | 136 fprintf (f, " declared_inline"); |
136 } | 137 } |
137 if (hints & INLINE_HINT_array_index) | |
138 { | |
139 hints &= ~INLINE_HINT_array_index; | |
140 fprintf (f, " array_index"); | |
141 } | |
142 if (hints & INLINE_HINT_known_hot) | 138 if (hints & INLINE_HINT_known_hot) |
143 { | 139 { |
144 hints &= ~INLINE_HINT_known_hot; | 140 hints &= ~INLINE_HINT_known_hot; |
145 fprintf (f, " known_hot"); | 141 fprintf (f, " known_hot"); |
146 } | 142 } |
148 } | 144 } |
149 | 145 |
150 | 146 |
151 /* Record SIZE and TIME to SUMMARY. | 147 /* Record SIZE and TIME to SUMMARY. |
152 The accounted code will be executed when EXEC_PRED is true. | 148 The accounted code will be executed when EXEC_PRED is true. |
153 When NONCONST_PRED is false the code will evaulate to constant and | 149 When NONCONST_PRED is false the code will evaluate to constant and |
154 will get optimized out in specialized clones of the function. */ | 150 will get optimized out in specialized clones of the function. |
151 If CALL is true account to call_size_time_table rather than | |
152 size_time_table. */ | |
155 | 153 |
156 void | 154 void |
157 ipa_fn_summary::account_size_time (int size, sreal time, | 155 ipa_fn_summary::account_size_time (int size, sreal time, |
158 const predicate &exec_pred, | 156 const predicate &exec_pred, |
159 const predicate &nonconst_pred_in) | 157 const predicate &nonconst_pred_in, |
158 bool call) | |
160 { | 159 { |
161 size_time_entry *e; | 160 size_time_entry *e; |
162 bool found = false; | 161 bool found = false; |
163 int i; | 162 int i; |
164 predicate nonconst_pred; | 163 predicate nonconst_pred; |
164 vec<size_time_entry, va_gc> *table = call | |
165 ? call_size_time_table : size_time_table; | |
165 | 166 |
166 if (exec_pred == false) | 167 if (exec_pred == false) |
167 return; | 168 return; |
168 | 169 |
169 nonconst_pred = nonconst_pred_in & exec_pred; | 170 nonconst_pred = nonconst_pred_in & exec_pred; |
170 | 171 |
171 if (nonconst_pred == false) | 172 if (nonconst_pred == false) |
172 return; | 173 return; |
173 | 174 |
174 /* We need to create initial empty unconitional clause, but otherwie | 175 /* We need to create initial empty unconditional clause, but otherwise |
175 we don't need to account empty times and sizes. */ | 176 we don't need to account empty times and sizes. */ |
176 if (!size && time == 0 && size_time_table) | 177 if (!size && time == 0 && table) |
177 return; | 178 return; |
178 | 179 |
179 gcc_assert (time >= 0); | 180 /* Only for calls we are unaccounting what we previously recorded. */ |
180 | 181 gcc_checking_assert (time >= 0 || call); |
181 for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++) | 182 |
183 for (i = 0; vec_safe_iterate (table, i, &e); i++) | |
182 if (e->exec_predicate == exec_pred | 184 if (e->exec_predicate == exec_pred |
183 && e->nonconst_predicate == nonconst_pred) | 185 && e->nonconst_predicate == nonconst_pred) |
184 { | 186 { |
185 found = true; | 187 found = true; |
186 break; | 188 break; |
187 } | 189 } |
188 if (i == 256) | 190 if (i == max_size_time_table_size) |
189 { | 191 { |
190 i = 0; | 192 i = 0; |
191 found = true; | 193 found = true; |
192 e = &(*size_time_table)[0]; | 194 e = &(*table)[0]; |
193 if (dump_file && (dump_flags & TDF_DETAILS)) | 195 if (dump_file && (dump_flags & TDF_DETAILS)) |
194 fprintf (dump_file, | 196 fprintf (dump_file, |
195 "\t\tReached limit on number of entries, " | 197 "\t\tReached limit on number of entries, " |
196 "ignoring the predicate."); | 198 "ignoring the predicate."); |
197 } | 199 } |
210 else | 212 else |
211 fprintf (dump_file, "\n"); | 213 fprintf (dump_file, "\n"); |
212 } | 214 } |
213 if (!found) | 215 if (!found) |
214 { | 216 { |
215 struct size_time_entry new_entry; | 217 class size_time_entry new_entry; |
216 new_entry.size = size; | 218 new_entry.size = size; |
217 new_entry.time = time; | 219 new_entry.time = time; |
218 new_entry.exec_predicate = exec_pred; | 220 new_entry.exec_predicate = exec_pred; |
219 new_entry.nonconst_predicate = nonconst_pred; | 221 new_entry.nonconst_predicate = nonconst_pred; |
220 vec_safe_push (size_time_table, new_entry); | 222 if (call) |
223 vec_safe_push (call_size_time_table, new_entry); | |
224 else | |
225 vec_safe_push (size_time_table, new_entry); | |
221 } | 226 } |
222 else | 227 else |
223 { | 228 { |
224 e->size += size; | 229 e->size += size; |
225 e->time += time; | 230 e->time += time; |
226 } | 231 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */ |
227 } | 232 /* Tolerate small roundoff issues. */ |
228 | 233 if (e->time < 0) |
229 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */ | 234 e->time = 0; |
235 } | |
236 } | |
237 | |
238 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */ | |
230 | 239 |
231 static struct cgraph_edge * | 240 static struct cgraph_edge * |
232 redirect_to_unreachable (struct cgraph_edge *e) | 241 redirect_to_unreachable (struct cgraph_edge *e) |
233 { | 242 { |
234 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL; | 243 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL; |
235 struct cgraph_node *target = cgraph_node::get_create | 244 struct cgraph_node *target = cgraph_node::get_create |
236 (builtin_decl_implicit (BUILT_IN_UNREACHABLE)); | 245 (builtin_decl_implicit (BUILT_IN_UNREACHABLE)); |
237 | 246 |
238 if (e->speculative) | 247 if (e->speculative) |
239 e = e->resolve_speculation (target->decl); | 248 e = cgraph_edge::resolve_speculation (e, target->decl); |
240 else if (!e->callee) | 249 else if (!e->callee) |
241 e->make_direct (target); | 250 e = cgraph_edge::make_direct (e, target); |
242 else | 251 else |
243 e->redirect_callee (target); | 252 e->redirect_callee (target); |
244 struct ipa_call_summary *es = ipa_call_summaries->get (e); | 253 class ipa_call_summary *es = ipa_call_summaries->get (e); |
245 e->inline_failed = CIF_UNREACHABLE; | 254 e->inline_failed = CIF_UNREACHABLE; |
246 e->count = profile_count::zero (); | 255 e->count = profile_count::zero (); |
247 es->call_stmt_size = 0; | 256 es->call_stmt_size = 0; |
248 es->call_stmt_time = 0; | 257 es->call_stmt_time = 0; |
249 if (callee) | 258 if (callee) |
264 just once. Do it always on the direct edge, so we do not | 273 just once. Do it always on the direct edge, so we do not |
265 attempt to resolve speculation while duplicating the edge. */ | 274 attempt to resolve speculation while duplicating the edge. */ |
266 && (!e->speculative || e->callee)) | 275 && (!e->speculative || e->callee)) |
267 e = redirect_to_unreachable (e); | 276 e = redirect_to_unreachable (e); |
268 | 277 |
269 struct ipa_call_summary *es = ipa_call_summaries->get (e); | 278 class ipa_call_summary *es = ipa_call_summaries->get (e); |
270 if (predicate && *predicate != true) | 279 if (predicate && *predicate != true) |
271 { | 280 { |
272 if (!es->predicate) | 281 if (!es->predicate) |
273 es->predicate = edge_predicate_pool.allocate (); | 282 es->predicate = edge_predicate_pool.allocate (); |
274 *es->predicate = *predicate; | 283 *es->predicate = *predicate; |
299 **p = new_predicate; | 308 **p = new_predicate; |
300 } | 309 } |
301 } | 310 } |
302 | 311 |
303 | 312 |
304 /* Compute what conditions may or may not hold given invormation about | 313 /* Compute what conditions may or may not hold given information about |
305 parameters. RET_CLAUSE returns truths that may hold in a specialized copy, | 314 parameters. RET_CLAUSE returns truths that may hold in a specialized copy, |
306 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized | 315 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized |
307 copy when called in a given context. It is a bitmask of conditions. Bit | 316 copy when called in a given context. It is a bitmask of conditions. Bit |
308 0 means that condition is known to be false, while bit 1 means that condition | 317 0 means that condition is known to be false, while bit 1 means that condition |
309 may or may not be true. These differs - for example NOT_INLINED condition | 318 may or may not be true. These differs - for example NOT_INLINED condition |
310 is always false in the second and also builtin_constant_p tests can not use | 319 is always false in the second and also builtin_constant_p tests cannot use |
311 the fact that parameter is indeed a constant. | 320 the fact that parameter is indeed a constant. |
312 | 321 |
313 KNOWN_VALS is partial mapping of parameters of NODE to constant values. | 322 KNOWN_VALS is partial mapping of parameters of NODE to constant values. |
314 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter. | 323 KNOWN_AGGS is a vector of aggregate known offset/value set for each |
315 Return clause of possible truths. When INLINE_P is true, assume that we are | 324 parameter. Return clause of possible truths. When INLINE_P is true, assume |
316 inlining. | 325 that we are inlining. |
317 | 326 |
318 ERROR_MARK means compile time invariant. */ | 327 ERROR_MARK means compile time invariant. */ |
319 | 328 |
320 static void | 329 static void |
321 evaluate_conditions_for_known_args (struct cgraph_node *node, | 330 evaluate_conditions_for_known_args (struct cgraph_node *node, |
322 bool inline_p, | 331 bool inline_p, |
323 vec<tree> known_vals, | 332 vec<tree> known_vals, |
324 vec<ipa_agg_jump_function_p> | 333 vec<value_range> known_value_ranges, |
325 known_aggs, | 334 vec<ipa_agg_value_set> known_aggs, |
326 clause_t *ret_clause, | 335 clause_t *ret_clause, |
327 clause_t *ret_nonspec_clause) | 336 clause_t *ret_nonspec_clause) |
328 { | 337 { |
329 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition; | 338 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition; |
330 clause_t nonspec_clause = 1 << predicate::not_inlined_condition; | 339 clause_t nonspec_clause = 1 << predicate::not_inlined_condition; |
331 struct ipa_fn_summary *info = ipa_fn_summaries->get (node); | 340 class ipa_fn_summary *info = ipa_fn_summaries->get (node); |
332 int i; | 341 int i; |
333 struct condition *c; | 342 struct condition *c; |
334 | 343 |
335 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++) | 344 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++) |
336 { | 345 { |
337 tree val; | 346 tree val = NULL; |
338 tree res; | 347 tree res; |
348 int j; | |
349 struct expr_eval_op *op; | |
339 | 350 |
340 /* We allow call stmt to have fewer arguments than the callee function | 351 /* We allow call stmt to have fewer arguments than the callee function |
341 (especially for K&R style programs). So bound check here (we assume | 352 (especially for K&R style programs). So bound check here (we assume |
342 known_aggs vector, if non-NULL, has the same length as | 353 known_aggs vector, if non-NULL, has the same length as |
343 known_vals). */ | 354 known_vals). */ |
344 gcc_checking_assert (!known_aggs.exists () | 355 gcc_checking_assert (!known_aggs.length () || !known_vals.length () |
345 || (known_vals.length () == known_aggs.length ())); | 356 || (known_vals.length () == known_aggs.length ())); |
346 if (c->operand_num >= (int) known_vals.length ()) | 357 |
358 if (c->agg_contents) | |
359 { | |
360 struct ipa_agg_value_set *agg; | |
361 | |
362 if (c->code == predicate::changed | |
363 && !c->by_ref | |
364 && c->operand_num < (int)known_vals.length () | |
365 && (known_vals[c->operand_num] == error_mark_node)) | |
366 continue; | |
367 | |
368 if (c->operand_num < (int)known_aggs.length ()) | |
369 { | |
370 agg = &known_aggs[c->operand_num]; | |
371 val = ipa_find_agg_cst_for_param (agg, | |
372 c->operand_num | |
373 < (int) known_vals.length () | |
374 ? known_vals[c->operand_num] | |
375 : NULL, | |
376 c->offset, c->by_ref); | |
377 } | |
378 else | |
379 val = NULL_TREE; | |
380 } | |
381 else if (c->operand_num < (int) known_vals.length ()) | |
382 { | |
383 val = known_vals[c->operand_num]; | |
384 if (val == error_mark_node && c->code != predicate::changed) | |
385 val = NULL_TREE; | |
386 } | |
387 | |
388 if (!val | |
389 && (c->code == predicate::changed | |
390 || c->code == predicate::is_not_constant)) | |
347 { | 391 { |
348 clause |= 1 << (i + predicate::first_dynamic_condition); | 392 clause |= 1 << (i + predicate::first_dynamic_condition); |
349 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); | 393 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
350 continue; | 394 continue; |
351 } | 395 } |
352 | 396 if (c->code == predicate::changed) |
353 if (c->agg_contents) | 397 { |
354 { | |
355 struct ipa_agg_jump_function *agg; | |
356 | |
357 if (c->code == predicate::changed | |
358 && !c->by_ref | |
359 && (known_vals[c->operand_num] == error_mark_node)) | |
360 continue; | |
361 | |
362 if (known_aggs.exists ()) | |
363 { | |
364 agg = known_aggs[c->operand_num]; | |
365 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num], | |
366 c->offset, c->by_ref); | |
367 } | |
368 else | |
369 val = NULL_TREE; | |
370 } | |
371 else | |
372 { | |
373 val = known_vals[c->operand_num]; | |
374 if (val == error_mark_node && c->code != predicate::changed) | |
375 val = NULL_TREE; | |
376 } | |
377 | |
378 if (!val) | |
379 { | |
380 clause |= 1 << (i + predicate::first_dynamic_condition); | |
381 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); | 398 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
382 continue; | 399 continue; |
383 } | 400 } |
384 if (c->code == predicate::changed) | 401 |
402 if (c->code == predicate::is_not_constant) | |
385 { | 403 { |
386 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); | 404 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
387 continue; | 405 continue; |
388 } | 406 } |
389 | 407 |
390 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size) | 408 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val))) |
391 { | 409 { |
392 clause |= 1 << (i + predicate::first_dynamic_condition); | 410 if (c->type != TREE_TYPE (val)) |
393 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); | 411 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val); |
394 continue; | 412 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) |
395 } | 413 { |
396 if (c->code == predicate::is_not_constant) | 414 if (!val) |
397 { | 415 break; |
398 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); | 416 if (!op->val[0]) |
399 continue; | 417 val = fold_unary (op->code, op->type, val); |
400 } | 418 else if (!op->val[1]) |
401 | 419 val = fold_binary (op->code, op->type, |
402 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val); | 420 op->index ? op->val[0] : val, |
403 res = val | 421 op->index ? val : op->val[0]); |
404 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val) | 422 else if (op->index == 0) |
405 : NULL; | 423 val = fold_ternary (op->code, op->type, |
406 | 424 val, op->val[0], op->val[1]); |
407 if (res && integer_zerop (res)) | 425 else if (op->index == 1) |
408 continue; | 426 val = fold_ternary (op->code, op->type, |
427 op->val[0], val, op->val[1]); | |
428 else if (op->index == 2) | |
429 val = fold_ternary (op->code, op->type, | |
430 op->val[0], op->val[1], val); | |
431 else | |
432 val = NULL_TREE; | |
433 } | |
434 | |
435 res = val | |
436 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val) | |
437 : NULL; | |
438 | |
439 if (res && integer_zerop (res)) | |
440 continue; | |
441 if (res && integer_onep (res)) | |
442 { | |
443 clause |= 1 << (i + predicate::first_dynamic_condition); | |
444 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); | |
445 continue; | |
446 } | |
447 } | |
448 if (c->operand_num < (int) known_value_ranges.length () | |
449 && !c->agg_contents | |
450 && !known_value_ranges[c->operand_num].undefined_p () | |
451 && !known_value_ranges[c->operand_num].varying_p () | |
452 && TYPE_SIZE (c->type) | |
453 == TYPE_SIZE (known_value_ranges[c->operand_num].type ()) | |
454 && (!val || TREE_CODE (val) != INTEGER_CST)) | |
455 { | |
456 value_range vr = known_value_ranges[c->operand_num]; | |
457 if (!useless_type_conversion_p (c->type, vr.type ())) | |
458 { | |
459 value_range res; | |
460 range_fold_unary_expr (&res, NOP_EXPR, | |
461 c->type, &vr, vr.type ()); | |
462 vr = res; | |
463 } | |
464 tree type = c->type; | |
465 | |
466 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) | |
467 { | |
468 if (vr.varying_p () || vr.undefined_p ()) | |
469 break; | |
470 | |
471 value_range res; | |
472 if (!op->val[0]) | |
473 range_fold_unary_expr (&res, op->code, op->type, &vr, type); | |
474 else if (!op->val[1]) | |
475 { | |
476 value_range op0 (op->val[0], op->val[0]); | |
477 range_fold_binary_expr (&res, op->code, op->type, | |
478 op->index ? &op0 : &vr, | |
479 op->index ? &vr : &op0); | |
480 } | |
481 else | |
482 gcc_unreachable (); | |
483 type = op->type; | |
484 vr = res; | |
485 } | |
486 if (!vr.varying_p () && !vr.undefined_p ()) | |
487 { | |
488 value_range res; | |
489 value_range val_vr (c->val, c->val); | |
490 range_fold_binary_expr (&res, c->code, boolean_type_node, | |
491 &vr, | |
492 &val_vr); | |
493 if (res.zero_p ()) | |
494 continue; | |
495 } | |
496 } | |
409 | 497 |
410 clause |= 1 << (i + predicate::first_dynamic_condition); | 498 clause |= 1 << (i + predicate::first_dynamic_condition); |
411 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); | 499 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
412 } | 500 } |
413 *ret_clause = clause; | 501 *ret_clause = clause; |
414 if (ret_nonspec_clause) | 502 if (ret_nonspec_clause) |
415 *ret_nonspec_clause = nonspec_clause; | 503 *ret_nonspec_clause = nonspec_clause; |
416 } | 504 } |
417 | 505 |
418 | 506 |
419 /* Work out what conditions might be true at invocation of E. */ | 507 /* Work out what conditions might be true at invocation of E. |
508 Compute costs for inlined edge if INLINE_P is true. | |
509 | |
510 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR | |
511 (if non-NULL) conditions evaluated for nonspecialized clone called | |
512 in a given context. | |
513 | |
514 KNOWN_VALS_PTR and KNOWN_AGGS_PTR must be non-NULL and will be filled by | |
515 known constant and aggregate values of parameters. | |
516 | |
517 KNOWN_CONTEXT_PTR, if non-NULL, will be filled by polymorphic call contexts | |
518 of parameter used by a polymorphic call. */ | |
420 | 519 |
421 void | 520 void |
422 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, | 521 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, |
423 clause_t *clause_ptr, | 522 clause_t *clause_ptr, |
424 clause_t *nonspec_clause_ptr, | 523 clause_t *nonspec_clause_ptr, |
425 vec<tree> *known_vals_ptr, | 524 vec<tree> *known_vals_ptr, |
426 vec<ipa_polymorphic_call_context> | 525 vec<ipa_polymorphic_call_context> |
427 *known_contexts_ptr, | 526 *known_contexts_ptr, |
428 vec<ipa_agg_jump_function_p> *known_aggs_ptr) | 527 vec<ipa_agg_value_set> *known_aggs_ptr) |
429 { | 528 { |
430 struct cgraph_node *callee = e->callee->ultimate_alias_target (); | 529 struct cgraph_node *callee = e->callee->ultimate_alias_target (); |
431 struct ipa_fn_summary *info = ipa_fn_summaries->get (callee); | 530 class ipa_fn_summary *info = ipa_fn_summaries->get (callee); |
432 vec<tree> known_vals = vNULL; | 531 auto_vec<value_range, 32> known_value_ranges; |
433 vec<ipa_agg_jump_function_p> known_aggs = vNULL; | 532 class ipa_edge_args *args; |
434 | 533 |
435 if (clause_ptr) | 534 if (clause_ptr) |
436 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition; | 535 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition; |
437 if (known_vals_ptr) | |
438 known_vals_ptr->create (0); | |
439 if (known_contexts_ptr) | |
440 known_contexts_ptr->create (0); | |
441 | 536 |
442 if (ipa_node_params_sum | 537 if (ipa_node_params_sum |
443 && !e->call_stmt_cannot_inline_p | 538 && !e->call_stmt_cannot_inline_p |
444 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)) | 539 && (info->conds || known_contexts_ptr) |
445 { | 540 && (args = IPA_EDGE_REF (e)) != NULL) |
446 struct ipa_node_params *caller_parms_info, *callee_pi; | 541 { |
447 struct ipa_edge_args *args = IPA_EDGE_REF (e); | 542 struct cgraph_node *caller; |
448 struct ipa_call_summary *es = ipa_call_summaries->get (e); | 543 class ipa_node_params *caller_parms_info, *callee_pi = NULL; |
544 class ipa_call_summary *es = ipa_call_summaries->get (e); | |
449 int i, count = ipa_get_cs_argument_count (args); | 545 int i, count = ipa_get_cs_argument_count (args); |
450 | 546 |
451 if (e->caller->global.inlined_to) | 547 if (count) |
452 caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to); | 548 { |
453 else | 549 if (e->caller->inlined_to) |
454 caller_parms_info = IPA_NODE_REF (e->caller); | 550 caller = e->caller->inlined_to; |
455 callee_pi = IPA_NODE_REF (e->callee); | 551 else |
456 | 552 caller = e->caller; |
457 if (count && (info->conds || known_vals_ptr)) | 553 caller_parms_info = IPA_NODE_REF (caller); |
458 known_vals.safe_grow_cleared (count); | 554 callee_pi = IPA_NODE_REF (callee); |
459 if (count && (info->conds || known_aggs_ptr)) | 555 |
460 known_aggs.safe_grow_cleared (count); | 556 /* Watch for thunks. */ |
461 if (count && known_contexts_ptr) | 557 if (callee_pi) |
462 known_contexts_ptr->safe_grow_cleared (count); | 558 /* Watch for variadic functions. */ |
463 | 559 count = MIN (count, ipa_get_param_count (callee_pi)); |
464 for (i = 0; i < count; i++) | 560 } |
465 { | 561 |
466 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); | 562 if (callee_pi) |
467 tree cst = ipa_value_from_jfunc (caller_parms_info, jf, | 563 for (i = 0; i < count; i++) |
468 ipa_get_type (callee_pi, i)); | 564 { |
469 | 565 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); |
470 if (!cst && e->call_stmt | 566 |
471 && i < (int)gimple_call_num_args (e->call_stmt)) | 567 if (ipa_is_param_used_by_indirect_call (callee_pi, i) |
472 { | 568 || ipa_is_param_used_by_ipa_predicates (callee_pi, i)) |
473 cst = gimple_call_arg (e->call_stmt, i); | 569 { |
474 if (!is_gimple_min_invariant (cst)) | 570 /* Determine if we know constant value of the parameter. */ |
475 cst = NULL; | 571 tree cst = ipa_value_from_jfunc (caller_parms_info, jf, |
476 } | 572 ipa_get_type (callee_pi, i)); |
477 if (cst) | 573 |
478 { | 574 if (!cst && e->call_stmt |
479 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); | 575 && i < (int)gimple_call_num_args (e->call_stmt)) |
480 if (known_vals.exists ()) | 576 { |
481 known_vals[i] = cst; | 577 cst = gimple_call_arg (e->call_stmt, i); |
482 } | 578 if (!is_gimple_min_invariant (cst)) |
483 else if (inline_p && !es->param[i].change_prob) | 579 cst = NULL; |
484 known_vals[i] = error_mark_node; | 580 } |
485 | 581 if (cst) |
486 if (known_contexts_ptr) | 582 { |
487 (*known_contexts_ptr)[i] | 583 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); |
488 = ipa_context_from_jfunc (caller_parms_info, e, i, jf); | 584 if (!known_vals_ptr->length ()) |
489 /* TODO: When IPA-CP starts propagating and merging aggregate jump | 585 vec_safe_grow_cleared (known_vals_ptr, count); |
490 functions, use its knowledge of the caller too, just like the | 586 (*known_vals_ptr)[i] = cst; |
491 scalar case above. */ | 587 } |
492 known_aggs[i] = &jf->agg; | 588 else if (inline_p && !es->param[i].change_prob) |
493 } | 589 { |
494 } | 590 if (!known_vals_ptr->length ()) |
495 else if (e->call_stmt && !e->call_stmt_cannot_inline_p | 591 vec_safe_grow_cleared (known_vals_ptr, count); |
496 && ((clause_ptr && info->conds) || known_vals_ptr)) | 592 (*known_vals_ptr)[i] = error_mark_node; |
593 } | |
594 | |
595 /* If we failed to get simple constant, try value range. */ | |
596 if ((!cst || TREE_CODE (cst) != INTEGER_CST) | |
597 && ipa_is_param_used_by_ipa_predicates (callee_pi, i)) | |
598 { | |
599 value_range vr | |
600 = ipa_value_range_from_jfunc (caller_parms_info, e, jf, | |
601 ipa_get_type (callee_pi, | |
602 i)); | |
603 if (!vr.undefined_p () && !vr.varying_p ()) | |
604 { | |
605 if (!known_value_ranges.length ()) | |
606 known_value_ranges.safe_grow_cleared (count); | |
607 known_value_ranges[i] = vr; | |
608 } | |
609 } | |
610 | |
611 /* Determine known aggregate values. */ | |
612 ipa_agg_value_set agg | |
613 = ipa_agg_value_set_from_jfunc (caller_parms_info, | |
614 caller, &jf->agg); | |
615 if (agg.items.length ()) | |
616 { | |
617 if (!known_aggs_ptr->length ()) | |
618 vec_safe_grow_cleared (known_aggs_ptr, count); | |
619 (*known_aggs_ptr)[i] = agg; | |
620 } | |
621 } | |
622 | |
623 /* For calls used in polymorphic calls we further determine | |
624 polymorphic call context. */ | |
625 if (known_contexts_ptr | |
626 && ipa_is_param_used_by_polymorphic_call (callee_pi, i)) | |
627 { | |
628 ipa_polymorphic_call_context | |
629 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf); | |
630 if (!ctx.useless_p ()) | |
631 { | |
632 if (!known_contexts_ptr->length ()) | |
633 known_contexts_ptr->safe_grow_cleared (count); | |
634 (*known_contexts_ptr)[i] | |
635 = ipa_context_from_jfunc (caller_parms_info, e, i, jf); | |
636 } | |
637 } | |
638 } | |
639 else | |
640 gcc_assert (!count || callee->thunk.thunk_p); | |
641 } | |
642 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds) | |
497 { | 643 { |
498 int i, count = (int)gimple_call_num_args (e->call_stmt); | 644 int i, count = (int)gimple_call_num_args (e->call_stmt); |
499 | 645 |
500 if (count && (info->conds || known_vals_ptr)) | |
501 known_vals.safe_grow_cleared (count); | |
502 for (i = 0; i < count; i++) | 646 for (i = 0; i < count; i++) |
503 { | 647 { |
504 tree cst = gimple_call_arg (e->call_stmt, i); | 648 tree cst = gimple_call_arg (e->call_stmt, i); |
505 if (!is_gimple_min_invariant (cst)) | 649 if (!is_gimple_min_invariant (cst)) |
506 cst = NULL; | 650 cst = NULL; |
507 if (cst) | 651 if (cst) |
508 known_vals[i] = cst; | 652 { |
653 if (!known_vals_ptr->length ()) | |
654 vec_safe_grow_cleared (known_vals_ptr, count); | |
655 (*known_vals_ptr)[i] = cst; | |
656 } | |
509 } | 657 } |
510 } | 658 } |
511 | 659 |
512 evaluate_conditions_for_known_args (callee, inline_p, | 660 evaluate_conditions_for_known_args (callee, inline_p, |
513 known_vals, known_aggs, clause_ptr, | 661 *known_vals_ptr, |
662 known_value_ranges, | |
663 *known_aggs_ptr, | |
664 clause_ptr, | |
514 nonspec_clause_ptr); | 665 nonspec_clause_ptr); |
515 | |
516 if (known_vals_ptr) | |
517 *known_vals_ptr = known_vals; | |
518 else | |
519 known_vals.release (); | |
520 | |
521 if (known_aggs_ptr) | |
522 *known_aggs_ptr = known_aggs; | |
523 else | |
524 known_aggs.release (); | |
525 } | 666 } |
526 | 667 |
527 | 668 |
528 /* Allocate the function summary. */ | 669 /* Allocate the function summary. */ |
529 | 670 |
530 static void | 671 static void |
531 ipa_fn_summary_alloc (void) | 672 ipa_fn_summary_alloc (void) |
532 { | 673 { |
533 gcc_checking_assert (!ipa_fn_summaries); | 674 gcc_checking_assert (!ipa_fn_summaries); |
675 ipa_size_summaries = new ipa_size_summary_t (symtab); | |
534 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab); | 676 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab); |
535 ipa_call_summaries = new ipa_call_summary_t (symtab, false); | 677 ipa_call_summaries = new ipa_call_summary_t (symtab); |
536 } | 678 } |
537 | 679 |
538 ipa_call_summary::~ipa_call_summary () | 680 ipa_call_summary::~ipa_call_summary () |
539 { | 681 { |
540 if (predicate) | 682 if (predicate) |
547 { | 689 { |
548 if (loop_iterations) | 690 if (loop_iterations) |
549 edge_predicate_pool.remove (loop_iterations); | 691 edge_predicate_pool.remove (loop_iterations); |
550 if (loop_stride) | 692 if (loop_stride) |
551 edge_predicate_pool.remove (loop_stride); | 693 edge_predicate_pool.remove (loop_stride); |
552 if (array_index) | |
553 edge_predicate_pool.remove (array_index); | |
554 vec_free (conds); | 694 vec_free (conds); |
555 vec_free (size_time_table); | 695 vec_free (size_time_table); |
696 vec_free (call_size_time_table); | |
556 } | 697 } |
557 | 698 |
558 void | 699 void |
559 ipa_fn_summary_t::remove_callees (cgraph_node *node) | 700 ipa_fn_summary_t::remove_callees (cgraph_node *node) |
560 { | 701 { |
602 out that something was optimized out. */ | 743 out that something was optimized out. */ |
603 if (ipa_node_params_sum && dst->clone.tree_map) | 744 if (ipa_node_params_sum && dst->clone.tree_map) |
604 { | 745 { |
605 vec<size_time_entry, va_gc> *entry = info->size_time_table; | 746 vec<size_time_entry, va_gc> *entry = info->size_time_table; |
606 /* Use SRC parm info since it may not be copied yet. */ | 747 /* Use SRC parm info since it may not be copied yet. */ |
607 struct ipa_node_params *parms_info = IPA_NODE_REF (src); | 748 class ipa_node_params *parms_info = IPA_NODE_REF (src); |
608 vec<tree> known_vals = vNULL; | 749 vec<tree> known_vals = vNULL; |
609 int count = ipa_get_param_count (parms_info); | 750 int count = ipa_get_param_count (parms_info); |
610 int i, j; | 751 int i, j; |
611 clause_t possible_truths; | 752 clause_t possible_truths; |
612 predicate true_pred = true; | 753 predicate true_pred = true; |
621 { | 762 { |
622 struct ipa_replace_map *r; | 763 struct ipa_replace_map *r; |
623 | 764 |
624 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++) | 765 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++) |
625 { | 766 { |
626 if (((!r->old_tree && r->parm_num == i) | 767 if (r->parm_num == i) |
627 || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i))) | |
628 && r->replace_p && !r->ref_p) | |
629 { | 768 { |
630 known_vals[i] = r->new_tree; | 769 known_vals[i] = r->new_tree; |
631 break; | 770 break; |
632 } | 771 } |
633 } | 772 } |
634 } | 773 } |
635 evaluate_conditions_for_known_args (dst, false, | 774 evaluate_conditions_for_known_args (dst, false, |
636 known_vals, | 775 known_vals, |
776 vNULL, | |
637 vNULL, | 777 vNULL, |
638 &possible_truths, | 778 &possible_truths, |
639 /* We are going to specialize, | 779 /* We are going to specialize, |
640 so ignore nonspec truths. */ | 780 so ignore nonspec truths. */ |
641 NULL); | 781 NULL); |
642 known_vals.release (); | 782 known_vals.release (); |
643 | 783 |
644 info->account_size_time (0, 0, true_pred, true_pred); | 784 info->account_size_time (0, 0, true_pred, true_pred); |
645 | 785 |
646 /* Remap size_time vectors. | 786 /* Remap size_time vectors. |
647 Simplify the predicate by prunning out alternatives that are known | 787 Simplify the predicate by pruning out alternatives that are known |
648 to be false. | 788 to be false. |
649 TODO: as on optimization, we can also eliminate conditions known | 789 TODO: as on optimization, we can also eliminate conditions known |
650 to be true. */ | 790 to be true. */ |
651 for (i = 0; vec_safe_iterate (entry, i, &e); i++) | 791 for (i = 0; vec_safe_iterate (entry, i, &e); i++) |
652 { | 792 { |
666 /* Remap edge predicates with the same simplification as above. | 806 /* Remap edge predicates with the same simplification as above. |
667 Also copy constantness arrays. */ | 807 Also copy constantness arrays. */ |
668 for (edge = dst->callees; edge; edge = next) | 808 for (edge = dst->callees; edge; edge = next) |
669 { | 809 { |
670 predicate new_predicate; | 810 predicate new_predicate; |
671 struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); | 811 class ipa_call_summary *es = ipa_call_summaries->get (edge); |
672 next = edge->next_callee; | 812 next = edge->next_callee; |
673 | 813 |
674 if (!edge->inline_failed) | 814 if (!edge->inline_failed) |
675 inlined_to_p = true; | 815 inlined_to_p = true; |
676 if (!es->predicate) | 816 if (!es->predicate) |
680 if (new_predicate == false && *es->predicate != false) | 820 if (new_predicate == false && *es->predicate != false) |
681 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale; | 821 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale; |
682 edge_set_predicate (edge, &new_predicate); | 822 edge_set_predicate (edge, &new_predicate); |
683 } | 823 } |
684 | 824 |
685 /* Remap indirect edge predicates with the same simplificaiton as above. | 825 /* Remap indirect edge predicates with the same simplification as above. |
686 Also copy constantness arrays. */ | 826 Also copy constantness arrays. */ |
687 for (edge = dst->indirect_calls; edge; edge = next) | 827 for (edge = dst->indirect_calls; edge; edge = next) |
688 { | 828 { |
689 predicate new_predicate; | 829 predicate new_predicate; |
690 struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); | 830 class ipa_call_summary *es = ipa_call_summaries->get (edge); |
691 next = edge->next_callee; | 831 next = edge->next_callee; |
692 | 832 |
693 gcc_checking_assert (edge->inline_failed); | 833 gcc_checking_assert (edge->inline_failed); |
694 if (!es->predicate) | 834 if (!es->predicate) |
695 continue; | 835 continue; |
701 } | 841 } |
702 remap_hint_predicate_after_duplication (&info->loop_iterations, | 842 remap_hint_predicate_after_duplication (&info->loop_iterations, |
703 possible_truths); | 843 possible_truths); |
704 remap_hint_predicate_after_duplication (&info->loop_stride, | 844 remap_hint_predicate_after_duplication (&info->loop_stride, |
705 possible_truths); | 845 possible_truths); |
706 remap_hint_predicate_after_duplication (&info->array_index, | |
707 possible_truths); | |
708 | 846 |
709 /* If inliner or someone after inliner will ever start producing | 847 /* If inliner or someone after inliner will ever start producing |
710 non-trivial clones, we will get trouble with lack of information | 848 non-trivial clones, we will get trouble with lack of information |
711 about updating self sizes, because size vectors already contains | 849 about updating self sizes, because size vectors already contains |
712 sizes of the calees. */ | 850 sizes of the callees. */ |
713 gcc_assert (!inlined_to_p || !optimized_out_size); | 851 gcc_assert (!inlined_to_p || !optimized_out_size); |
714 } | 852 } |
715 else | 853 else |
716 { | 854 { |
717 info->size_time_table = vec_safe_copy (info->size_time_table); | 855 info->size_time_table = vec_safe_copy (info->size_time_table); |
725 { | 863 { |
726 predicate p = *info->loop_stride; | 864 predicate p = *info->loop_stride; |
727 info->loop_stride = NULL; | 865 info->loop_stride = NULL; |
728 set_hint_predicate (&info->loop_stride, p); | 866 set_hint_predicate (&info->loop_stride, p); |
729 } | 867 } |
730 if (info->array_index) | 868 } |
731 { | 869 if (!dst->inlined_to) |
732 predicate p = *info->array_index; | |
733 info->array_index = NULL; | |
734 set_hint_predicate (&info->array_index, p); | |
735 } | |
736 } | |
737 if (!dst->global.inlined_to) | |
738 ipa_update_overall_fn_summary (dst); | 870 ipa_update_overall_fn_summary (dst); |
739 } | 871 } |
740 | 872 |
741 | 873 |
742 /* Hook that is called by cgraph.c when a node is duplicated. */ | 874 /* Hook that is called by cgraph.c when a node is duplicated. */ |
743 | 875 |
744 void | 876 void |
745 ipa_call_summary_t::duplicate (struct cgraph_edge *src, | 877 ipa_call_summary_t::duplicate (struct cgraph_edge *src, |
746 struct cgraph_edge *dst, | 878 struct cgraph_edge *dst, |
747 struct ipa_call_summary *srcinfo, | 879 class ipa_call_summary *srcinfo, |
748 struct ipa_call_summary *info) | 880 class ipa_call_summary *info) |
749 { | 881 { |
750 new (info) ipa_call_summary (*srcinfo); | 882 new (info) ipa_call_summary (*srcinfo); |
751 info->predicate = NULL; | 883 info->predicate = NULL; |
752 edge_set_predicate (dst, srcinfo->predicate); | 884 edge_set_predicate (dst, srcinfo->predicate); |
753 info->param = srcinfo->param.copy (); | 885 info->param = srcinfo->param.copy (); |
763 /* Dump edge summaries associated to NODE and recursively to all clones. | 895 /* Dump edge summaries associated to NODE and recursively to all clones. |
764 Indent by INDENT. */ | 896 Indent by INDENT. */ |
765 | 897 |
766 static void | 898 static void |
767 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node, | 899 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node, |
768 struct ipa_fn_summary *info) | 900 class ipa_fn_summary *info) |
769 { | 901 { |
770 struct cgraph_edge *edge; | 902 struct cgraph_edge *edge; |
771 for (edge = node->callees; edge; edge = edge->next_callee) | 903 for (edge = node->callees; edge; edge = edge->next_callee) |
772 { | 904 { |
773 struct ipa_call_summary *es = ipa_call_summaries->get (edge); | 905 class ipa_call_summary *es = ipa_call_summaries->get (edge); |
774 struct cgraph_node *callee = edge->callee->ultimate_alias_target (); | 906 struct cgraph_node *callee = edge->callee->ultimate_alias_target (); |
775 int i; | 907 int i; |
776 | 908 |
777 fprintf (f, | 909 fprintf (f, |
778 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i", | 910 "%*s%s %s\n%*s freq:%4.2f", |
779 indent, "", callee->name (), callee->order, | 911 indent, "", callee->dump_name (), |
780 !edge->inline_failed | 912 !edge->inline_failed |
781 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed), | 913 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed), |
782 indent, "", es->loop_depth, edge->sreal_frequency ().to_double (), | 914 indent, "", edge->sreal_frequency ().to_double ()); |
783 es->call_stmt_size, es->call_stmt_time); | 915 |
916 if (cross_module_call_p (edge)) | |
917 fprintf (f, " cross module"); | |
918 | |
919 if (es) | |
920 fprintf (f, " loop depth:%2i size:%2i time: %2i", | |
921 es->loop_depth, es->call_stmt_size, es->call_stmt_time); | |
784 | 922 |
785 ipa_fn_summary *s = ipa_fn_summaries->get (callee); | 923 ipa_fn_summary *s = ipa_fn_summaries->get (callee); |
924 ipa_size_summary *ss = ipa_size_summaries->get (callee); | |
786 if (s != NULL) | 925 if (s != NULL) |
787 fprintf (f, "callee size:%2i stack:%2i", | 926 fprintf (f, " callee size:%2i stack:%2i", |
788 (int) (s->size / ipa_fn_summary::size_scale), | 927 (int) (ss->size / ipa_fn_summary::size_scale), |
789 (int) s->estimated_stack_size); | 928 (int) s->estimated_stack_size); |
790 | 929 |
791 if (es->predicate) | 930 if (es && es->predicate) |
792 { | 931 { |
793 fprintf (f, " predicate: "); | 932 fprintf (f, " predicate: "); |
794 es->predicate->dump (f, info->conds); | 933 es->predicate->dump (f, info->conds); |
795 } | 934 } |
796 else | 935 else |
797 fprintf (f, "\n"); | 936 fprintf (f, "\n"); |
798 if (es->param.exists ()) | 937 if (es && es->param.exists ()) |
799 for (i = 0; i < (int) es->param.length (); i++) | 938 for (i = 0; i < (int) es->param.length (); i++) |
800 { | 939 { |
801 int prob = es->param[i].change_prob; | 940 int prob = es->param[i].change_prob; |
802 | 941 |
803 if (!prob) | 942 if (!prob) |
807 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i, | 946 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i, |
808 prob * 100.0 / REG_BR_PROB_BASE); | 947 prob * 100.0 / REG_BR_PROB_BASE); |
809 } | 948 } |
810 if (!edge->inline_failed) | 949 if (!edge->inline_failed) |
811 { | 950 { |
812 ipa_fn_summary *s = ipa_fn_summaries->get (callee); | 951 ipa_size_summary *ss = ipa_size_summaries->get (callee); |
813 fprintf (f, "%*sStack frame offset %i, callee self size %i," | 952 fprintf (f, "%*sStack frame offset %i, callee self size %i\n", |
814 " callee size %i\n", | |
815 indent + 2, "", | 953 indent + 2, "", |
816 (int) s->stack_frame_offset, | 954 (int) ipa_get_stack_frame_offset (callee), |
817 (int) s->estimated_self_stack_size, | 955 (int) ss->estimated_self_stack_size); |
818 (int) s->estimated_stack_size); | |
819 dump_ipa_call_summary (f, indent + 2, callee, info); | 956 dump_ipa_call_summary (f, indent + 2, callee, info); |
820 } | 957 } |
821 } | 958 } |
822 for (edge = node->indirect_calls; edge; edge = edge->next_callee) | 959 for (edge = node->indirect_calls; edge; edge = edge->next_callee) |
823 { | 960 { |
824 struct ipa_call_summary *es = ipa_call_summaries->get (edge); | 961 class ipa_call_summary *es = ipa_call_summaries->get (edge); |
825 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i" | 962 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i" |
826 " time: %2i", | 963 " time: %2i", |
827 indent, "", | 964 indent, "", |
828 es->loop_depth, | 965 es->loop_depth, |
829 edge->sreal_frequency ().to_double (), es->call_stmt_size, | 966 edge->sreal_frequency ().to_double (), es->call_stmt_size, |
842 void | 979 void |
843 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node) | 980 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node) |
844 { | 981 { |
845 if (node->definition) | 982 if (node->definition) |
846 { | 983 { |
847 struct ipa_fn_summary *s = ipa_fn_summaries->get (node); | 984 class ipa_fn_summary *s = ipa_fn_summaries->get (node); |
985 class ipa_size_summary *ss = ipa_size_summaries->get (node); | |
848 if (s != NULL) | 986 if (s != NULL) |
849 { | 987 { |
850 size_time_entry *e; | 988 size_time_entry *e; |
851 int i; | 989 int i; |
852 fprintf (f, "IPA function summary for %s", node->dump_name ()); | 990 fprintf (f, "IPA function summary for %s", node->dump_name ()); |
855 if (s->inlinable) | 993 if (s->inlinable) |
856 fprintf (f, " inlinable"); | 994 fprintf (f, " inlinable"); |
857 if (s->fp_expressions) | 995 if (s->fp_expressions) |
858 fprintf (f, " fp_expression"); | 996 fprintf (f, " fp_expression"); |
859 fprintf (f, "\n global time: %f\n", s->time.to_double ()); | 997 fprintf (f, "\n global time: %f\n", s->time.to_double ()); |
860 fprintf (f, " self size: %i\n", s->self_size); | 998 fprintf (f, " self size: %i\n", ss->self_size); |
861 fprintf (f, " global size: %i\n", s->size); | 999 fprintf (f, " global size: %i\n", ss->size); |
862 fprintf (f, " min size: %i\n", s->min_size); | 1000 fprintf (f, " min size: %i\n", s->min_size); |
863 fprintf (f, " self stack: %i\n", | 1001 fprintf (f, " self stack: %i\n", |
864 (int) s->estimated_self_stack_size); | 1002 (int) ss->estimated_self_stack_size); |
865 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); | 1003 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); |
866 if (s->growth) | 1004 if (s->growth) |
867 fprintf (f, " estimated growth:%i\n", (int) s->growth); | 1005 fprintf (f, " estimated growth:%i\n", (int) s->growth); |
868 if (s->scc_no) | 1006 if (s->scc_no) |
869 fprintf (f, " In SCC: %i\n", (int) s->scc_no); | 1007 fprintf (f, " In SCC: %i\n", (int) s->scc_no); |
892 if (s->loop_stride) | 1030 if (s->loop_stride) |
893 { | 1031 { |
894 fprintf (f, " loop stride:"); | 1032 fprintf (f, " loop stride:"); |
895 s->loop_stride->dump (f, s->conds); | 1033 s->loop_stride->dump (f, s->conds); |
896 } | 1034 } |
897 if (s->array_index) | |
898 { | |
899 fprintf (f, " array index:"); | |
900 s->array_index->dump (f, s->conds); | |
901 } | |
902 fprintf (f, " calls:\n"); | 1035 fprintf (f, " calls:\n"); |
903 dump_ipa_call_summary (f, 4, node, s); | 1036 dump_ipa_call_summary (f, 4, node, s); |
904 fprintf (f, "\n"); | 1037 fprintf (f, "\n"); |
905 } | 1038 } |
906 else | 1039 else |
918 ipa_dump_fn_summaries (FILE *f) | 1051 ipa_dump_fn_summaries (FILE *f) |
919 { | 1052 { |
920 struct cgraph_node *node; | 1053 struct cgraph_node *node; |
921 | 1054 |
922 FOR_EACH_DEFINED_FUNCTION (node) | 1055 FOR_EACH_DEFINED_FUNCTION (node) |
923 if (!node->global.inlined_to) | 1056 if (!node->inlined_to) |
924 ipa_dump_fn_summary (f, node); | 1057 ipa_dump_fn_summary (f, node); |
925 } | 1058 } |
926 | 1059 |
927 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the | 1060 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the |
928 boolean variable pointed to by DATA. */ | 1061 boolean variable pointed to by DATA. */ |
939 /* If OP refers to value of function parameter, return the corresponding | 1072 /* If OP refers to value of function parameter, return the corresponding |
940 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the | 1073 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the |
941 PARM_DECL) will be stored to *SIZE_P in that case too. */ | 1074 PARM_DECL) will be stored to *SIZE_P in that case too. */ |
942 | 1075 |
943 static tree | 1076 static tree |
944 unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p) | 1077 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op, |
1078 poly_int64 *size_p) | |
945 { | 1079 { |
946 /* SSA_NAME referring to parm default def? */ | 1080 /* SSA_NAME referring to parm default def? */ |
947 if (TREE_CODE (op) == SSA_NAME | 1081 if (TREE_CODE (op) == SSA_NAME |
948 && SSA_NAME_IS_DEFAULT_DEF (op) | 1082 && SSA_NAME_IS_DEFAULT_DEF (op) |
949 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) | 1083 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) |
950 { | 1084 { |
951 if (size_p) | 1085 if (size_p) |
952 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op))); | 1086 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); |
953 return SSA_NAME_VAR (op); | 1087 return SSA_NAME_VAR (op); |
954 } | 1088 } |
955 /* Non-SSA parm reference? */ | 1089 /* Non-SSA parm reference? */ |
956 if (TREE_CODE (op) == PARM_DECL) | 1090 if (TREE_CODE (op) == PARM_DECL) |
957 { | 1091 { |
958 bool modified = false; | 1092 bool modified = false; |
959 | 1093 |
960 ao_ref refd; | 1094 ao_ref refd; |
961 ao_ref_init (&refd, op); | 1095 ao_ref_init (&refd, op); |
962 walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified, | 1096 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt), |
963 NULL); | 1097 mark_modified, &modified, NULL, NULL, |
1098 fbi->aa_walk_budget + 1); | |
1099 if (walked < 0) | |
1100 { | |
1101 fbi->aa_walk_budget = 0; | |
1102 return NULL_TREE; | |
1103 } | |
964 if (!modified) | 1104 if (!modified) |
965 { | 1105 { |
966 if (size_p) | 1106 if (size_p) |
967 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op))); | 1107 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); |
968 return op; | 1108 return op; |
969 } | 1109 } |
970 } | 1110 } |
971 return NULL_TREE; | 1111 return NULL_TREE; |
972 } | 1112 } |
975 parameter. Also traverse chains of SSA register assignments. If non-NULL, | 1115 parameter. Also traverse chains of SSA register assignments. If non-NULL, |
976 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be | 1116 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be |
977 stored to *SIZE_P in that case too. */ | 1117 stored to *SIZE_P in that case too. */ |
978 | 1118 |
979 static tree | 1119 static tree |
980 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p) | 1120 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op, |
981 { | 1121 poly_int64 *size_p) |
982 tree res = unmodified_parm_1 (stmt, op, size_p); | 1122 { |
1123 tree res = unmodified_parm_1 (fbi, stmt, op, size_p); | |
983 if (res) | 1124 if (res) |
984 return res; | 1125 return res; |
985 | 1126 |
986 if (TREE_CODE (op) == SSA_NAME | 1127 if (TREE_CODE (op) == SSA_NAME |
987 && !SSA_NAME_IS_DEFAULT_DEF (op) | 1128 && !SSA_NAME_IS_DEFAULT_DEF (op) |
988 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op))) | 1129 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op))) |
989 return unmodified_parm (SSA_NAME_DEF_STMT (op), | 1130 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op), |
990 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)), | 1131 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)), |
991 size_p); | 1132 size_p); |
992 return NULL_TREE; | 1133 return NULL_TREE; |
993 } | 1134 } |
994 | 1135 |
1000 statement in which OP is used or loaded. */ | 1141 statement in which OP is used or loaded. */ |
1001 | 1142 |
1002 static bool | 1143 static bool |
1003 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi, | 1144 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi, |
1004 gimple *stmt, tree op, int *index_p, | 1145 gimple *stmt, tree op, int *index_p, |
1005 HOST_WIDE_INT *size_p, | 1146 poly_int64 *size_p, |
1006 struct agg_position_info *aggpos) | 1147 struct agg_position_info *aggpos) |
1007 { | 1148 { |
1008 tree res = unmodified_parm_1 (stmt, op, size_p); | 1149 tree res = unmodified_parm_1 (fbi, stmt, op, size_p); |
1009 | 1150 |
1010 gcc_checking_assert (aggpos); | 1151 gcc_checking_assert (aggpos); |
1011 if (res) | 1152 if (res) |
1012 { | 1153 { |
1013 *index_p = ipa_get_param_decl_index (fbi->info, res); | 1154 *index_p = ipa_get_param_decl_index (fbi->info, res); |
1042 2 - for sure it is eliminated. | 1183 2 - for sure it is eliminated. |
1043 We are not terribly sophisticated, basically looking for simple abstraction | 1184 We are not terribly sophisticated, basically looking for simple abstraction |
1044 penalty wrappers. */ | 1185 penalty wrappers. */ |
1045 | 1186 |
1046 static int | 1187 static int |
1047 eliminated_by_inlining_prob (gimple *stmt) | 1188 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt) |
1048 { | 1189 { |
1049 enum gimple_code code = gimple_code (stmt); | 1190 enum gimple_code code = gimple_code (stmt); |
1050 enum tree_code rhs_code; | 1191 enum tree_code rhs_code; |
1051 | 1192 |
1052 if (!optimize) | 1193 if (!optimize) |
1062 | 1203 |
1063 rhs_code = gimple_assign_rhs_code (stmt); | 1204 rhs_code = gimple_assign_rhs_code (stmt); |
1064 | 1205 |
1065 /* Casts of parameters, loads from parameters passed by reference | 1206 /* Casts of parameters, loads from parameters passed by reference |
1066 and stores to return value or parameters are often free after | 1207 and stores to return value or parameters are often free after |
1067 inlining dua to SRA and further combining. | 1208 inlining due to SRA and further combining. |
1068 Assume that half of statements goes away. */ | 1209 Assume that half of statements goes away. */ |
1069 if (CONVERT_EXPR_CODE_P (rhs_code) | 1210 if (CONVERT_EXPR_CODE_P (rhs_code) |
1070 || rhs_code == VIEW_CONVERT_EXPR | 1211 || rhs_code == VIEW_CONVERT_EXPR |
1071 || rhs_code == ADDR_EXPR | 1212 || rhs_code == ADDR_EXPR |
1072 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) | 1213 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) |
1082 inner_rhs = rhs; | 1223 inner_rhs = rhs; |
1083 if (!inner_lhs) | 1224 if (!inner_lhs) |
1084 inner_lhs = lhs; | 1225 inner_lhs = lhs; |
1085 | 1226 |
1086 /* Reads of parameter are expected to be free. */ | 1227 /* Reads of parameter are expected to be free. */ |
1087 if (unmodified_parm (stmt, inner_rhs, NULL)) | 1228 if (unmodified_parm (fbi, stmt, inner_rhs, NULL)) |
1088 rhs_free = true; | 1229 rhs_free = true; |
1089 /* Match expressions of form &this->field. Those will most likely | 1230 /* Match expressions of form &this->field. Those will most likely |
1090 combine with something upstream after inlining. */ | 1231 combine with something upstream after inlining. */ |
1091 else if (TREE_CODE (inner_rhs) == ADDR_EXPR) | 1232 else if (TREE_CODE (inner_rhs) == ADDR_EXPR) |
1092 { | 1233 { |
1093 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0)); | 1234 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0)); |
1094 if (TREE_CODE (op) == PARM_DECL) | 1235 if (TREE_CODE (op) == PARM_DECL) |
1095 rhs_free = true; | 1236 rhs_free = true; |
1096 else if (TREE_CODE (op) == MEM_REF | 1237 else if (TREE_CODE (op) == MEM_REF |
1097 && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL)) | 1238 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0), |
1239 NULL)) | |
1098 rhs_free = true; | 1240 rhs_free = true; |
1099 } | 1241 } |
1100 | 1242 |
1101 /* When parameter is not SSA register because its address is taken | 1243 /* When parameter is not SSA register because its address is taken |
1102 and it is just copied into one, the statement will be completely | 1244 and it is just copied into one, the statement will be completely |
1105 return 2; | 1247 return 2; |
1106 | 1248 |
1107 /* Reads of parameters passed by reference | 1249 /* Reads of parameters passed by reference |
1108 expected to be free (i.e. optimized out after inlining). */ | 1250 expected to be free (i.e. optimized out after inlining). */ |
1109 if (TREE_CODE (inner_rhs) == MEM_REF | 1251 if (TREE_CODE (inner_rhs) == MEM_REF |
1110 && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL)) | 1252 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL)) |
1111 rhs_free = true; | 1253 rhs_free = true; |
1112 | 1254 |
1113 /* Copying parameter passed by reference into gimple register is | 1255 /* Copying parameter passed by reference into gimple register is |
1114 probably also going to copy propagate, but we can't be quite | 1256 probably also going to copy propagate, but we can't be quite |
1115 sure. */ | 1257 sure. */ |
1116 if (rhs_free && is_gimple_reg (lhs)) | 1258 if (rhs_free && is_gimple_reg (lhs)) |
1117 lhs_free = true; | 1259 lhs_free = true; |
1118 | 1260 |
1119 /* Writes to parameters, parameters passed by value and return value | 1261 /* Writes to parameters, parameters passed by value and return value |
1120 (either dirrectly or passed via invisible reference) are free. | 1262 (either directly or passed via invisible reference) are free. |
1121 | 1263 |
1122 TODO: We ought to handle testcase like | 1264 TODO: We ought to handle testcase like |
1123 struct a {int a,b;}; | 1265 struct a {int a,b;}; |
1124 struct a | 1266 struct a |
1125 retrurnsturct (void) | 1267 returnstruct (void) |
1126 { | 1268 { |
1127 struct a a ={1,2}; | 1269 struct a a ={1,2}; |
1128 return a; | 1270 return a; |
1129 } | 1271 } |
1130 | 1272 |
1131 This translate into: | 1273 This translate into: |
1132 | 1274 |
1133 retrurnsturct () | 1275 returnstruct () |
1134 { | 1276 { |
1135 int a$b; | 1277 int a$b; |
1136 int a$a; | 1278 int a$a; |
1137 struct a a; | 1279 struct a a; |
1138 struct a D.2739; | 1280 struct a D.2739; |
1146 For that we either need to copy ipa-split logic detecting writes | 1288 For that we either need to copy ipa-split logic detecting writes |
1147 to return value. */ | 1289 to return value. */ |
1148 if (TREE_CODE (inner_lhs) == PARM_DECL | 1290 if (TREE_CODE (inner_lhs) == PARM_DECL |
1149 || TREE_CODE (inner_lhs) == RESULT_DECL | 1291 || TREE_CODE (inner_lhs) == RESULT_DECL |
1150 || (TREE_CODE (inner_lhs) == MEM_REF | 1292 || (TREE_CODE (inner_lhs) == MEM_REF |
1151 && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL) | 1293 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0), |
1294 NULL) | |
1152 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME | 1295 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME |
1153 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0)) | 1296 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0)) |
1154 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND | 1297 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND |
1155 (inner_lhs, | 1298 (inner_lhs, |
1156 0))) == RESULT_DECL)))) | 1299 0))) == RESULT_DECL)))) |
1165 default: | 1308 default: |
1166 return 0; | 1309 return 0; |
1167 } | 1310 } |
1168 } | 1311 } |
1169 | 1312 |
1313 /* Analyze EXPR if it represents a series of simple operations performed on | |
1314 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and | |
1315 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item. | |
1316 Type of the parameter or load from an aggregate via the parameter is | |
1317 stored in *TYPE_P. Operations on the parameter are recorded to | |
1318 PARAM_OPS_P if it is not NULL. */ | |
1319 | |
1320 static bool | |
1321 decompose_param_expr (struct ipa_func_body_info *fbi, | |
1322 gimple *stmt, tree expr, | |
1323 int *index_p, tree *type_p, | |
1324 struct agg_position_info *aggpos, | |
1325 expr_eval_ops *param_ops_p = NULL) | |
1326 { | |
1327 int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops); | |
1328 int op_count = 0; | |
1329 | |
1330 if (param_ops_p) | |
1331 *param_ops_p = NULL; | |
1332 | |
1333 while (true) | |
1334 { | |
1335 expr_eval_op eval_op; | |
1336 unsigned rhs_count; | |
1337 unsigned cst_count = 0; | |
1338 | |
1339 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL, | |
1340 aggpos)) | |
1341 { | |
1342 tree type = TREE_TYPE (expr); | |
1343 | |
1344 if (aggpos->agg_contents) | |
1345 { | |
1346 /* Stop if containing bit-field. */ | |
1347 if (TREE_CODE (expr) == BIT_FIELD_REF | |
1348 || contains_bitfld_component_ref_p (expr)) | |
1349 break; | |
1350 } | |
1351 | |
1352 *type_p = type; | |
1353 return true; | |
1354 } | |
1355 | |
1356 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr)) | |
1357 break; | |
1358 | |
1359 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr))) | |
1360 break; | |
1361 | |
1362 switch (gimple_assign_rhs_class (stmt)) | |
1363 { | |
1364 case GIMPLE_SINGLE_RHS: | |
1365 expr = gimple_assign_rhs1 (stmt); | |
1366 continue; | |
1367 | |
1368 case GIMPLE_UNARY_RHS: | |
1369 rhs_count = 1; | |
1370 break; | |
1371 | |
1372 case GIMPLE_BINARY_RHS: | |
1373 rhs_count = 2; | |
1374 break; | |
1375 | |
1376 case GIMPLE_TERNARY_RHS: | |
1377 rhs_count = 3; | |
1378 break; | |
1379 | |
1380 default: | |
1381 goto fail; | |
1382 } | |
1383 | |
1384 /* Stop if expression is too complex. */ | |
1385 if (op_count++ == op_limit) | |
1386 break; | |
1387 | |
1388 if (param_ops_p) | |
1389 { | |
1390 eval_op.code = gimple_assign_rhs_code (stmt); | |
1391 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
1392 eval_op.val[0] = NULL_TREE; | |
1393 eval_op.val[1] = NULL_TREE; | |
1394 } | |
1395 | |
1396 expr = NULL_TREE; | |
1397 for (unsigned i = 0; i < rhs_count; i++) | |
1398 { | |
1399 tree op = gimple_op (stmt, i + 1); | |
1400 | |
1401 gcc_assert (op && !TYPE_P (op)); | |
1402 if (is_gimple_ip_invariant (op)) | |
1403 { | |
1404 if (++cst_count == rhs_count) | |
1405 goto fail; | |
1406 | |
1407 eval_op.val[cst_count - 1] = op; | |
1408 } | |
1409 else if (!expr) | |
1410 { | |
1411 /* Found a non-constant operand, and record its index in rhs | |
1412 operands. */ | |
1413 eval_op.index = i; | |
1414 expr = op; | |
1415 } | |
1416 else | |
1417 { | |
1418 /* Found more than one non-constant operands. */ | |
1419 goto fail; | |
1420 } | |
1421 } | |
1422 | |
1423 if (param_ops_p) | |
1424 vec_safe_insert (*param_ops_p, 0, eval_op); | |
1425 } | |
1426 | |
1427 /* Failed to decompose, free resource and return. */ | |
1428 fail: | |
1429 if (param_ops_p) | |
1430 vec_free (*param_ops_p); | |
1431 | |
1432 return false; | |
1433 } | |
1170 | 1434 |
1171 /* If BB ends by a conditional we can turn into predicates, attach corresponding | 1435 /* If BB ends by a conditional we can turn into predicates, attach corresponding |
1172 predicates to the CFG edges. */ | 1436 predicates to the CFG edges. */ |
1173 | 1437 |
1174 static void | 1438 static void |
1175 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi, | 1439 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi, |
1176 struct ipa_fn_summary *summary, | 1440 class ipa_fn_summary *summary, |
1441 class ipa_node_params *params_summary, | |
1177 basic_block bb) | 1442 basic_block bb) |
1178 { | 1443 { |
1179 gimple *last; | 1444 gimple *last; |
1180 tree op; | 1445 tree op, op2; |
1181 int index; | 1446 int index; |
1182 HOST_WIDE_INT size; | |
1183 struct agg_position_info aggpos; | 1447 struct agg_position_info aggpos; |
1184 enum tree_code code, inverted_code; | 1448 enum tree_code code, inverted_code; |
1185 edge e; | 1449 edge e; |
1186 edge_iterator ei; | 1450 edge_iterator ei; |
1187 gimple *set_stmt; | 1451 gimple *set_stmt; |
1188 tree op2; | 1452 tree param_type; |
1453 expr_eval_ops param_ops; | |
1189 | 1454 |
1190 last = last_stmt (bb); | 1455 last = last_stmt (bb); |
1191 if (!last || gimple_code (last) != GIMPLE_COND) | 1456 if (!last || gimple_code (last) != GIMPLE_COND) |
1192 return; | 1457 return; |
1193 if (!is_gimple_ip_invariant (gimple_cond_rhs (last))) | 1458 if (!is_gimple_ip_invariant (gimple_cond_rhs (last))) |
1194 return; | 1459 return; |
1195 op = gimple_cond_lhs (last); | 1460 op = gimple_cond_lhs (last); |
1196 /* TODO: handle conditionals like | 1461 |
1197 var = op0 < 4; | 1462 if (decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, |
1198 if (var != 0). */ | 1463 ¶m_ops)) |
1199 if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos)) | |
1200 { | 1464 { |
1201 code = gimple_cond_code (last); | 1465 code = gimple_cond_code (last); |
1202 inverted_code = invert_tree_comparison (code, HONOR_NANS (op)); | 1466 inverted_code = invert_tree_comparison (code, HONOR_NANS (op)); |
1203 | 1467 |
1204 FOR_EACH_EDGE (e, ei, bb->succs) | 1468 FOR_EACH_EDGE (e, ei, bb->succs) |
1205 { | 1469 { |
1206 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE | 1470 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE |
1207 ? code : inverted_code); | 1471 ? code : inverted_code); |
1208 /* invert_tree_comparison will return ERROR_MARK on FP | 1472 /* invert_tree_comparison will return ERROR_MARK on FP |
1209 comparsions that are not EQ/NE instead of returning proper | 1473 comparisons that are not EQ/NE instead of returning proper |
1210 unordered one. Be sure it is not confused with NON_CONSTANT. */ | 1474 unordered one. Be sure it is not confused with NON_CONSTANT. |
1211 if (this_code != ERROR_MARK) | 1475 |
1476 And if the edge's target is the final block of diamond CFG graph | |
1477 of this conditional statement, we do not need to compute | |
1478 predicate for the edge because the final block's predicate must | |
1479 be at least as that of the first block of the statement. */ | |
1480 if (this_code != ERROR_MARK | |
1481 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) | |
1212 { | 1482 { |
1213 predicate p | 1483 predicate p |
1214 = add_condition (summary, index, size, &aggpos, this_code, | 1484 = add_condition (summary, params_summary, index, |
1215 unshare_expr_without_location | 1485 param_type, &aggpos, |
1216 (gimple_cond_rhs (last))); | 1486 this_code, gimple_cond_rhs (last), param_ops); |
1217 e->aux = edge_predicate_pool.allocate (); | 1487 e->aux = edge_predicate_pool.allocate (); |
1218 *(predicate *) e->aux = p; | 1488 *(predicate *) e->aux = p; |
1219 } | 1489 } |
1220 } | 1490 } |
1491 vec_free (param_ops); | |
1221 } | 1492 } |
1222 | 1493 |
1223 if (TREE_CODE (op) != SSA_NAME) | 1494 if (TREE_CODE (op) != SSA_NAME) |
1224 return; | 1495 return; |
1225 /* Special case | 1496 /* Special case |
1228 else | 1499 else |
1229 nonconstant_code. | 1500 nonconstant_code. |
1230 Here we can predicate nonconstant_code. We can't | 1501 Here we can predicate nonconstant_code. We can't |
1231 really handle constant_code since we have no predicate | 1502 really handle constant_code since we have no predicate |
1232 for this and also the constant code is not known to be | 1503 for this and also the constant code is not known to be |
1233 optimized away when inliner doen't see operand is constant. | 1504 optimized away when inliner doesn't see operand is constant. |
1234 Other optimizers might think otherwise. */ | 1505 Other optimizers might think otherwise. */ |
1235 if (gimple_cond_code (last) != NE_EXPR | 1506 if (gimple_cond_code (last) != NE_EXPR |
1236 || !integer_zerop (gimple_cond_rhs (last))) | 1507 || !integer_zerop (gimple_cond_rhs (last))) |
1237 return; | 1508 return; |
1238 set_stmt = SSA_NAME_DEF_STMT (op); | 1509 set_stmt = SSA_NAME_DEF_STMT (op); |
1239 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P) | 1510 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P) |
1240 || gimple_call_num_args (set_stmt) != 1) | 1511 || gimple_call_num_args (set_stmt) != 1) |
1241 return; | 1512 return; |
1242 op2 = gimple_call_arg (set_stmt, 0); | 1513 op2 = gimple_call_arg (set_stmt, 0); |
1243 if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size, | 1514 if (!decompose_param_expr (fbi, set_stmt, op2, &index, ¶m_type, &aggpos)) |
1244 &aggpos)) | |
1245 return; | 1515 return; |
1246 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE) | 1516 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE) |
1247 { | 1517 { |
1248 predicate p = add_condition (summary, index, size, &aggpos, | 1518 predicate p = add_condition (summary, params_summary, index, |
1519 param_type, &aggpos, | |
1249 predicate::is_not_constant, NULL_TREE); | 1520 predicate::is_not_constant, NULL_TREE); |
1250 e->aux = edge_predicate_pool.allocate (); | 1521 e->aux = edge_predicate_pool.allocate (); |
1251 *(predicate *) e->aux = p; | 1522 *(predicate *) e->aux = p; |
1252 } | 1523 } |
1253 } | 1524 } |
1256 /* If BB ends by a switch we can turn into predicates, attach corresponding | 1527 /* If BB ends by a switch we can turn into predicates, attach corresponding |
1257 predicates to the CFG edges. */ | 1528 predicates to the CFG edges. */ |
1258 | 1529 |
1259 static void | 1530 static void |
1260 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi, | 1531 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi, |
1261 struct ipa_fn_summary *summary, | 1532 class ipa_fn_summary *summary, |
1533 class ipa_node_params *params_summary, | |
1262 basic_block bb) | 1534 basic_block bb) |
1263 { | 1535 { |
1264 gimple *lastg; | 1536 gimple *lastg; |
1265 tree op; | 1537 tree op; |
1266 int index; | 1538 int index; |
1267 HOST_WIDE_INT size; | |
1268 struct agg_position_info aggpos; | 1539 struct agg_position_info aggpos; |
1269 edge e; | 1540 edge e; |
1270 edge_iterator ei; | 1541 edge_iterator ei; |
1271 size_t n; | 1542 size_t n; |
1272 size_t case_idx; | 1543 size_t case_idx; |
1544 tree param_type; | |
1545 expr_eval_ops param_ops; | |
1273 | 1546 |
1274 lastg = last_stmt (bb); | 1547 lastg = last_stmt (bb); |
1275 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH) | 1548 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH) |
1276 return; | 1549 return; |
1277 gswitch *last = as_a <gswitch *> (lastg); | 1550 gswitch *last = as_a <gswitch *> (lastg); |
1278 op = gimple_switch_index (last); | 1551 op = gimple_switch_index (last); |
1279 if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos)) | 1552 if (!decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, |
1553 ¶m_ops)) | |
1280 return; | 1554 return; |
1555 | |
1556 auto_vec<std::pair<tree, tree> > ranges; | |
1557 tree type = TREE_TYPE (op); | |
1558 int bound_limit = opt_for_fn (fbi->node->decl, | |
1559 param_ipa_max_switch_predicate_bounds); | |
1560 int bound_count = 0; | |
1561 wide_int vr_wmin, vr_wmax; | |
1562 value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax); | |
1281 | 1563 |
1282 FOR_EACH_EDGE (e, ei, bb->succs) | 1564 FOR_EACH_EDGE (e, ei, bb->succs) |
1283 { | 1565 { |
1284 e->aux = edge_predicate_pool.allocate (); | 1566 e->aux = edge_predicate_pool.allocate (); |
1285 *(predicate *) e->aux = false; | 1567 *(predicate *) e->aux = false; |
1286 } | 1568 } |
1569 | |
1570 e = gimple_switch_edge (cfun, last, 0); | |
1571 /* Set BOUND_COUNT to maximum count to bypass computing predicate for | |
1572 default case if its target basic block is in convergence point of all | |
1573 switch cases, which can be determined by checking whether it | |
1574 post-dominates the switch statement. */ | |
1575 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) | |
1576 bound_count = INT_MAX; | |
1577 | |
1287 n = gimple_switch_num_labels (last); | 1578 n = gimple_switch_num_labels (last); |
1288 for (case_idx = 0; case_idx < n; ++case_idx) | 1579 for (case_idx = 1; case_idx < n; ++case_idx) |
1289 { | 1580 { |
1290 tree cl = gimple_switch_label (last, case_idx); | 1581 tree cl = gimple_switch_label (last, case_idx); |
1291 tree min, max; | 1582 tree min = CASE_LOW (cl); |
1583 tree max = CASE_HIGH (cl); | |
1292 predicate p; | 1584 predicate p; |
1293 | 1585 |
1294 e = gimple_switch_edge (cfun, last, case_idx); | 1586 e = gimple_switch_edge (cfun, last, case_idx); |
1295 min = CASE_LOW (cl); | 1587 |
1296 max = CASE_HIGH (cl); | 1588 /* The case value might not have same type as switch expression, |
1297 | 1589 extend the value based on the expression type. */ |
1298 /* For default we might want to construct predicate that none | 1590 if (TREE_TYPE (min) != type) |
1299 of cases is met, but it is bit hard to do not having negations | 1591 min = wide_int_to_tree (type, wi::to_wide (min)); |
1300 of conditionals handy. */ | 1592 |
1301 if (!min && !max) | 1593 if (!max) |
1594 max = min; | |
1595 else if (TREE_TYPE (max) != type) | |
1596 max = wide_int_to_tree (type, wi::to_wide (max)); | |
1597 | |
1598 /* The case's target basic block is in convergence point of all switch | |
1599 cases, its predicate should be at least as that of the switch | |
1600 statement. */ | |
1601 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) | |
1302 p = true; | 1602 p = true; |
1303 else if (!max) | 1603 else if (min == max) |
1304 p = add_condition (summary, index, size, &aggpos, EQ_EXPR, | 1604 p = add_condition (summary, params_summary, index, param_type, |
1305 unshare_expr_without_location (min)); | 1605 &aggpos, EQ_EXPR, min, param_ops); |
1306 else | 1606 else |
1307 { | 1607 { |
1308 predicate p1, p2; | 1608 predicate p1, p2; |
1309 p1 = add_condition (summary, index, size, &aggpos, GE_EXPR, | 1609 p1 = add_condition (summary, params_summary, index, param_type, |
1310 unshare_expr_without_location (min)); | 1610 &aggpos, GE_EXPR, min, param_ops); |
1311 p2 = add_condition (summary, index, size, &aggpos, LE_EXPR, | 1611 p2 = add_condition (summary, params_summary,index, param_type, |
1312 unshare_expr_without_location (max)); | 1612 &aggpos, LE_EXPR, max, param_ops); |
1313 p = p1 & p2; | 1613 p = p1 & p2; |
1314 } | 1614 } |
1315 *(struct predicate *) e->aux | 1615 *(class predicate *) e->aux |
1316 = p.or_with (summary->conds, *(struct predicate *) e->aux); | 1616 = p.or_with (summary->conds, *(class predicate *) e->aux); |
1317 } | 1617 |
1618 /* If there are too many disjoint case ranges, predicate for default | |
1619 case might become too complicated. So add a limit here. */ | |
1620 if (bound_count > bound_limit) | |
1621 continue; | |
1622 | |
1623 bool new_range = true; | |
1624 | |
1625 if (!ranges.is_empty ()) | |
1626 { | |
1627 wide_int curr_wmin = wi::to_wide (min); | |
1628 wide_int last_wmax = wi::to_wide (ranges.last ().second); | |
1629 | |
1630 /* Merge case ranges if they are continuous. */ | |
1631 if (curr_wmin == last_wmax + 1) | |
1632 new_range = false; | |
1633 else if (vr_type == VR_ANTI_RANGE) | |
1634 { | |
1635 /* If two disjoint case ranges can be connected by anti-range | |
1636 of switch index, combine them to one range. */ | |
1637 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type))) | |
1638 vr_type = VR_UNDEFINED; | |
1639 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type))) | |
1640 new_range = false; | |
1641 } | |
1642 } | |
1643 | |
1644 /* Create/extend a case range. And we count endpoints of range set, | |
1645 this number nearly equals to number of conditions that we will create | |
1646 for predicate of default case. */ | |
1647 if (new_range) | |
1648 { | |
1649 bound_count += (min == max) ? 1 : 2; | |
1650 ranges.safe_push (std::make_pair (min, max)); | |
1651 } | |
1652 else | |
1653 { | |
1654 bound_count += (ranges.last ().first == ranges.last ().second); | |
1655 ranges.last ().second = max; | |
1656 } | |
1657 } | |
1658 | |
1659 e = gimple_switch_edge (cfun, last, 0); | |
1660 if (bound_count > bound_limit) | |
1661 { | |
1662 *(class predicate *) e->aux = true; | |
1663 vec_free (param_ops); | |
1664 return; | |
1665 } | |
1666 | |
1667 predicate p_seg = true; | |
1668 predicate p_all = false; | |
1669 | |
1670 if (vr_type != VR_RANGE) | |
1671 { | |
1672 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type)); | |
1673 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type)); | |
1674 } | |
1675 | |
1676 /* Construct predicate to represent default range set that is negation of | |
1677 all case ranges. Case range is classified as containing single/non-single | |
1678 values. Suppose a piece of case ranges in the following. | |
1679 | |
1680 [D1...D2] [S1] ... [Sn] [D3...D4] | |
1681 | |
1682 To represent default case's range sets between two non-single value | |
1683 case ranges (From D2 to D3), we construct predicate as: | |
1684 | |
1685 D2 < x < D3 && x != S1 && ... && x != Sn | |
1686 */ | |
1687 for (size_t i = 0; i < ranges.length (); i++) | |
1688 { | |
1689 tree min = ranges[i].first; | |
1690 tree max = ranges[i].second; | |
1691 | |
1692 if (min == max) | |
1693 p_seg &= add_condition (summary, params_summary, index, | |
1694 param_type, &aggpos, NE_EXPR, | |
1695 min, param_ops); | |
1696 else | |
1697 { | |
1698 /* Do not create sub-predicate for range that is beyond low bound | |
1699 of switch index. */ | |
1700 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type))) | |
1701 { | |
1702 p_seg &= add_condition (summary, params_summary, index, | |
1703 param_type, &aggpos, | |
1704 LT_EXPR, min, param_ops); | |
1705 p_all = p_all.or_with (summary->conds, p_seg); | |
1706 } | |
1707 | |
1708 /* Do not create sub-predicate for range that is beyond up bound | |
1709 of switch index. */ | |
1710 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type))) | |
1711 { | |
1712 p_seg = false; | |
1713 break; | |
1714 } | |
1715 | |
1716 p_seg = add_condition (summary, params_summary, index, | |
1717 param_type, &aggpos, GT_EXPR, | |
1718 max, param_ops); | |
1719 } | |
1720 } | |
1721 | |
1722 p_all = p_all.or_with (summary->conds, p_seg); | |
1723 *(class predicate *) e->aux | |
1724 = p_all.or_with (summary->conds, *(class predicate *) e->aux); | |
1725 | |
1726 vec_free (param_ops); | |
1318 } | 1727 } |
1319 | 1728 |
1320 | 1729 |
1321 /* For each BB in NODE attach to its AUX pointer predicate under | 1730 /* For each BB in NODE attach to its AUX pointer predicate under |
1322 which it is executable. */ | 1731 which it is executable. */ |
1323 | 1732 |
1324 static void | 1733 static void |
1325 compute_bb_predicates (struct ipa_func_body_info *fbi, | 1734 compute_bb_predicates (struct ipa_func_body_info *fbi, |
1326 struct cgraph_node *node, | 1735 struct cgraph_node *node, |
1327 struct ipa_fn_summary *summary) | 1736 class ipa_fn_summary *summary, |
1737 class ipa_node_params *params_summary) | |
1328 { | 1738 { |
1329 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); | 1739 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); |
1330 bool done = false; | 1740 bool done = false; |
1331 basic_block bb; | 1741 basic_block bb; |
1332 | 1742 |
1333 FOR_EACH_BB_FN (bb, my_function) | 1743 FOR_EACH_BB_FN (bb, my_function) |
1334 { | 1744 { |
1335 set_cond_stmt_execution_predicate (fbi, summary, bb); | 1745 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb); |
1336 set_switch_stmt_execution_predicate (fbi, summary, bb); | 1746 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb); |
1337 } | 1747 } |
1338 | 1748 |
1339 /* Entry block is always executable. */ | 1749 /* Entry block is always executable. */ |
1340 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux | 1750 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux |
1341 = edge_predicate_pool.allocate (); | 1751 = edge_predicate_pool.allocate (); |
1356 if (e->src->aux) | 1766 if (e->src->aux) |
1357 { | 1767 { |
1358 predicate this_bb_predicate | 1768 predicate this_bb_predicate |
1359 = *(predicate *) e->src->aux; | 1769 = *(predicate *) e->src->aux; |
1360 if (e->aux) | 1770 if (e->aux) |
1361 this_bb_predicate &= (*(struct predicate *) e->aux); | 1771 this_bb_predicate &= (*(class predicate *) e->aux); |
1362 p = p.or_with (summary->conds, this_bb_predicate); | 1772 p = p.or_with (summary->conds, this_bb_predicate); |
1363 if (p == true) | 1773 if (p == true) |
1364 break; | 1774 break; |
1365 } | 1775 } |
1366 } | 1776 } |
1367 if (p == false) | 1777 if (p != false) |
1368 gcc_checking_assert (!bb->aux); | 1778 { |
1369 else | 1779 basic_block pdom_bb; |
1370 { | 1780 |
1371 if (!bb->aux) | 1781 if (!bb->aux) |
1372 { | 1782 { |
1373 done = false; | 1783 done = false; |
1374 bb->aux = edge_predicate_pool.allocate (); | 1784 bb->aux = edge_predicate_pool.allocate (); |
1375 *((predicate *) bb->aux) = p; | 1785 *((predicate *) bb->aux) = p; |
1384 { | 1794 { |
1385 done = false; | 1795 done = false; |
1386 *((predicate *) bb->aux) = p; | 1796 *((predicate *) bb->aux) = p; |
1387 } | 1797 } |
1388 } | 1798 } |
1799 | |
1800 /* For switch/if statement, we can OR-combine predicates of all | |
1801 its cases/branches to get predicate for basic block in their | |
1802 convergence point, but sometimes this will generate very | |
1803 complicated predicate. Actually, we can get simplified | |
1804 predicate in another way by using the fact that predicate | |
1805 for a basic block must also hold true for its post dominators. | |
1806 To be specific, basic block in convergence point of | |
1807 conditional statement should include predicate of the | |
1808 statement. */ | |
1809 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb); | |
1810 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb) | |
1811 ; | |
1812 else if (!pdom_bb->aux) | |
1813 { | |
1814 done = false; | |
1815 pdom_bb->aux = edge_predicate_pool.allocate (); | |
1816 *((predicate *) pdom_bb->aux) = p; | |
1817 } | |
1818 else if (p != *(predicate *) pdom_bb->aux) | |
1819 { | |
1820 p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux); | |
1821 if (p != *(predicate *) pdom_bb->aux) | |
1822 { | |
1823 done = false; | |
1824 *((predicate *) pdom_bb->aux) = p; | |
1825 } | |
1826 } | |
1389 } | 1827 } |
1390 } | 1828 } |
1391 } | 1829 } |
1392 } | 1830 } |
1393 | 1831 |
1394 | 1832 |
1395 /* Return predicate specifying when the STMT might have result that is not | 1833 /* Return predicate specifying when the STMT might have result that is not |
1396 a compile time constant. */ | 1834 a compile time constant. */ |
1397 | 1835 |
1398 static predicate | 1836 static predicate |
1399 will_be_nonconstant_expr_predicate (struct ipa_node_params *info, | 1837 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi, |
1400 struct ipa_fn_summary *summary, | 1838 class ipa_fn_summary *summary, |
1839 class ipa_node_params *params_summary, | |
1401 tree expr, | 1840 tree expr, |
1402 vec<predicate> nonconstant_names) | 1841 vec<predicate> nonconstant_names) |
1403 { | 1842 { |
1404 tree parm; | 1843 tree parm; |
1405 int index; | 1844 int index; |
1406 HOST_WIDE_INT size; | |
1407 | 1845 |
1408 while (UNARY_CLASS_P (expr)) | 1846 while (UNARY_CLASS_P (expr)) |
1409 expr = TREE_OPERAND (expr, 0); | 1847 expr = TREE_OPERAND (expr, 0); |
1410 | 1848 |
1411 parm = unmodified_parm (NULL, expr, &size); | 1849 parm = unmodified_parm (fbi, NULL, expr, NULL); |
1412 if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0) | 1850 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) |
1413 return add_condition (summary, index, size, NULL, predicate::changed, | 1851 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL, |
1414 NULL_TREE); | 1852 predicate::changed, NULL_TREE); |
1415 if (is_gimple_min_invariant (expr)) | 1853 if (is_gimple_min_invariant (expr)) |
1416 return false; | 1854 return false; |
1417 if (TREE_CODE (expr) == SSA_NAME) | 1855 if (TREE_CODE (expr) == SSA_NAME) |
1418 return nonconstant_names[SSA_NAME_VERSION (expr)]; | 1856 return nonconstant_names[SSA_NAME_VERSION (expr)]; |
1419 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr)) | 1857 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr)) |
1420 { | 1858 { |
1421 predicate p1 = will_be_nonconstant_expr_predicate | 1859 predicate p1 |
1422 (info, summary, TREE_OPERAND (expr, 0), | 1860 = will_be_nonconstant_expr_predicate (fbi, summary, |
1423 nonconstant_names); | 1861 params_summary, |
1862 TREE_OPERAND (expr, 0), | |
1863 nonconstant_names); | |
1424 if (p1 == true) | 1864 if (p1 == true) |
1425 return p1; | 1865 return p1; |
1426 | 1866 |
1427 predicate p2; | 1867 predicate p2 |
1428 p2 = will_be_nonconstant_expr_predicate (info, summary, | 1868 = will_be_nonconstant_expr_predicate (fbi, summary, |
1429 TREE_OPERAND (expr, 1), | 1869 params_summary, |
1430 nonconstant_names); | 1870 TREE_OPERAND (expr, 1), |
1871 nonconstant_names); | |
1431 return p1.or_with (summary->conds, p2); | 1872 return p1.or_with (summary->conds, p2); |
1432 } | 1873 } |
1433 else if (TREE_CODE (expr) == COND_EXPR) | 1874 else if (TREE_CODE (expr) == COND_EXPR) |
1434 { | 1875 { |
1435 predicate p1 = will_be_nonconstant_expr_predicate | 1876 predicate p1 |
1436 (info, summary, TREE_OPERAND (expr, 0), | 1877 = will_be_nonconstant_expr_predicate (fbi, summary, |
1437 nonconstant_names); | 1878 params_summary, |
1879 TREE_OPERAND (expr, 0), | |
1880 nonconstant_names); | |
1438 if (p1 == true) | 1881 if (p1 == true) |
1439 return p1; | 1882 return p1; |
1440 | 1883 |
1441 predicate p2; | 1884 predicate p2 |
1442 p2 = will_be_nonconstant_expr_predicate (info, summary, | 1885 = will_be_nonconstant_expr_predicate (fbi, summary, |
1443 TREE_OPERAND (expr, 1), | 1886 params_summary, |
1444 nonconstant_names); | 1887 TREE_OPERAND (expr, 1), |
1888 nonconstant_names); | |
1445 if (p2 == true) | 1889 if (p2 == true) |
1446 return p2; | 1890 return p2; |
1447 p1 = p1.or_with (summary->conds, p2); | 1891 p1 = p1.or_with (summary->conds, p2); |
1448 p2 = will_be_nonconstant_expr_predicate (info, summary, | 1892 p2 = will_be_nonconstant_expr_predicate (fbi, summary, |
1893 params_summary, | |
1449 TREE_OPERAND (expr, 2), | 1894 TREE_OPERAND (expr, 2), |
1450 nonconstant_names); | 1895 nonconstant_names); |
1451 return p2.or_with (summary->conds, p1); | 1896 return p2.or_with (summary->conds, p1); |
1452 } | 1897 } |
1453 else if (TREE_CODE (expr) == CALL_EXPR) | 1898 else if (TREE_CODE (expr) == CALL_EXPR) |
1464 /* Return predicate specifying when the STMT might have result that is not | 1909 /* Return predicate specifying when the STMT might have result that is not |
1465 a compile time constant. */ | 1910 a compile time constant. */ |
1466 | 1911 |
1467 static predicate | 1912 static predicate |
1468 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi, | 1913 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi, |
1469 struct ipa_fn_summary *summary, | 1914 class ipa_fn_summary *summary, |
1915 class ipa_node_params *params_summary, | |
1470 gimple *stmt, | 1916 gimple *stmt, |
1471 vec<predicate> nonconstant_names) | 1917 vec<predicate> nonconstant_names) |
1472 { | 1918 { |
1473 predicate p = true; | 1919 predicate p = true; |
1474 ssa_op_iter iter; | 1920 ssa_op_iter iter; |
1475 tree use; | 1921 tree use; |
1922 tree param_type = NULL_TREE; | |
1476 predicate op_non_const; | 1923 predicate op_non_const; |
1477 bool is_load; | 1924 bool is_load; |
1478 int base_index; | 1925 int base_index; |
1479 HOST_WIDE_INT size; | |
1480 struct agg_position_info aggpos; | 1926 struct agg_position_info aggpos; |
1481 | 1927 |
1482 /* What statments might be optimized away | 1928 /* What statements might be optimized away |
1483 when their arguments are constant. */ | 1929 when their arguments are constant. */ |
1484 if (gimple_code (stmt) != GIMPLE_ASSIGN | 1930 if (gimple_code (stmt) != GIMPLE_ASSIGN |
1485 && gimple_code (stmt) != GIMPLE_COND | 1931 && gimple_code (stmt) != GIMPLE_COND |
1486 && gimple_code (stmt) != GIMPLE_SWITCH | 1932 && gimple_code (stmt) != GIMPLE_SWITCH |
1487 && (gimple_code (stmt) != GIMPLE_CALL | 1933 && (gimple_code (stmt) != GIMPLE_CALL |
1495 is_load = gimple_assign_load_p (stmt); | 1941 is_load = gimple_assign_load_p (stmt); |
1496 | 1942 |
1497 /* Loads can be optimized when the value is known. */ | 1943 /* Loads can be optimized when the value is known. */ |
1498 if (is_load) | 1944 if (is_load) |
1499 { | 1945 { |
1500 tree op; | 1946 tree op = gimple_assign_rhs1 (stmt); |
1501 gcc_assert (gimple_assign_single_p (stmt)); | 1947 if (!decompose_param_expr (fbi, stmt, op, &base_index, ¶m_type, |
1502 op = gimple_assign_rhs1 (stmt); | 1948 &aggpos)) |
1503 if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size, | |
1504 &aggpos)) | |
1505 return p; | 1949 return p; |
1506 } | 1950 } |
1507 else | 1951 else |
1508 base_index = -1; | 1952 base_index = -1; |
1509 | 1953 |
1510 /* See if we understand all operands before we start | 1954 /* See if we understand all operands before we start |
1511 adding conditionals. */ | 1955 adding conditionals. */ |
1512 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) | 1956 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) |
1513 { | 1957 { |
1514 tree parm = unmodified_parm (stmt, use, NULL); | 1958 tree parm = unmodified_parm (fbi, stmt, use, NULL); |
1515 /* For arguments we can build a condition. */ | 1959 /* For arguments we can build a condition. */ |
1516 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0) | 1960 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0) |
1517 continue; | 1961 continue; |
1518 if (TREE_CODE (use) != SSA_NAME) | 1962 if (TREE_CODE (use) != SSA_NAME) |
1519 return p; | 1963 return p; |
1524 return p; | 1968 return p; |
1525 } | 1969 } |
1526 | 1970 |
1527 if (is_load) | 1971 if (is_load) |
1528 op_non_const = | 1972 op_non_const = |
1529 add_condition (summary, base_index, size, &aggpos, predicate::changed, | 1973 add_condition (summary, params_summary, |
1530 NULL); | 1974 base_index, param_type, &aggpos, |
1975 predicate::changed, NULL_TREE); | |
1531 else | 1976 else |
1532 op_non_const = false; | 1977 op_non_const = false; |
1533 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) | 1978 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) |
1534 { | 1979 { |
1535 HOST_WIDE_INT size; | 1980 tree parm = unmodified_parm (fbi, stmt, use, NULL); |
1536 tree parm = unmodified_parm (stmt, use, &size); | |
1537 int index; | 1981 int index; |
1538 | 1982 |
1539 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) | 1983 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) |
1540 { | 1984 { |
1541 if (index != base_index) | 1985 if (index != base_index) |
1542 p = add_condition (summary, index, size, NULL, predicate::changed, | 1986 p = add_condition (summary, params_summary, index, |
1543 NULL_TREE); | 1987 TREE_TYPE (parm), NULL, |
1988 predicate::changed, NULL_TREE); | |
1544 else | 1989 else |
1545 continue; | 1990 continue; |
1546 } | 1991 } |
1547 else | 1992 else |
1548 p = nonconstant_names[SSA_NAME_VERSION (use)]; | 1993 p = nonconstant_names[SSA_NAME_VERSION (use)]; |
1561 tree op; | 2006 tree op; |
1562 bitmap bb_set; | 2007 bitmap bb_set; |
1563 gimple *stmt; | 2008 gimple *stmt; |
1564 }; | 2009 }; |
1565 | 2010 |
1566 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute | 2011 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute |
1567 probability how often it changes between USE_BB. | 2012 probability how often it changes between USE_BB. |
1568 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB | 2013 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB |
1569 is in different loop nest, we can do better. | 2014 is in different loop nest, we can do better. |
1570 This is all just estimate. In theory we look for minimal cut separating | 2015 This is all just estimate. In theory we look for minimal cut separating |
1571 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion | 2016 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion |
1572 anyway. */ | 2017 anyway. */ |
1573 | 2018 |
1574 static basic_block | 2019 static basic_block |
1575 get_minimal_bb (basic_block init_bb, basic_block use_bb) | 2020 get_minimal_bb (basic_block init_bb, basic_block use_bb) |
1576 { | 2021 { |
1577 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); | 2022 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); |
1578 if (l && l->header->count < init_bb->count) | 2023 if (l && l->header->count < init_bb->count) |
1579 return l->header; | 2024 return l->header; |
1580 return init_bb; | 2025 return init_bb; |
1581 } | 2026 } |
1582 | 2027 |
1618 Value 0 is reserved for compile time invariants. | 2063 Value 0 is reserved for compile time invariants. |
1619 For common parameters it is REG_BR_PROB_BASE. For loop invariants it | 2064 For common parameters it is REG_BR_PROB_BASE. For loop invariants it |
1620 ought to be REG_BR_PROB_BASE / estimated_iters. */ | 2065 ought to be REG_BR_PROB_BASE / estimated_iters. */ |
1621 | 2066 |
1622 static int | 2067 static int |
1623 param_change_prob (gimple *stmt, int i) | 2068 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i) |
1624 { | 2069 { |
1625 tree op = gimple_call_arg (stmt, i); | 2070 tree op = gimple_call_arg (stmt, i); |
1626 basic_block bb = gimple_bb (stmt); | 2071 basic_block bb = gimple_bb (stmt); |
1627 | 2072 |
1628 if (TREE_CODE (op) == WITH_SIZE_EXPR) | 2073 if (TREE_CODE (op) == WITH_SIZE_EXPR) |
1670 return 0; | 2115 return 0; |
1671 if (!bb->count.nonzero_p ()) | 2116 if (!bb->count.nonzero_p ()) |
1672 return REG_BR_PROB_BASE; | 2117 return REG_BR_PROB_BASE; |
1673 if (dump_file) | 2118 if (dump_file) |
1674 { | 2119 { |
1675 fprintf (dump_file, " Analyzing param change probablity of "); | 2120 fprintf (dump_file, " Analyzing param change probability of "); |
1676 print_generic_expr (dump_file, op, TDF_SLIM); | 2121 print_generic_expr (dump_file, op, TDF_SLIM); |
1677 fprintf (dump_file, "\n"); | 2122 fprintf (dump_file, "\n"); |
1678 } | 2123 } |
1679 ao_ref_init (&refd, op); | 2124 ao_ref_init (&refd, op); |
1680 info.op = op; | 2125 info.op = op; |
1681 info.stmt = stmt; | 2126 info.stmt = stmt; |
1682 info.bb_set = BITMAP_ALLOC (NULL); | 2127 info.bb_set = BITMAP_ALLOC (NULL); |
1683 walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info, | 2128 int walked |
1684 NULL); | 2129 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info, |
1685 if (bitmap_bit_p (info.bb_set, bb->index)) | 2130 NULL, NULL, fbi->aa_walk_budget); |
2131 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index)) | |
1686 { | 2132 { |
1687 if (dump_file) | 2133 if (dump_file) |
1688 fprintf (dump_file, " Set in same BB as used.\n"); | 2134 { |
2135 if (walked < 0) | |
2136 fprintf (dump_file, " Ran out of AA walking budget.\n"); | |
2137 else | |
2138 fprintf (dump_file, " Set in same BB as used.\n"); | |
2139 } | |
1689 BITMAP_FREE (info.bb_set); | 2140 BITMAP_FREE (info.bb_set); |
1690 return REG_BR_PROB_BASE; | 2141 return REG_BR_PROB_BASE; |
1691 } | 2142 } |
1692 | 2143 |
1693 bitmap_iterator bi; | 2144 bitmap_iterator bi; |
1717 /* Find whether a basic block BB is the final block of a (half) diamond CFG | 2168 /* Find whether a basic block BB is the final block of a (half) diamond CFG |
1718 sub-graph and if the predicate the condition depends on is known. If so, | 2169 sub-graph and if the predicate the condition depends on is known. If so, |
1719 return true and store the pointer the predicate in *P. */ | 2170 return true and store the pointer the predicate in *P. */ |
1720 | 2171 |
1721 static bool | 2172 static bool |
1722 phi_result_unknown_predicate (struct ipa_node_params *info, | 2173 phi_result_unknown_predicate (ipa_func_body_info *fbi, |
1723 ipa_fn_summary *summary, basic_block bb, | 2174 ipa_fn_summary *summary, |
2175 class ipa_node_params *params_summary, | |
2176 basic_block bb, | |
1724 predicate *p, | 2177 predicate *p, |
1725 vec<predicate> nonconstant_names) | 2178 vec<predicate> nonconstant_names) |
1726 { | 2179 { |
1727 edge e; | 2180 edge e; |
1728 edge_iterator ei; | 2181 edge_iterator ei; |
1762 if (!stmt | 2215 if (!stmt |
1763 || gimple_code (stmt) != GIMPLE_COND | 2216 || gimple_code (stmt) != GIMPLE_COND |
1764 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt))) | 2217 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt))) |
1765 return false; | 2218 return false; |
1766 | 2219 |
1767 *p = will_be_nonconstant_expr_predicate (info, summary, | 2220 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary, |
1768 gimple_cond_lhs (stmt), | 2221 gimple_cond_lhs (stmt), |
1769 nonconstant_names); | 2222 nonconstant_names); |
1770 if (*p == true) | 2223 if (*p == true) |
1771 return false; | 2224 return false; |
1772 else | 2225 else |
1777 and *P being the predicate describing whether the selected PHI argument is | 2230 and *P being the predicate describing whether the selected PHI argument is |
1778 known, store a predicate for the result of the PHI statement into | 2231 known, store a predicate for the result of the PHI statement into |
1779 NONCONSTANT_NAMES, if possible. */ | 2232 NONCONSTANT_NAMES, if possible. */ |
1780 | 2233 |
1781 static void | 2234 static void |
1782 predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi, | 2235 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi, |
1783 predicate *p, | 2236 predicate *p, |
1784 vec<predicate> nonconstant_names) | 2237 vec<predicate> nonconstant_names) |
1785 { | 2238 { |
1786 unsigned i; | 2239 unsigned i; |
1787 | 2240 |
1802 { | 2255 { |
1803 fprintf (dump_file, "\t\tphi predicate: "); | 2256 fprintf (dump_file, "\t\tphi predicate: "); |
1804 p->dump (dump_file, summary->conds); | 2257 p->dump (dump_file, summary->conds); |
1805 } | 2258 } |
1806 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p; | 2259 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p; |
1807 } | |
1808 | |
1809 /* Return predicate specifying when array index in access OP becomes non-constant. */ | |
1810 | |
1811 static predicate | |
1812 array_index_predicate (ipa_fn_summary *info, | |
1813 vec< predicate> nonconstant_names, tree op) | |
1814 { | |
1815 predicate p = false; | |
1816 while (handled_component_p (op)) | |
1817 { | |
1818 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF) | |
1819 { | |
1820 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME) | |
1821 p = p.or_with (info->conds, | |
1822 nonconstant_names[SSA_NAME_VERSION | |
1823 (TREE_OPERAND (op, 1))]); | |
1824 } | |
1825 op = TREE_OPERAND (op, 0); | |
1826 } | |
1827 return p; | |
1828 } | 2260 } |
1829 | 2261 |
1830 /* For a typical usage of __builtin_expect (a<b, 1), we | 2262 /* For a typical usage of __builtin_expect (a<b, 1), we |
1831 may introduce an extra relation stmt: | 2263 may introduce an extra relation stmt: |
1832 With the builtin, we have | 2264 With the builtin, we have |
1903 /* Return true when the basic blocks contains only clobbers followed by RESX. | 2335 /* Return true when the basic blocks contains only clobbers followed by RESX. |
1904 Such BBs are kept around to make removal of dead stores possible with | 2336 Such BBs are kept around to make removal of dead stores possible with |
1905 presence of EH and will be optimized out by optimize_clobbers later in the | 2337 presence of EH and will be optimized out by optimize_clobbers later in the |
1906 game. | 2338 game. |
1907 | 2339 |
1908 NEED_EH is used to recurse in case the clobber has non-EH predecestors | 2340 NEED_EH is used to recurse in case the clobber has non-EH predecessors |
1909 that can be clobber only, too.. When it is false, the RESX is not necessary | 2341 that can be clobber only, too.. When it is false, the RESX is not necessary |
1910 on the end of basic block. */ | 2342 on the end of basic block. */ |
1911 | 2343 |
1912 static bool | 2344 static bool |
1913 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true) | 2345 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true) |
1937 if (gimple_code (stmt) == GIMPLE_LABEL) | 2369 if (gimple_code (stmt) == GIMPLE_LABEL) |
1938 break; | 2370 break; |
1939 return false; | 2371 return false; |
1940 } | 2372 } |
1941 | 2373 |
1942 /* See if all predecestors are either throws or clobber only BBs. */ | 2374 /* See if all predecessors are either throws or clobber only BBs. */ |
1943 FOR_EACH_EDGE (e, ei, bb->preds) | 2375 FOR_EACH_EDGE (e, ei, bb->preds) |
1944 if (!(e->flags & EDGE_EH) | 2376 if (!(e->flags & EDGE_EH) |
1945 && !clobber_only_eh_bb_p (e->src, false)) | 2377 && !clobber_only_eh_bb_p (e->src, false)) |
1946 return false; | 2378 return false; |
1947 | 2379 |
1967 EARLY indicates run from early optimization pipeline. */ | 2399 EARLY indicates run from early optimization pipeline. */ |
1968 | 2400 |
1969 static void | 2401 static void |
1970 analyze_function_body (struct cgraph_node *node, bool early) | 2402 analyze_function_body (struct cgraph_node *node, bool early) |
1971 { | 2403 { |
1972 sreal time = 0; | 2404 sreal time = opt_for_fn (node->decl, param_uninlined_function_time); |
1973 /* Estimate static overhead for function prologue/epilogue and alignment. */ | 2405 /* Estimate static overhead for function prologue/epilogue and alignment. */ |
1974 int size = 2; | 2406 int size = opt_for_fn (node->decl, param_uninlined_function_insns); |
1975 /* Benefits are scaled by probability of elimination that is in range | 2407 /* Benefits are scaled by probability of elimination that is in range |
1976 <0,2>. */ | 2408 <0,2>. */ |
1977 basic_block bb; | 2409 basic_block bb; |
1978 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); | 2410 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); |
1979 sreal freq; | 2411 sreal freq; |
1980 struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); | 2412 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); |
2413 class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node); | |
1981 predicate bb_predicate; | 2414 predicate bb_predicate; |
1982 struct ipa_func_body_info fbi; | 2415 struct ipa_func_body_info fbi; |
1983 vec<predicate> nonconstant_names = vNULL; | 2416 vec<predicate> nonconstant_names = vNULL; |
1984 int nblocks, n; | 2417 int nblocks, n; |
1985 int *order; | 2418 int *order; |
1986 predicate array_index = true; | |
1987 gimple *fix_builtin_expect_stmt; | 2419 gimple *fix_builtin_expect_stmt; |
1988 | 2420 |
1989 gcc_assert (my_function && my_function->cfg); | 2421 gcc_assert (my_function && my_function->cfg); |
1990 gcc_assert (cfun == my_function); | 2422 gcc_assert (cfun == my_function); |
1991 | 2423 |
1992 memset(&fbi, 0, sizeof(fbi)); | 2424 memset(&fbi, 0, sizeof(fbi)); |
2425 vec_free (info->conds); | |
1993 info->conds = NULL; | 2426 info->conds = NULL; |
2427 vec_free (info->size_time_table); | |
1994 info->size_time_table = NULL; | 2428 info->size_time_table = NULL; |
1995 | 2429 |
1996 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer | 2430 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer |
1997 so we can produce proper inline hints. | 2431 so we can produce proper inline hints. |
1998 | 2432 |
2000 so we can produce correct BB predicates. */ | 2434 so we can produce correct BB predicates. */ |
2001 | 2435 |
2002 if (opt_for_fn (node->decl, optimize)) | 2436 if (opt_for_fn (node->decl, optimize)) |
2003 { | 2437 { |
2004 calculate_dominance_info (CDI_DOMINATORS); | 2438 calculate_dominance_info (CDI_DOMINATORS); |
2439 calculate_dominance_info (CDI_POST_DOMINATORS); | |
2005 if (!early) | 2440 if (!early) |
2006 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); | 2441 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); |
2007 else | 2442 else |
2008 { | 2443 { |
2009 ipa_check_create_node_params (); | 2444 ipa_check_create_node_params (); |
2014 { | 2449 { |
2015 fbi.node = node; | 2450 fbi.node = node; |
2016 fbi.info = IPA_NODE_REF (node); | 2451 fbi.info = IPA_NODE_REF (node); |
2017 fbi.bb_infos = vNULL; | 2452 fbi.bb_infos = vNULL; |
2018 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun)); | 2453 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun)); |
2019 fbi.param_count = count_formal_params(node->decl); | 2454 fbi.param_count = count_formal_params (node->decl); |
2455 fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps); | |
2456 | |
2020 nonconstant_names.safe_grow_cleared | 2457 nonconstant_names.safe_grow_cleared |
2021 (SSANAMES (my_function)->length ()); | 2458 (SSANAMES (my_function)->length ()); |
2022 } | 2459 } |
2023 } | 2460 } |
2024 | 2461 |
2025 if (dump_file) | 2462 if (dump_file) |
2026 fprintf (dump_file, "\nAnalyzing function body size: %s\n", | 2463 fprintf (dump_file, "\nAnalyzing function body size: %s\n", |
2027 node->name ()); | 2464 node->dump_name ()); |
2028 | 2465 |
2029 /* When we run into maximal number of entries, we assign everything to the | 2466 /* When we run into maximal number of entries, we assign everything to the |
2030 constant truth case. Be sure to have it in list. */ | 2467 constant truth case. Be sure to have it in list. */ |
2031 bb_predicate = true; | 2468 bb_predicate = true; |
2032 info->account_size_time (0, 0, bb_predicate, bb_predicate); | 2469 info->account_size_time (0, 0, bb_predicate, bb_predicate); |
2033 | 2470 |
2034 bb_predicate = predicate::not_inlined (); | 2471 bb_predicate = predicate::not_inlined (); |
2035 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, bb_predicate, | 2472 info->account_size_time (opt_for_fn (node->decl, |
2473 param_uninlined_function_insns) | |
2474 * ipa_fn_summary::size_scale, | |
2475 opt_for_fn (node->decl, | |
2476 param_uninlined_function_time), | |
2477 bb_predicate, | |
2036 bb_predicate); | 2478 bb_predicate); |
2037 | 2479 |
2038 if (fbi.info) | 2480 if (fbi.info) |
2039 compute_bb_predicates (&fbi, node, info); | 2481 compute_bb_predicates (&fbi, node, info, params_summary); |
2040 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); | 2482 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); |
2041 nblocks = pre_and_rev_post_order_compute (NULL, order, false); | 2483 nblocks = pre_and_rev_post_order_compute (NULL, order, false); |
2042 for (n = 0; n < nblocks; n++) | 2484 for (n = 0; n < nblocks; n++) |
2043 { | 2485 { |
2044 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]); | 2486 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]); |
2076 | 2518 |
2077 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); | 2519 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); |
2078 gsi_next (&bsi)) | 2520 gsi_next (&bsi)) |
2079 { | 2521 { |
2080 if (first_phi | 2522 if (first_phi |
2081 && !phi_result_unknown_predicate (fbi.info, info, bb, | 2523 && !phi_result_unknown_predicate (&fbi, info, |
2524 params_summary, | |
2525 bb, | |
2082 &phi_predicate, | 2526 &phi_predicate, |
2083 nonconstant_names)) | 2527 nonconstant_names)) |
2084 break; | 2528 break; |
2085 first_phi = false; | 2529 first_phi = false; |
2086 if (dump_file && (dump_flags & TDF_DETAILS)) | 2530 if (dump_file && (dump_flags & TDF_DETAILS)) |
2093 } | 2537 } |
2094 } | 2538 } |
2095 | 2539 |
2096 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb); | 2540 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb); |
2097 | 2541 |
2098 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); | 2542 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); |
2099 gsi_next (&bsi)) | 2543 !gsi_end_p (bsi); gsi_next_nondebug (&bsi)) |
2100 { | 2544 { |
2101 gimple *stmt = gsi_stmt (bsi); | 2545 gimple *stmt = gsi_stmt (bsi); |
2102 int this_size = estimate_num_insns (stmt, &eni_size_weights); | 2546 int this_size = estimate_num_insns (stmt, &eni_size_weights); |
2103 int this_time = estimate_num_insns (stmt, &eni_time_weights); | 2547 int this_time = estimate_num_insns (stmt, &eni_time_weights); |
2104 int prob; | 2548 int prob; |
2105 predicate will_be_nonconstant; | 2549 predicate will_be_nonconstant; |
2106 | 2550 |
2107 /* This relation stmt should be folded after we remove | 2551 /* This relation stmt should be folded after we remove |
2108 buildin_expect call. Adjust the cost here. */ | 2552 __builtin_expect call. Adjust the cost here. */ |
2109 if (stmt == fix_builtin_expect_stmt) | 2553 if (stmt == fix_builtin_expect_stmt) |
2110 { | 2554 { |
2111 this_size--; | 2555 this_size--; |
2112 this_time--; | 2556 this_time--; |
2113 } | 2557 } |
2118 print_gimple_stmt (dump_file, stmt, 0); | 2562 print_gimple_stmt (dump_file, stmt, 0); |
2119 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n", | 2563 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n", |
2120 freq.to_double (), this_size, | 2564 freq.to_double (), this_size, |
2121 this_time); | 2565 this_time); |
2122 } | 2566 } |
2123 | |
2124 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ()) | |
2125 { | |
2126 predicate this_array_index; | |
2127 this_array_index = | |
2128 array_index_predicate (info, nonconstant_names, | |
2129 gimple_assign_rhs1 (stmt)); | |
2130 if (this_array_index != false) | |
2131 array_index &= this_array_index; | |
2132 } | |
2133 if (gimple_store_p (stmt) && nonconstant_names.exists ()) | |
2134 { | |
2135 predicate this_array_index; | |
2136 this_array_index = | |
2137 array_index_predicate (info, nonconstant_names, | |
2138 gimple_get_lhs (stmt)); | |
2139 if (this_array_index != false) | |
2140 array_index &= this_array_index; | |
2141 } | |
2142 | |
2143 | 2567 |
2144 if (is_gimple_call (stmt) | 2568 if (is_gimple_call (stmt) |
2145 && !gimple_call_internal_p (stmt)) | 2569 && !gimple_call_internal_p (stmt)) |
2146 { | 2570 { |
2147 struct cgraph_edge *edge = node->get_edge (stmt); | 2571 struct cgraph_edge *edge = node->get_edge (stmt); |
2166 | 2590 |
2167 if (count) | 2591 if (count) |
2168 es->param.safe_grow_cleared (count); | 2592 es->param.safe_grow_cleared (count); |
2169 for (i = 0; i < count; i++) | 2593 for (i = 0; i < count; i++) |
2170 { | 2594 { |
2171 int prob = param_change_prob (stmt, i); | 2595 int prob = param_change_prob (&fbi, stmt, i); |
2172 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE); | 2596 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE); |
2173 es->param[i].change_prob = prob; | 2597 es->param[i].change_prob = prob; |
2174 } | 2598 } |
2175 } | 2599 } |
2176 | 2600 |
2177 es->call_stmt_size = this_size; | 2601 es->call_stmt_size = this_size; |
2178 es->call_stmt_time = this_time; | 2602 es->call_stmt_time = this_time; |
2179 es->loop_depth = bb_loop_depth (bb); | 2603 es->loop_depth = bb_loop_depth (bb); |
2180 edge_set_predicate (edge, &bb_predicate); | 2604 edge_set_predicate (edge, &bb_predicate); |
2181 } | 2605 if (edge->speculative) |
2182 | 2606 { |
2183 /* TODO: When conditional jump or swithc is known to be constant, but | 2607 cgraph_edge *indirect |
2608 = edge->speculative_call_indirect_edge (); | |
2609 ipa_call_summary *es2 | |
2610 = ipa_call_summaries->get_create (indirect); | |
2611 ipa_call_summaries->duplicate (edge, indirect, | |
2612 es, es2); | |
2613 | |
2614 /* Edge is the first direct call. | |
2615 create and duplicate call summaries for multiple | |
2616 speculative call targets. */ | |
2617 for (cgraph_edge *direct | |
2618 = edge->next_speculative_call_target (); | |
2619 direct; | |
2620 direct = direct->next_speculative_call_target ()) | |
2621 { | |
2622 ipa_call_summary *es3 | |
2623 = ipa_call_summaries->get_create (direct); | |
2624 ipa_call_summaries->duplicate (edge, direct, | |
2625 es, es3); | |
2626 } | |
2627 } | |
2628 } | |
2629 | |
2630 /* TODO: When conditional jump or switch is known to be constant, but | |
2184 we did not translate it into the predicates, we really can account | 2631 we did not translate it into the predicates, we really can account |
2185 just maximum of the possible paths. */ | 2632 just maximum of the possible paths. */ |
2186 if (fbi.info) | 2633 if (fbi.info) |
2187 will_be_nonconstant | 2634 will_be_nonconstant |
2188 = will_be_nonconstant_predicate (&fbi, info, | 2635 = will_be_nonconstant_predicate (&fbi, info, params_summary, |
2189 stmt, nonconstant_names); | 2636 stmt, nonconstant_names); |
2190 else | 2637 else |
2191 will_be_nonconstant = true; | 2638 will_be_nonconstant = true; |
2192 if (this_time || this_size) | 2639 if (this_time || this_size) |
2193 { | 2640 { |
2194 sreal final_time = (sreal)this_time * freq; | 2641 sreal final_time = (sreal)this_time * freq; |
2195 | 2642 |
2196 prob = eliminated_by_inlining_prob (stmt); | 2643 prob = eliminated_by_inlining_prob (&fbi, stmt); |
2197 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS)) | 2644 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS)) |
2198 fprintf (dump_file, | 2645 fprintf (dump_file, |
2199 "\t\t50%% will be eliminated by inlining\n"); | 2646 "\t\t50%% will be eliminated by inlining\n"); |
2200 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS)) | 2647 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS)) |
2201 fprintf (dump_file, "\t\tWill be eliminated by inlining\n"); | 2648 fprintf (dump_file, "\t\tWill be eliminated by inlining\n"); |
2202 | 2649 |
2203 struct predicate p = bb_predicate & will_be_nonconstant; | 2650 class predicate p = bb_predicate & will_be_nonconstant; |
2204 | 2651 |
2205 /* We can ignore statement when we proved it is never going | 2652 /* We can ignore statement when we proved it is never going |
2206 to happen, but we can not do that for call statements | 2653 to happen, but we cannot do that for call statements |
2207 because edges are accounted specially. */ | 2654 because edges are accounted specially. */ |
2208 | 2655 |
2209 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false) | 2656 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false) |
2210 { | 2657 { |
2211 time += final_time; | 2658 time += final_time; |
2219 { | 2666 { |
2220 if (prob) | 2667 if (prob) |
2221 { | 2668 { |
2222 predicate ip = bb_predicate & predicate::not_inlined (); | 2669 predicate ip = bb_predicate & predicate::not_inlined (); |
2223 info->account_size_time (this_size * prob, | 2670 info->account_size_time (this_size * prob, |
2224 (this_time * prob) / 2, ip, | 2671 (final_time * prob) / 2, ip, |
2225 p); | 2672 p); |
2226 } | 2673 } |
2227 if (prob != 2) | 2674 if (prob != 2) |
2228 info->account_size_time (this_size * (2 - prob), | 2675 info->account_size_time (this_size * (2 - prob), |
2229 (this_time * (2 - prob) / 2), | 2676 (final_time * (2 - prob) / 2), |
2230 bb_predicate, | 2677 bb_predicate, |
2231 p); | 2678 p); |
2232 } | 2679 } |
2233 | 2680 |
2234 if (!info->fp_expressions && fp_expression_p (stmt)) | 2681 if (!info->fp_expressions && fp_expression_p (stmt)) |
2235 { | 2682 { |
2236 info->fp_expressions = true; | 2683 info->fp_expressions = true; |
2237 if (dump_file) | 2684 if (dump_file) |
2238 fprintf (dump_file, " fp_expression set\n"); | 2685 fprintf (dump_file, " fp_expression set\n"); |
2239 } | 2686 } |
2240 | 2687 } |
2241 gcc_assert (time >= 0); | 2688 |
2242 gcc_assert (size >= 0); | 2689 /* Account cost of address calculations in the statements. */ |
2243 } | 2690 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++) |
2244 } | 2691 { |
2245 } | 2692 for (tree op = gimple_op (stmt, i); |
2246 set_hint_predicate (&ipa_fn_summaries->get_create (node)->array_index, | 2693 op && handled_component_p (op); |
2247 array_index); | 2694 op = TREE_OPERAND (op, 0)) |
2695 if ((TREE_CODE (op) == ARRAY_REF | |
2696 || TREE_CODE (op) == ARRAY_RANGE_REF) | |
2697 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME) | |
2698 { | |
2699 predicate p = bb_predicate; | |
2700 if (fbi.info) | |
2701 p = p & will_be_nonconstant_expr_predicate | |
2702 (&fbi, info, params_summary, | |
2703 TREE_OPERAND (op, 1), | |
2704 nonconstant_names); | |
2705 if (p != false) | |
2706 { | |
2707 time += freq; | |
2708 size += 1; | |
2709 if (dump_file) | |
2710 fprintf (dump_file, | |
2711 "\t\tAccounting address calculation.\n"); | |
2712 info->account_size_time (ipa_fn_summary::size_scale, | |
2713 freq, | |
2714 bb_predicate, | |
2715 p); | |
2716 } | |
2717 } | |
2718 } | |
2719 | |
2720 } | |
2721 } | |
2248 free (order); | 2722 free (order); |
2249 | 2723 |
2250 if (nonconstant_names.exists () && !early) | 2724 if (nonconstant_names.exists () && !early) |
2251 { | 2725 { |
2252 struct loop *loop; | 2726 class loop *loop; |
2253 predicate loop_iterations = true; | 2727 predicate loop_iterations = true; |
2254 predicate loop_stride = true; | 2728 predicate loop_stride = true; |
2255 | 2729 |
2256 if (dump_file && (dump_flags & TDF_DETAILS)) | 2730 if (dump_file && (dump_flags & TDF_DETAILS)) |
2257 flow_loops_dump (dump_file, NULL, 0); | 2731 flow_loops_dump (dump_file, NULL, 0); |
2259 FOR_EACH_LOOP (loop, 0) | 2733 FOR_EACH_LOOP (loop, 0) |
2260 { | 2734 { |
2261 vec<edge> exits; | 2735 vec<edge> exits; |
2262 edge ex; | 2736 edge ex; |
2263 unsigned int j; | 2737 unsigned int j; |
2264 struct tree_niter_desc niter_desc; | 2738 class tree_niter_desc niter_desc; |
2265 bb_predicate = *(predicate *) loop->header->aux; | 2739 bb_predicate = *(predicate *) loop->header->aux; |
2266 | 2740 |
2267 exits = get_loop_exit_edges (loop); | 2741 exits = get_loop_exit_edges (loop); |
2268 FOR_EACH_VEC_ELT (exits, j, ex) | 2742 FOR_EACH_VEC_ELT (exits, j, ex) |
2269 if (number_of_iterations_exit (loop, ex, &niter_desc, false) | 2743 if (number_of_iterations_exit (loop, ex, &niter_desc, false) |
2270 && !is_gimple_min_invariant (niter_desc.niter)) | 2744 && !is_gimple_min_invariant (niter_desc.niter)) |
2271 { | 2745 { |
2272 predicate will_be_nonconstant | 2746 predicate will_be_nonconstant |
2273 = will_be_nonconstant_expr_predicate (fbi.info, info, | 2747 = will_be_nonconstant_expr_predicate (&fbi, info, |
2748 params_summary, | |
2274 niter_desc.niter, | 2749 niter_desc.niter, |
2275 nonconstant_names); | 2750 nonconstant_names); |
2276 if (will_be_nonconstant != true) | 2751 if (will_be_nonconstant != true) |
2277 will_be_nonconstant = bb_predicate & will_be_nonconstant; | 2752 will_be_nonconstant = bb_predicate & will_be_nonconstant; |
2278 if (will_be_nonconstant != true | 2753 if (will_be_nonconstant != true |
2313 def, &iv, true) | 2788 def, &iv, true) |
2314 || is_gimple_min_invariant (iv.step)) | 2789 || is_gimple_min_invariant (iv.step)) |
2315 continue; | 2790 continue; |
2316 | 2791 |
2317 predicate will_be_nonconstant | 2792 predicate will_be_nonconstant |
2318 = will_be_nonconstant_expr_predicate (fbi.info, info, | 2793 = will_be_nonconstant_expr_predicate (&fbi, info, |
2319 iv.step, | 2794 params_summary, |
2795 iv.step, | |
2320 nonconstant_names); | 2796 nonconstant_names); |
2321 if (will_be_nonconstant != true) | 2797 if (will_be_nonconstant != true) |
2322 will_be_nonconstant = bb_predicate & will_be_nonconstant; | 2798 will_be_nonconstant = bb_predicate & will_be_nonconstant; |
2323 if (will_be_nonconstant != true | 2799 if (will_be_nonconstant != true |
2324 && will_be_nonconstant != false) | 2800 && will_be_nonconstant != false) |
2348 edge_predicate_pool.remove ((predicate *) e->aux); | 2824 edge_predicate_pool.remove ((predicate *) e->aux); |
2349 e->aux = NULL; | 2825 e->aux = NULL; |
2350 } | 2826 } |
2351 } | 2827 } |
2352 ipa_fn_summary *s = ipa_fn_summaries->get (node); | 2828 ipa_fn_summary *s = ipa_fn_summaries->get (node); |
2829 ipa_size_summary *ss = ipa_size_summaries->get (node); | |
2353 s->time = time; | 2830 s->time = time; |
2354 s->self_size = size; | 2831 ss->self_size = size; |
2355 nonconstant_names.release (); | 2832 nonconstant_names.release (); |
2356 ipa_release_body_info (&fbi); | 2833 ipa_release_body_info (&fbi); |
2357 if (opt_for_fn (node->decl, optimize)) | 2834 if (opt_for_fn (node->decl, optimize)) |
2358 { | 2835 { |
2359 if (!early) | 2836 if (!early) |
2360 loop_optimizer_finalize (); | 2837 loop_optimizer_finalize (); |
2361 else if (!ipa_edge_args_sum) | 2838 else if (!ipa_edge_args_sum) |
2362 ipa_free_all_node_params (); | 2839 ipa_free_all_node_params (); |
2363 free_dominance_info (CDI_DOMINATORS); | 2840 free_dominance_info (CDI_DOMINATORS); |
2841 free_dominance_info (CDI_POST_DOMINATORS); | |
2364 } | 2842 } |
2365 if (dump_file) | 2843 if (dump_file) |
2366 { | 2844 { |
2367 fprintf (dump_file, "\n"); | 2845 fprintf (dump_file, "\n"); |
2368 ipa_dump_fn_summary (dump_file, node); | 2846 ipa_dump_fn_summary (dump_file, node); |
2376 void | 2854 void |
2377 compute_fn_summary (struct cgraph_node *node, bool early) | 2855 compute_fn_summary (struct cgraph_node *node, bool early) |
2378 { | 2856 { |
2379 HOST_WIDE_INT self_stack_size; | 2857 HOST_WIDE_INT self_stack_size; |
2380 struct cgraph_edge *e; | 2858 struct cgraph_edge *e; |
2381 struct ipa_fn_summary *info; | 2859 |
2382 | 2860 gcc_assert (!node->inlined_to); |
2383 gcc_assert (!node->global.inlined_to); | |
2384 | 2861 |
2385 if (!ipa_fn_summaries) | 2862 if (!ipa_fn_summaries) |
2386 ipa_fn_summary_alloc (); | 2863 ipa_fn_summary_alloc (); |
2387 | 2864 |
2388 /* Create a new ipa_fn_summary. */ | 2865 /* Create a new ipa_fn_summary. */ |
2389 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node); | 2866 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node); |
2390 ipa_fn_summaries->remove (node); | 2867 ipa_fn_summaries->remove (node); |
2391 info = ipa_fn_summaries->get_create (node); | 2868 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); |
2869 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); | |
2392 | 2870 |
2393 /* Estimate the stack size for the function if we're optimizing. */ | 2871 /* Estimate the stack size for the function if we're optimizing. */ |
2394 self_stack_size = optimize && !node->thunk.thunk_p | 2872 self_stack_size = optimize && !node->thunk.thunk_p |
2395 ? estimated_stack_frame_size (node) : 0; | 2873 ? estimated_stack_frame_size (node) : 0; |
2396 info->estimated_self_stack_size = self_stack_size; | 2874 size_info->estimated_self_stack_size = self_stack_size; |
2397 info->estimated_stack_size = self_stack_size; | 2875 info->estimated_stack_size = self_stack_size; |
2398 info->stack_frame_offset = 0; | |
2399 | 2876 |
2400 if (node->thunk.thunk_p) | 2877 if (node->thunk.thunk_p) |
2401 { | 2878 { |
2402 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees); | 2879 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees); |
2403 predicate t = true; | 2880 predicate t = true; |
2404 | 2881 |
2405 node->local.can_change_signature = false; | 2882 node->can_change_signature = false; |
2406 es->call_stmt_size = eni_size_weights.call_cost; | 2883 es->call_stmt_size = eni_size_weights.call_cost; |
2407 es->call_stmt_time = eni_time_weights.call_cost; | 2884 es->call_stmt_time = eni_time_weights.call_cost; |
2408 info->account_size_time (ipa_fn_summary::size_scale * 2, 2, t, t); | 2885 info->account_size_time (ipa_fn_summary::size_scale |
2886 * opt_for_fn (node->decl, | |
2887 param_uninlined_function_thunk_insns), | |
2888 opt_for_fn (node->decl, | |
2889 param_uninlined_function_thunk_time), t, t); | |
2409 t = predicate::not_inlined (); | 2890 t = predicate::not_inlined (); |
2410 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t); | 2891 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t); |
2411 ipa_update_overall_fn_summary (node); | 2892 ipa_update_overall_fn_summary (node); |
2412 info->self_size = info->size; | 2893 size_info->self_size = size_info->size; |
2413 /* We can not inline instrumentation clones. */ | 2894 if (stdarg_p (TREE_TYPE (node->decl))) |
2414 if (node->thunk.add_pointer_bounds_args) | |
2415 { | |
2416 info->inlinable = false; | |
2417 node->callees->inline_failed = CIF_CHKP; | |
2418 } | |
2419 else if (stdarg_p (TREE_TYPE (node->decl))) | |
2420 { | 2895 { |
2421 info->inlinable = false; | 2896 info->inlinable = false; |
2422 node->callees->inline_failed = CIF_VARIADIC_THUNK; | 2897 node->callees->inline_failed = CIF_VARIADIC_THUNK; |
2423 } | 2898 } |
2424 else | 2899 else |
2426 } | 2901 } |
2427 else | 2902 else |
2428 { | 2903 { |
2429 /* Even is_gimple_min_invariant rely on current_function_decl. */ | 2904 /* Even is_gimple_min_invariant rely on current_function_decl. */ |
2430 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); | 2905 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
2906 | |
2907 /* During IPA profile merging we may be called w/o virtual SSA form | |
2908 built. */ | |
2909 update_ssa (TODO_update_ssa_only_virtuals); | |
2431 | 2910 |
2432 /* Can this function be inlined at all? */ | 2911 /* Can this function be inlined at all? */ |
2433 if (!opt_for_fn (node->decl, optimize) | 2912 if (!opt_for_fn (node->decl, optimize) |
2434 && !lookup_attribute ("always_inline", | 2913 && !lookup_attribute ("always_inline", |
2435 DECL_ATTRIBUTES (node->decl))) | 2914 DECL_ATTRIBUTES (node->decl))) |
2441 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)) | 2920 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)) |
2442 /* Likewise for #pragma omp declare simd functions or functions | 2921 /* Likewise for #pragma omp declare simd functions or functions |
2443 with simd attribute. */ | 2922 with simd attribute. */ |
2444 || lookup_attribute ("omp declare simd", | 2923 || lookup_attribute ("omp declare simd", |
2445 DECL_ATTRIBUTES (node->decl))) | 2924 DECL_ATTRIBUTES (node->decl))) |
2446 node->local.can_change_signature = false; | 2925 node->can_change_signature = false; |
2447 else | 2926 else |
2448 { | 2927 { |
2449 /* Otherwise, inlinable functions always can change signature. */ | 2928 /* Otherwise, inlinable functions always can change signature. */ |
2450 if (info->inlinable) | 2929 if (info->inlinable) |
2451 node->local.can_change_signature = true; | 2930 node->can_change_signature = true; |
2452 else | 2931 else |
2453 { | 2932 { |
2454 /* Functions calling builtin_apply can not change signature. */ | 2933 /* Functions calling builtin_apply cannot change signature. */ |
2455 for (e = node->callees; e; e = e->next_callee) | 2934 for (e = node->callees; e; e = e->next_callee) |
2456 { | 2935 { |
2457 tree cdecl = e->callee->decl; | 2936 tree cdecl = e->callee->decl; |
2458 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS) | 2937 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS) |
2459 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START)) | 2938 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START)) |
2460 break; | 2939 break; |
2461 } | 2940 } |
2462 node->local.can_change_signature = !e; | 2941 node->can_change_signature = !e; |
2463 } | 2942 } |
2464 } | 2943 } |
2465 /* Functions called by instrumentation thunk can't change signature | |
2466 because instrumentation thunk modification is not supported. */ | |
2467 if (node->local.can_change_signature) | |
2468 for (e = node->callers; e; e = e->next_caller) | |
2469 if (e->caller->thunk.thunk_p | |
2470 && e->caller->thunk.add_pointer_bounds_args) | |
2471 { | |
2472 node->local.can_change_signature = false; | |
2473 break; | |
2474 } | |
2475 analyze_function_body (node, early); | 2944 analyze_function_body (node, early); |
2476 pop_cfun (); | 2945 pop_cfun (); |
2477 } | 2946 } |
2478 for (e = node->callees; e; e = e->next_callee) | 2947 for (e = node->callees; e; e = e->next_callee) |
2479 if (e->callee->comdat_local_p ()) | 2948 if (e->callee->comdat_local_p ()) |
2480 break; | 2949 break; |
2481 node->calls_comdat_local = (e != NULL); | 2950 node->calls_comdat_local = (e != NULL); |
2482 | 2951 |
2483 /* Inlining characteristics are maintained by the cgraph_mark_inline. */ | 2952 /* Inlining characteristics are maintained by the cgraph_mark_inline. */ |
2484 info->size = info->self_size; | 2953 size_info->size = size_info->self_size; |
2485 info->stack_frame_offset = 0; | 2954 info->estimated_stack_size = size_info->estimated_self_stack_size; |
2486 info->estimated_stack_size = info->estimated_self_stack_size; | |
2487 | 2955 |
2488 /* Code above should compute exactly the same result as | 2956 /* Code above should compute exactly the same result as |
2489 ipa_update_overall_fn_summary but because computation happens in | 2957 ipa_update_overall_fn_summary but because computation happens in |
2490 different order the roundoff errors result in slight changes. */ | 2958 different order the roundoff errors result in slight changes. */ |
2491 ipa_update_overall_fn_summary (node); | 2959 ipa_update_overall_fn_summary (node); |
2492 gcc_assert (info->size == info->self_size); | 2960 /* In LTO mode we may have speculative edges set. */ |
2961 gcc_assert (in_lto_p || size_info->size == size_info->self_size); | |
2493 } | 2962 } |
2494 | 2963 |
2495 | 2964 |
2496 /* Compute parameters of functions used by inliner using | 2965 /* Compute parameters of functions used by inliner using |
2497 current_function_decl. */ | 2966 current_function_decl. */ |
2509 static bool | 2978 static bool |
2510 estimate_edge_devirt_benefit (struct cgraph_edge *ie, | 2979 estimate_edge_devirt_benefit (struct cgraph_edge *ie, |
2511 int *size, int *time, | 2980 int *size, int *time, |
2512 vec<tree> known_vals, | 2981 vec<tree> known_vals, |
2513 vec<ipa_polymorphic_call_context> known_contexts, | 2982 vec<ipa_polymorphic_call_context> known_contexts, |
2514 vec<ipa_agg_jump_function_p> known_aggs) | 2983 vec<ipa_agg_value_set> known_aggs) |
2515 { | 2984 { |
2516 tree target; | 2985 tree target; |
2517 struct cgraph_node *callee; | 2986 struct cgraph_node *callee; |
2518 struct ipa_fn_summary *isummary; | 2987 class ipa_fn_summary *isummary; |
2519 enum availability avail; | 2988 enum availability avail; |
2520 bool speculative; | 2989 bool speculative; |
2521 | 2990 |
2522 if (!known_vals.exists () && !known_contexts.exists ()) | 2991 if (!known_vals.length () && !known_contexts.length ()) |
2523 return false; | 2992 return false; |
2524 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining)) | 2993 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining)) |
2525 return false; | 2994 return false; |
2526 | 2995 |
2527 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts, | 2996 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts, |
2540 return false; | 3009 return false; |
2541 callee = callee->function_symbol (&avail); | 3010 callee = callee->function_symbol (&avail); |
2542 if (avail < AVAIL_AVAILABLE) | 3011 if (avail < AVAIL_AVAILABLE) |
2543 return false; | 3012 return false; |
2544 isummary = ipa_fn_summaries->get (callee); | 3013 isummary = ipa_fn_summaries->get (callee); |
3014 if (isummary == NULL) | |
3015 return false; | |
3016 | |
2545 return isummary->inlinable; | 3017 return isummary->inlinable; |
2546 } | 3018 } |
2547 | 3019 |
2548 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to | 3020 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to |
2549 handle edge E with probability PROB. | 3021 handle edge E with probability PROB. |
2552 site. */ | 3024 site. */ |
2553 | 3025 |
2554 static inline void | 3026 static inline void |
2555 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size, | 3027 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size, |
2556 sreal *time, | 3028 sreal *time, |
2557 int prob, | |
2558 vec<tree> known_vals, | 3029 vec<tree> known_vals, |
2559 vec<ipa_polymorphic_call_context> known_contexts, | 3030 vec<ipa_polymorphic_call_context> known_contexts, |
2560 vec<ipa_agg_jump_function_p> known_aggs, | 3031 vec<ipa_agg_value_set> known_aggs, |
2561 ipa_hints *hints) | 3032 ipa_hints *hints) |
2562 { | 3033 { |
2563 struct ipa_call_summary *es = ipa_call_summaries->get (e); | 3034 class ipa_call_summary *es = ipa_call_summaries->get (e); |
2564 int call_size = es->call_stmt_size; | 3035 int call_size = es->call_stmt_size; |
2565 int call_time = es->call_stmt_time; | 3036 int call_time = es->call_stmt_time; |
2566 int cur_size; | 3037 int cur_size; |
2567 if (!e->callee | 3038 |
3039 if (!e->callee && hints && e->maybe_hot_p () | |
2568 && estimate_edge_devirt_benefit (e, &call_size, &call_time, | 3040 && estimate_edge_devirt_benefit (e, &call_size, &call_time, |
2569 known_vals, known_contexts, known_aggs) | 3041 known_vals, known_contexts, known_aggs)) |
2570 && hints && e->maybe_hot_p ()) | |
2571 *hints |= INLINE_HINT_indirect_call; | 3042 *hints |= INLINE_HINT_indirect_call; |
2572 cur_size = call_size * ipa_fn_summary::size_scale; | 3043 cur_size = call_size * ipa_fn_summary::size_scale; |
2573 *size += cur_size; | 3044 *size += cur_size; |
2574 if (min_size) | 3045 if (min_size) |
2575 *min_size += cur_size; | 3046 *min_size += cur_size; |
2576 if (prob == REG_BR_PROB_BASE) | 3047 if (time) |
2577 *time += ((sreal)call_time) * e->sreal_frequency (); | 3048 *time += ((sreal)call_time) * e->sreal_frequency (); |
2578 else | 3049 } |
2579 *time += ((sreal)call_time * prob) * e->sreal_frequency (); | 3050 |
2580 } | 3051 |
2581 | 3052 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all |
2582 | 3053 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS |
3054 describe context of the call site. | |
3055 | |
3056 Helper for estimate_calls_size_and_time which does the same but | |
3057 (in most cases) faster. */ | |
3058 | |
3059 static void | |
3060 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size, | |
3061 int *min_size, sreal *time, | |
3062 ipa_hints *hints, | |
3063 clause_t possible_truths, | |
3064 vec<tree> known_vals, | |
3065 vec<ipa_polymorphic_call_context> known_contexts, | |
3066 vec<ipa_agg_value_set> known_aggs) | |
3067 { | |
3068 struct cgraph_edge *e; | |
3069 for (e = node->callees; e; e = e->next_callee) | |
3070 { | |
3071 if (!e->inline_failed) | |
3072 { | |
3073 gcc_checking_assert (!ipa_call_summaries->get (e)); | |
3074 estimate_calls_size_and_time_1 (e->callee, size, min_size, time, | |
3075 hints, | |
3076 possible_truths, | |
3077 known_vals, known_contexts, | |
3078 known_aggs); | |
3079 continue; | |
3080 } | |
3081 class ipa_call_summary *es = ipa_call_summaries->get (e); | |
3082 | |
3083 /* Do not care about zero sized builtins. */ | |
3084 if (!es->call_stmt_size) | |
3085 { | |
3086 gcc_checking_assert (!es->call_stmt_time); | |
3087 continue; | |
3088 } | |
3089 if (!es->predicate | |
3090 || es->predicate->evaluate (possible_truths)) | |
3091 { | |
3092 /* Predicates of calls shall not use NOT_CHANGED codes, | |
3093 so we do not need to compute probabilities. */ | |
3094 estimate_edge_size_and_time (e, size, | |
3095 es->predicate ? NULL : min_size, | |
3096 time, | |
3097 known_vals, known_contexts, | |
3098 known_aggs, hints); | |
3099 } | |
3100 } | |
3101 for (e = node->indirect_calls; e; e = e->next_callee) | |
3102 { | |
3103 class ipa_call_summary *es = ipa_call_summaries->get (e); | |
3104 if (!es->predicate | |
3105 || es->predicate->evaluate (possible_truths)) | |
3106 estimate_edge_size_and_time (e, size, | |
3107 es->predicate ? NULL : min_size, | |
3108 time, | |
3109 known_vals, known_contexts, known_aggs, | |
3110 hints); | |
3111 } | |
3112 } | |
3113 | |
3114 /* Populate sum->call_size_time_table for edges from NODE. */ | |
3115 | |
3116 static void | |
3117 summarize_calls_size_and_time (struct cgraph_node *node, | |
3118 ipa_fn_summary *sum) | |
3119 { | |
3120 struct cgraph_edge *e; | |
3121 for (e = node->callees; e; e = e->next_callee) | |
3122 { | |
3123 if (!e->inline_failed) | |
3124 { | |
3125 gcc_checking_assert (!ipa_call_summaries->get (e)); | |
3126 summarize_calls_size_and_time (e->callee, sum); | |
3127 continue; | |
3128 } | |
3129 int size = 0; | |
3130 sreal time = 0; | |
3131 | |
3132 estimate_edge_size_and_time (e, &size, NULL, &time, | |
3133 vNULL, vNULL, vNULL, NULL); | |
3134 | |
3135 struct predicate pred = true; | |
3136 class ipa_call_summary *es = ipa_call_summaries->get (e); | |
3137 | |
3138 if (es->predicate) | |
3139 pred = *es->predicate; | |
3140 sum->account_size_time (size, time, pred, pred, true); | |
3141 } | |
3142 for (e = node->indirect_calls; e; e = e->next_callee) | |
3143 { | |
3144 int size = 0; | |
3145 sreal time = 0; | |
3146 | |
3147 estimate_edge_size_and_time (e, &size, NULL, &time, | |
3148 vNULL, vNULL, vNULL, NULL); | |
3149 struct predicate pred = true; | |
3150 class ipa_call_summary *es = ipa_call_summaries->get (e); | |
3151 | |
3152 if (es->predicate) | |
3153 pred = *es->predicate; | |
3154 sum->account_size_time (size, time, pred, pred, true); | |
3155 } | |
3156 } | |
2583 | 3157 |
2584 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all | 3158 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all |
2585 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS | 3159 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS |
2586 describe context of the call site. */ | 3160 describe context of the call site. */ |
2587 | 3161 |
2590 int *min_size, sreal *time, | 3164 int *min_size, sreal *time, |
2591 ipa_hints *hints, | 3165 ipa_hints *hints, |
2592 clause_t possible_truths, | 3166 clause_t possible_truths, |
2593 vec<tree> known_vals, | 3167 vec<tree> known_vals, |
2594 vec<ipa_polymorphic_call_context> known_contexts, | 3168 vec<ipa_polymorphic_call_context> known_contexts, |
2595 vec<ipa_agg_jump_function_p> known_aggs) | 3169 vec<ipa_agg_value_set> known_aggs) |
2596 { | 3170 { |
2597 struct cgraph_edge *e; | 3171 class ipa_fn_summary *sum = ipa_fn_summaries->get (node); |
2598 for (e = node->callees; e; e = e->next_callee) | 3172 bool use_table = true; |
2599 { | 3173 |
2600 struct ipa_call_summary *es = ipa_call_summaries->get_create (e); | 3174 gcc_assert (node->callees || node->indirect_calls); |
2601 | 3175 |
2602 /* Do not care about zero sized builtins. */ | 3176 /* During early inlining we do not calculate info for very |
2603 if (e->inline_failed && !es->call_stmt_size) | 3177 large functions and thus there is no need for producing |
2604 { | 3178 summaries. */ |
2605 gcc_checking_assert (!es->call_stmt_time); | 3179 if (!ipa_node_params_sum) |
2606 continue; | 3180 use_table = false; |
2607 } | 3181 /* Do not calculate summaries for simple wrappers; it is waste |
2608 if (!es->predicate | 3182 of memory. */ |
2609 || es->predicate->evaluate (possible_truths)) | 3183 else if (node->callees && node->indirect_calls |
2610 { | 3184 && node->callees->inline_failed && !node->callees->next_callee) |
2611 if (e->inline_failed) | 3185 use_table = false; |
2612 { | 3186 /* If there is an indirect edge that may be optimized, we need |
2613 /* Predicates of calls shall not use NOT_CHANGED codes, | 3187 to go the slow way. */ |
2614 sowe do not need to compute probabilities. */ | 3188 else if ((known_vals.length () |
2615 estimate_edge_size_and_time (e, size, | 3189 || known_contexts.length () |
2616 es->predicate ? NULL : min_size, | 3190 || known_aggs.length ()) && hints) |
2617 time, REG_BR_PROB_BASE, | 3191 { |
2618 known_vals, known_contexts, | 3192 class ipa_node_params *params_summary = IPA_NODE_REF (node); |
2619 known_aggs, hints); | 3193 unsigned int nargs = params_summary |
2620 } | 3194 ? ipa_get_param_count (params_summary) : 0; |
2621 else | 3195 |
2622 estimate_calls_size_and_time (e->callee, size, min_size, time, | 3196 for (unsigned int i = 0; i < nargs && use_table; i++) |
2623 hints, | 3197 { |
2624 possible_truths, | 3198 if (ipa_is_param_used_by_indirect_call (params_summary, i) |
2625 known_vals, known_contexts, | 3199 && ((known_vals.length () > i && known_vals[i]) |
2626 known_aggs); | 3200 || (known_aggs.length () > i |
2627 } | 3201 && known_aggs[i].items.length ()))) |
2628 } | 3202 use_table = false; |
2629 for (e = node->indirect_calls; e; e = e->next_callee) | 3203 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i) |
2630 { | 3204 && (known_contexts.length () > i |
2631 struct ipa_call_summary *es = ipa_call_summaries->get_create (e); | 3205 && !known_contexts[i].useless_p ())) |
2632 if (!es->predicate | 3206 use_table = false; |
2633 || es->predicate->evaluate (possible_truths)) | 3207 } |
2634 estimate_edge_size_and_time (e, size, | 3208 } |
2635 es->predicate ? NULL : min_size, | 3209 |
2636 time, REG_BR_PROB_BASE, | 3210 /* Fast path is via the call size time table. */ |
2637 known_vals, known_contexts, known_aggs, | 3211 if (use_table) |
2638 hints); | 3212 { |
2639 } | 3213 /* Build summary if it is absent. */ |
2640 } | 3214 if (!sum->call_size_time_table) |
2641 | 3215 { |
2642 | 3216 predicate true_pred = true; |
2643 /* Estimate size and time needed to execute NODE assuming | 3217 sum->account_size_time (0, 0, true_pred, true_pred, true); |
2644 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS | 3218 summarize_calls_size_and_time (node, sum); |
2645 information about NODE's arguments. If non-NULL use also probability | 3219 } |
2646 information present in INLINE_PARAM_SUMMARY vector. | 3220 |
2647 Additionally detemine hints determined by the context. Finally compute | 3221 int old_size = *size; |
3222 sreal old_time = time ? *time : 0; | |
3223 | |
3224 if (min_size) | |
3225 *min_size += (*sum->call_size_time_table)[0].size; | |
3226 | |
3227 unsigned int i; | |
3228 size_time_entry *e; | |
3229 | |
3230 /* Walk the table and account sizes and times. */ | |
3231 for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e); | |
3232 i++) | |
3233 if (e->exec_predicate.evaluate (possible_truths)) | |
3234 { | |
3235 *size += e->size; | |
3236 if (time) | |
3237 *time += e->time; | |
3238 } | |
3239 | |
3240 /* Be careful and see if both methods agree. */ | |
3241 if ((flag_checking || dump_file) | |
3242 /* Do not try to sanity check when we know we lost some | |
3243 precision. */ | |
3244 && sum->call_size_time_table->length () | |
3245 < ipa_fn_summary::max_size_time_table_size) | |
3246 { | |
3247 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL, | |
3248 possible_truths, known_vals, | |
3249 known_contexts, known_aggs); | |
3250 gcc_assert (*size == old_size); | |
3251 if (time && (*time - old_time > 1 || *time - old_time < -1) | |
3252 && dump_file) | |
3253 fprintf (dump_file, "Time mismatch in call summary %f!=%f\n", | |
3254 old_time.to_double (), | |
3255 time->to_double ()); | |
3256 } | |
3257 } | |
3258 /* Slow path by walking all edges. */ | |
3259 else | |
3260 estimate_calls_size_and_time_1 (node, size, min_size, time, hints, | |
3261 possible_truths, known_vals, known_contexts, | |
3262 known_aggs); | |
3263 } | |
3264 | |
3265 /* Default constructor for ipa call context. | |
3266 Memory allocation of known_vals, known_contexts | |
3267 and known_aggs vectors is owned by the caller, but can | |
3268 be release by ipa_call_context::release. | |
3269 | |
3270 inline_param_summary is owned by the caller. */ | |
3271 ipa_call_context::ipa_call_context (cgraph_node *node, | |
3272 clause_t possible_truths, | |
3273 clause_t nonspec_possible_truths, | |
3274 vec<tree> known_vals, | |
3275 vec<ipa_polymorphic_call_context> | |
3276 known_contexts, | |
3277 vec<ipa_agg_value_set> known_aggs, | |
3278 vec<inline_param_summary> | |
3279 inline_param_summary) | |
3280 : m_node (node), m_possible_truths (possible_truths), | |
3281 m_nonspec_possible_truths (nonspec_possible_truths), | |
3282 m_inline_param_summary (inline_param_summary), | |
3283 m_known_vals (known_vals), | |
3284 m_known_contexts (known_contexts), | |
3285 m_known_aggs (known_aggs) | |
3286 { | |
3287 } | |
3288 | |
3289 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */ | |
3290 | |
3291 void | |
3292 ipa_call_context::duplicate_from (const ipa_call_context &ctx) | |
3293 { | |
3294 m_node = ctx.m_node; | |
3295 m_possible_truths = ctx.m_possible_truths; | |
3296 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths; | |
3297 class ipa_node_params *params_summary = IPA_NODE_REF (m_node); | |
3298 unsigned int nargs = params_summary | |
3299 ? ipa_get_param_count (params_summary) : 0; | |
3300 | |
3301 m_inline_param_summary = vNULL; | |
3302 /* Copy the info only if there is at least one useful entry. */ | |
3303 if (ctx.m_inline_param_summary.exists ()) | |
3304 { | |
3305 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs); | |
3306 | |
3307 for (unsigned int i = 0; i < n; i++) | |
3308 if (ipa_is_param_used_by_ipa_predicates (params_summary, i) | |
3309 && !ctx.m_inline_param_summary[i].useless_p ()) | |
3310 { | |
3311 m_inline_param_summary | |
3312 = ctx.m_inline_param_summary.copy (); | |
3313 break; | |
3314 } | |
3315 } | |
3316 m_known_vals = vNULL; | |
3317 if (ctx.m_known_vals.exists ()) | |
3318 { | |
3319 unsigned int n = MIN (ctx.m_known_vals.length (), nargs); | |
3320 | |
3321 for (unsigned int i = 0; i < n; i++) | |
3322 if (ipa_is_param_used_by_indirect_call (params_summary, i) | |
3323 && ctx.m_known_vals[i]) | |
3324 { | |
3325 m_known_vals = ctx.m_known_vals.copy (); | |
3326 break; | |
3327 } | |
3328 } | |
3329 | |
3330 m_known_contexts = vNULL; | |
3331 if (ctx.m_known_contexts.exists ()) | |
3332 { | |
3333 unsigned int n = MIN (ctx.m_known_contexts.length (), nargs); | |
3334 | |
3335 for (unsigned int i = 0; i < n; i++) | |
3336 if (ipa_is_param_used_by_polymorphic_call (params_summary, i) | |
3337 && !ctx.m_known_contexts[i].useless_p ()) | |
3338 { | |
3339 m_known_contexts = ctx.m_known_contexts.copy (); | |
3340 break; | |
3341 } | |
3342 } | |
3343 | |
3344 m_known_aggs = vNULL; | |
3345 if (ctx.m_known_aggs.exists ()) | |
3346 { | |
3347 unsigned int n = MIN (ctx.m_known_aggs.length (), nargs); | |
3348 | |
3349 for (unsigned int i = 0; i < n; i++) | |
3350 if (ipa_is_param_used_by_indirect_call (params_summary, i) | |
3351 && !ctx.m_known_aggs[i].is_empty ()) | |
3352 { | |
3353 m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs); | |
3354 break; | |
3355 } | |
3356 } | |
3357 } | |
3358 | |
3359 /* Release memory used by known_vals/contexts/aggs vectors. | |
3360 If ALL is true release also inline_param_summary. | |
3361 This happens when context was previously duplicated to be stored | |
3362 into cache. */ | |
3363 | |
3364 void | |
3365 ipa_call_context::release (bool all) | |
3366 { | |
3367 /* See if context is initialized at first place. */ | |
3368 if (!m_node) | |
3369 return; | |
3370 ipa_release_agg_values (m_known_aggs, all); | |
3371 if (all) | |
3372 { | |
3373 m_known_vals.release (); | |
3374 m_known_contexts.release (); | |
3375 m_inline_param_summary.release (); | |
3376 } | |
3377 } | |
3378 | |
3379 /* Return true if CTX describes the same call context as THIS. */ | |
3380 | |
3381 bool | |
3382 ipa_call_context::equal_to (const ipa_call_context &ctx) | |
3383 { | |
3384 if (m_node != ctx.m_node | |
3385 || m_possible_truths != ctx.m_possible_truths | |
3386 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths) | |
3387 return false; | |
3388 | |
3389 class ipa_node_params *params_summary = IPA_NODE_REF (m_node); | |
3390 unsigned int nargs = params_summary | |
3391 ? ipa_get_param_count (params_summary) : 0; | |
3392 | |
3393 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ()) | |
3394 { | |
3395 for (unsigned int i = 0; i < nargs; i++) | |
3396 { | |
3397 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i)) | |
3398 continue; | |
3399 if (i >= m_inline_param_summary.length () | |
3400 || m_inline_param_summary[i].useless_p ()) | |
3401 { | |
3402 if (i < ctx.m_inline_param_summary.length () | |
3403 && !ctx.m_inline_param_summary[i].useless_p ()) | |
3404 return false; | |
3405 continue; | |
3406 } | |
3407 if (i >= ctx.m_inline_param_summary.length () | |
3408 || ctx.m_inline_param_summary[i].useless_p ()) | |
3409 { | |
3410 if (i < m_inline_param_summary.length () | |
3411 && !m_inline_param_summary[i].useless_p ()) | |
3412 return false; | |
3413 continue; | |
3414 } | |
3415 if (!m_inline_param_summary[i].equal_to | |
3416 (ctx.m_inline_param_summary[i])) | |
3417 return false; | |
3418 } | |
3419 } | |
3420 if (m_known_vals.exists () || ctx.m_known_vals.exists ()) | |
3421 { | |
3422 for (unsigned int i = 0; i < nargs; i++) | |
3423 { | |
3424 if (!ipa_is_param_used_by_indirect_call (params_summary, i)) | |
3425 continue; | |
3426 if (i >= m_known_vals.length () || !m_known_vals[i]) | |
3427 { | |
3428 if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i]) | |
3429 return false; | |
3430 continue; | |
3431 } | |
3432 if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i]) | |
3433 { | |
3434 if (i < m_known_vals.length () && m_known_vals[i]) | |
3435 return false; | |
3436 continue; | |
3437 } | |
3438 if (m_known_vals[i] != ctx.m_known_vals[i]) | |
3439 return false; | |
3440 } | |
3441 } | |
3442 if (m_known_contexts.exists () || ctx.m_known_contexts.exists ()) | |
3443 { | |
3444 for (unsigned int i = 0; i < nargs; i++) | |
3445 { | |
3446 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i)) | |
3447 continue; | |
3448 if (i >= m_known_contexts.length () | |
3449 || m_known_contexts[i].useless_p ()) | |
3450 { | |
3451 if (i < ctx.m_known_contexts.length () | |
3452 && !ctx.m_known_contexts[i].useless_p ()) | |
3453 return false; | |
3454 continue; | |
3455 } | |
3456 if (i >= ctx.m_known_contexts.length () | |
3457 || ctx.m_known_contexts[i].useless_p ()) | |
3458 { | |
3459 if (i < m_known_contexts.length () | |
3460 && !m_known_contexts[i].useless_p ()) | |
3461 return false; | |
3462 continue; | |
3463 } | |
3464 if (!m_known_contexts[i].equal_to | |
3465 (ctx.m_known_contexts[i])) | |
3466 return false; | |
3467 } | |
3468 } | |
3469 if (m_known_aggs.exists () || ctx.m_known_aggs.exists ()) | |
3470 { | |
3471 for (unsigned int i = 0; i < nargs; i++) | |
3472 { | |
3473 if (!ipa_is_param_used_by_indirect_call (params_summary, i)) | |
3474 continue; | |
3475 if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ()) | |
3476 { | |
3477 if (i < ctx.m_known_aggs.length () | |
3478 && !ctx.m_known_aggs[i].is_empty ()) | |
3479 return false; | |
3480 continue; | |
3481 } | |
3482 if (i >= ctx.m_known_aggs.length () | |
3483 || ctx.m_known_aggs[i].is_empty ()) | |
3484 { | |
3485 if (i < m_known_aggs.length () | |
3486 && !m_known_aggs[i].is_empty ()) | |
3487 return false; | |
3488 continue; | |
3489 } | |
3490 if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i])) | |
3491 return false; | |
3492 } | |
3493 } | |
3494 return true; | |
3495 } | |
3496 | |
3497 /* Estimate size and time needed to execute call in the given context. | |
3498 Additionally determine hints determined by the context. Finally compute | |
2648 minimal size needed for the call that is independent on the call context and | 3499 minimal size needed for the call that is independent on the call context and |
2649 can be used for fast estimates. Return the values in RET_SIZE, | 3500 can be used for fast estimates. Return the values in RET_SIZE, |
2650 RET_MIN_SIZE, RET_TIME and RET_HINTS. */ | 3501 RET_MIN_SIZE, RET_TIME and RET_HINTS. */ |
2651 | 3502 |
2652 void | 3503 void |
2653 estimate_node_size_and_time (struct cgraph_node *node, | 3504 ipa_call_context::estimate_size_and_time (int *ret_size, |
2654 clause_t possible_truths, | 3505 int *ret_min_size, |
2655 clause_t nonspec_possible_truths, | 3506 sreal *ret_time, |
2656 vec<tree> known_vals, | 3507 sreal *ret_nonspecialized_time, |
2657 vec<ipa_polymorphic_call_context> known_contexts, | 3508 ipa_hints *ret_hints) |
2658 vec<ipa_agg_jump_function_p> known_aggs, | 3509 { |
2659 int *ret_size, int *ret_min_size, | 3510 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node); |
2660 sreal *ret_time, | |
2661 sreal *ret_nonspecialized_time, | |
2662 ipa_hints *ret_hints, | |
2663 vec<inline_param_summary> | |
2664 inline_param_summary) | |
2665 { | |
2666 struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); | |
2667 size_time_entry *e; | 3511 size_time_entry *e; |
2668 int size = 0; | 3512 int size = 0; |
2669 sreal time = 0; | 3513 sreal time = 0; |
2670 int min_size = 0; | 3514 int min_size = 0; |
2671 ipa_hints hints = 0; | 3515 ipa_hints hints = 0; |
2672 int i; | 3516 int i; |
2673 | 3517 |
2674 if (dump_file && (dump_flags & TDF_DETAILS)) | 3518 if (dump_file && (dump_flags & TDF_DETAILS)) |
2675 { | 3519 { |
2676 bool found = false; | 3520 bool found = false; |
2677 fprintf (dump_file, " Estimating body: %s/%i\n" | 3521 fprintf (dump_file, " Estimating body: %s\n" |
2678 " Known to be false: ", node->name (), | 3522 " Known to be false: ", m_node->dump_name ()); |
2679 node->order); | |
2680 | 3523 |
2681 for (i = predicate::not_inlined_condition; | 3524 for (i = predicate::not_inlined_condition; |
2682 i < (predicate::first_dynamic_condition | 3525 i < (predicate::first_dynamic_condition |
2683 + (int) vec_safe_length (info->conds)); i++) | 3526 + (int) vec_safe_length (info->conds)); i++) |
2684 if (!(possible_truths & (1 << i))) | 3527 if (!(m_possible_truths & (1 << i))) |
2685 { | 3528 { |
2686 if (found) | 3529 if (found) |
2687 fprintf (dump_file, ", "); | 3530 fprintf (dump_file, ", "); |
2688 found = true; | 3531 found = true; |
2689 dump_condition (dump_file, info->conds, i); | 3532 dump_condition (dump_file, info->conds, i); |
2690 } | 3533 } |
2691 } | 3534 } |
2692 | 3535 |
2693 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths, | 3536 if (m_node->callees || m_node->indirect_calls) |
2694 known_vals, known_contexts, known_aggs); | 3537 estimate_calls_size_and_time (m_node, &size, &min_size, |
3538 ret_time ? &time : NULL, | |
3539 ret_hints ? &hints : NULL, m_possible_truths, | |
3540 m_known_vals, m_known_contexts, m_known_aggs); | |
3541 | |
2695 sreal nonspecialized_time = time; | 3542 sreal nonspecialized_time = time; |
2696 | 3543 |
3544 min_size += (*info->size_time_table)[0].size; | |
2697 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) | 3545 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) |
2698 { | 3546 { |
2699 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths); | 3547 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths); |
2700 | 3548 |
2701 /* Because predicates are conservative, it can happen that nonconst is 1 | 3549 /* Because predicates are conservative, it can happen that nonconst is 1 |
2702 but exec is 0. */ | 3550 but exec is 0. */ |
2703 if (exec) | 3551 if (exec) |
2704 { | 3552 { |
2705 bool nonconst = e->nonconst_predicate.evaluate (possible_truths); | 3553 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths); |
2706 | 3554 |
2707 gcc_checking_assert (e->time >= 0); | 3555 gcc_checking_assert (e->time >= 0); |
2708 gcc_checking_assert (time >= 0); | 3556 gcc_checking_assert (time >= 0); |
2709 | 3557 |
2710 /* We compute specialized size only because size of nonspecialized | 3558 /* We compute specialized size only because size of nonspecialized |
2713 The difference between nonspecialized execution and specialized is | 3561 The difference between nonspecialized execution and specialized is |
2714 that nonspecialized is not going to have optimized out computations | 3562 that nonspecialized is not going to have optimized out computations |
2715 known to be constant in a specialized setting. */ | 3563 known to be constant in a specialized setting. */ |
2716 if (nonconst) | 3564 if (nonconst) |
2717 size += e->size; | 3565 size += e->size; |
3566 if (!ret_time) | |
3567 continue; | |
2718 nonspecialized_time += e->time; | 3568 nonspecialized_time += e->time; |
2719 if (!nonconst) | 3569 if (!nonconst) |
2720 ; | 3570 ; |
2721 else if (!inline_param_summary.exists ()) | 3571 else if (!m_inline_param_summary.exists ()) |
2722 { | 3572 { |
2723 if (nonconst) | 3573 if (nonconst) |
2724 time += e->time; | 3574 time += e->time; |
2725 } | 3575 } |
2726 else | 3576 else |
2727 { | 3577 { |
2728 int prob = e->nonconst_predicate.probability | 3578 int prob = e->nonconst_predicate.probability |
2729 (info->conds, possible_truths, | 3579 (info->conds, m_possible_truths, |
2730 inline_param_summary); | 3580 m_inline_param_summary); |
2731 gcc_checking_assert (prob >= 0); | 3581 gcc_checking_assert (prob >= 0); |
2732 gcc_checking_assert (prob <= REG_BR_PROB_BASE); | 3582 gcc_checking_assert (prob <= REG_BR_PROB_BASE); |
2733 time += e->time * prob / REG_BR_PROB_BASE; | 3583 if (prob == REG_BR_PROB_BASE) |
3584 time += e->time; | |
3585 else | |
3586 time += e->time * prob / REG_BR_PROB_BASE; | |
2734 } | 3587 } |
2735 gcc_checking_assert (time >= 0); | 3588 gcc_checking_assert (time >= 0); |
2736 } | 3589 } |
2737 } | 3590 } |
2738 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true); | 3591 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true); |
2739 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true); | 3592 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true); |
2740 min_size = (*info->size_time_table)[0].size; | 3593 gcc_checking_assert (min_size >= 0); |
2741 gcc_checking_assert (size >= 0); | 3594 gcc_checking_assert (size >= 0); |
2742 gcc_checking_assert (time >= 0); | 3595 gcc_checking_assert (time >= 0); |
2743 /* nonspecialized_time should be always bigger than specialized time. | 3596 /* nonspecialized_time should be always bigger than specialized time. |
2744 Roundoff issues however may get into the way. */ | 3597 Roundoff issues however may get into the way. */ |
2745 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1); | 3598 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1); |
2746 | 3599 |
2747 /* Roundoff issues may make specialized time bigger than nonspecialized | 3600 /* Roundoff issues may make specialized time bigger than nonspecialized |
2748 time. We do not really want that to happen because some heurstics | 3601 time. We do not really want that to happen because some heuristics |
2749 may get confused by seeing negative speedups. */ | 3602 may get confused by seeing negative speedups. */ |
2750 if (time > nonspecialized_time) | 3603 if (time > nonspecialized_time) |
2751 time = nonspecialized_time; | 3604 time = nonspecialized_time; |
2752 | 3605 |
2753 if (info->loop_iterations | 3606 if (ret_hints) |
2754 && !info->loop_iterations->evaluate (possible_truths)) | 3607 { |
2755 hints |= INLINE_HINT_loop_iterations; | 3608 if (info->loop_iterations |
2756 if (info->loop_stride | 3609 && !info->loop_iterations->evaluate (m_possible_truths)) |
2757 && !info->loop_stride->evaluate (possible_truths)) | 3610 hints |= INLINE_HINT_loop_iterations; |
2758 hints |= INLINE_HINT_loop_stride; | 3611 if (info->loop_stride |
2759 if (info->array_index | 3612 && !info->loop_stride->evaluate (m_possible_truths)) |
2760 && !info->array_index->evaluate (possible_truths)) | 3613 hints |= INLINE_HINT_loop_stride; |
2761 hints |= INLINE_HINT_array_index; | 3614 if (info->scc_no) |
2762 if (info->scc_no) | 3615 hints |= INLINE_HINT_in_scc; |
2763 hints |= INLINE_HINT_in_scc; | 3616 if (DECL_DECLARED_INLINE_P (m_node->decl)) |
2764 if (DECL_DECLARED_INLINE_P (node->decl)) | 3617 hints |= INLINE_HINT_declared_inline; |
2765 hints |= INLINE_HINT_declared_inline; | 3618 } |
2766 | 3619 |
2767 size = RDIV (size, ipa_fn_summary::size_scale); | 3620 size = RDIV (size, ipa_fn_summary::size_scale); |
2768 min_size = RDIV (min_size, ipa_fn_summary::size_scale); | 3621 min_size = RDIV (min_size, ipa_fn_summary::size_scale); |
2769 | 3622 |
2770 if (dump_file && (dump_flags & TDF_DETAILS)) | 3623 if (dump_file && (dump_flags & TDF_DETAILS)) |
2792 void | 3645 void |
2793 estimate_ipcp_clone_size_and_time (struct cgraph_node *node, | 3646 estimate_ipcp_clone_size_and_time (struct cgraph_node *node, |
2794 vec<tree> known_vals, | 3647 vec<tree> known_vals, |
2795 vec<ipa_polymorphic_call_context> | 3648 vec<ipa_polymorphic_call_context> |
2796 known_contexts, | 3649 known_contexts, |
2797 vec<ipa_agg_jump_function_p> known_aggs, | 3650 vec<ipa_agg_value_set> known_aggs, |
2798 int *ret_size, sreal *ret_time, | 3651 int *ret_size, sreal *ret_time, |
2799 sreal *ret_nonspec_time, | 3652 sreal *ret_nonspec_time, |
2800 ipa_hints *hints) | 3653 ipa_hints *hints) |
2801 { | 3654 { |
2802 clause_t clause, nonspec_clause; | 3655 clause_t clause, nonspec_clause; |
2803 | 3656 |
2804 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs, | 3657 /* TODO: Also pass known value ranges. */ |
2805 &clause, &nonspec_clause); | 3658 evaluate_conditions_for_known_args (node, false, known_vals, vNULL, |
2806 estimate_node_size_and_time (node, clause, nonspec_clause, | 3659 known_aggs, &clause, &nonspec_clause); |
2807 known_vals, known_contexts, | 3660 ipa_call_context ctx (node, clause, nonspec_clause, |
2808 known_aggs, ret_size, NULL, ret_time, | 3661 known_vals, known_contexts, |
2809 ret_nonspec_time, hints, vNULL); | 3662 known_aggs, vNULL); |
3663 ctx.estimate_size_and_time (ret_size, NULL, ret_time, | |
3664 ret_nonspec_time, hints); | |
3665 } | |
3666 | |
3667 /* Return stack frame offset where frame of NODE is supposed to start inside | |
3668 of the function it is inlined to. | |
3669 Return 0 for functions that are not inlined. */ | |
3670 | |
3671 HOST_WIDE_INT | |
3672 ipa_get_stack_frame_offset (struct cgraph_node *node) | |
3673 { | |
3674 HOST_WIDE_INT offset = 0; | |
3675 if (!node->inlined_to) | |
3676 return 0; | |
3677 node = node->callers->caller; | |
3678 while (true) | |
3679 { | |
3680 offset += ipa_size_summaries->get (node)->estimated_self_stack_size; | |
3681 if (!node->inlined_to) | |
3682 return offset; | |
3683 node = node->callers->caller; | |
3684 } | |
2810 } | 3685 } |
2811 | 3686 |
2812 | 3687 |
2813 /* Update summary information of inline clones after inlining. | 3688 /* Update summary information of inline clones after inlining. |
2814 Compute peak stack usage. */ | 3689 Compute peak stack usage. */ |
2815 | 3690 |
2816 static void | 3691 static void |
2817 inline_update_callee_summaries (struct cgraph_node *node, int depth) | 3692 inline_update_callee_summaries (struct cgraph_node *node, int depth) |
2818 { | 3693 { |
2819 struct cgraph_edge *e; | 3694 struct cgraph_edge *e; |
2820 ipa_fn_summary *callee_info = ipa_fn_summaries->get (node); | 3695 |
2821 ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller); | |
2822 HOST_WIDE_INT peak; | |
2823 | |
2824 callee_info->stack_frame_offset | |
2825 = caller_info->stack_frame_offset | |
2826 + caller_info->estimated_self_stack_size; | |
2827 peak = callee_info->stack_frame_offset | |
2828 + callee_info->estimated_self_stack_size; | |
2829 | |
2830 ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to); | |
2831 if (s->estimated_stack_size < peak) | |
2832 s->estimated_stack_size = peak; | |
2833 ipa_propagate_frequency (node); | 3696 ipa_propagate_frequency (node); |
2834 for (e = node->callees; e; e = e->next_callee) | 3697 for (e = node->callees; e; e = e->next_callee) |
2835 { | 3698 { |
2836 if (!e->inline_failed) | 3699 if (!e->inline_failed) |
2837 inline_update_callee_summaries (e->callee, depth); | 3700 inline_update_callee_summaries (e->callee, depth); |
2838 ipa_call_summaries->get (e)->loop_depth += depth; | 3701 else |
3702 ipa_call_summaries->get (e)->loop_depth += depth; | |
2839 } | 3703 } |
2840 for (e = node->indirect_calls; e; e = e->next_callee) | 3704 for (e = node->indirect_calls; e; e = e->next_callee) |
2841 ipa_call_summaries->get (e)->loop_depth += depth; | 3705 ipa_call_summaries->get (e)->loop_depth += depth; |
2842 } | 3706 } |
2843 | 3707 |
2844 /* Update change_prob of EDGE after INLINED_EDGE has been inlined. | 3708 /* Update change_prob of EDGE after INLINED_EDGE has been inlined. |
2845 When functoin A is inlined in B and A calls C with parameter that | 3709 When function A is inlined in B and A calls C with parameter that |
2846 changes with probability PROB1 and C is known to be passthroug | 3710 changes with probability PROB1 and C is known to be passthrough |
2847 of argument if B that change with probability PROB2, the probability | 3711 of argument if B that change with probability PROB2, the probability |
2848 of change is now PROB1*PROB2. */ | 3712 of change is now PROB1*PROB2. */ |
2849 | 3713 |
2850 static void | 3714 static void |
2851 remap_edge_change_prob (struct cgraph_edge *inlined_edge, | 3715 remap_edge_change_prob (struct cgraph_edge *inlined_edge, |
2852 struct cgraph_edge *edge) | 3716 struct cgraph_edge *edge) |
2853 { | 3717 { |
2854 if (ipa_node_params_sum) | 3718 if (ipa_node_params_sum) |
2855 { | 3719 { |
2856 int i; | 3720 int i; |
2857 struct ipa_edge_args *args = IPA_EDGE_REF (edge); | 3721 class ipa_edge_args *args = IPA_EDGE_REF (edge); |
2858 struct ipa_call_summary *es = ipa_call_summaries->get (edge); | 3722 if (!args) |
2859 struct ipa_call_summary *inlined_es | 3723 return; |
3724 class ipa_call_summary *es = ipa_call_summaries->get (edge); | |
3725 class ipa_call_summary *inlined_es | |
2860 = ipa_call_summaries->get (inlined_edge); | 3726 = ipa_call_summaries->get (inlined_edge); |
3727 | |
3728 if (es->param.length () == 0) | |
3729 return; | |
2861 | 3730 |
2862 for (i = 0; i < ipa_get_cs_argument_count (args); i++) | 3731 for (i = 0; i < ipa_get_cs_argument_count (args); i++) |
2863 { | 3732 { |
2864 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i); | 3733 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i); |
2865 if (jfunc->type == IPA_JF_PASS_THROUGH | 3734 if (jfunc->type == IPA_JF_PASS_THROUGH |
2892 Also update change probabilities. */ | 3761 Also update change probabilities. */ |
2893 | 3762 |
2894 static void | 3763 static void |
2895 remap_edge_summaries (struct cgraph_edge *inlined_edge, | 3764 remap_edge_summaries (struct cgraph_edge *inlined_edge, |
2896 struct cgraph_node *node, | 3765 struct cgraph_node *node, |
2897 struct ipa_fn_summary *info, | 3766 class ipa_fn_summary *info, |
2898 struct ipa_fn_summary *callee_info, | 3767 class ipa_node_params *params_summary, |
3768 class ipa_fn_summary *callee_info, | |
2899 vec<int> operand_map, | 3769 vec<int> operand_map, |
2900 vec<int> offset_map, | 3770 vec<int> offset_map, |
2901 clause_t possible_truths, | 3771 clause_t possible_truths, |
2902 predicate *toplev_predicate) | 3772 predicate *toplev_predicate) |
2903 { | 3773 { |
2904 struct cgraph_edge *e, *next; | 3774 struct cgraph_edge *e, *next; |
2905 for (e = node->callees; e; e = next) | 3775 for (e = node->callees; e; e = next) |
2906 { | 3776 { |
2907 struct ipa_call_summary *es = ipa_call_summaries->get (e); | |
2908 predicate p; | 3777 predicate p; |
2909 next = e->next_callee; | 3778 next = e->next_callee; |
2910 | 3779 |
2911 if (e->inline_failed) | 3780 if (e->inline_failed) |
2912 { | 3781 { |
3782 class ipa_call_summary *es = ipa_call_summaries->get (e); | |
2913 remap_edge_change_prob (inlined_edge, e); | 3783 remap_edge_change_prob (inlined_edge, e); |
2914 | 3784 |
2915 if (es->predicate) | 3785 if (es->predicate) |
2916 { | 3786 { |
2917 p = es->predicate->remap_after_inlining | 3787 p = es->predicate->remap_after_inlining |
2918 (info, callee_info, operand_map, | 3788 (info, params_summary, |
3789 callee_info, operand_map, | |
2919 offset_map, possible_truths, | 3790 offset_map, possible_truths, |
2920 *toplev_predicate); | 3791 *toplev_predicate); |
2921 edge_set_predicate (e, &p); | 3792 edge_set_predicate (e, &p); |
2922 } | 3793 } |
2923 else | 3794 else |
2924 edge_set_predicate (e, toplev_predicate); | 3795 edge_set_predicate (e, toplev_predicate); |
2925 } | 3796 } |
2926 else | 3797 else |
2927 remap_edge_summaries (inlined_edge, e->callee, info, callee_info, | 3798 remap_edge_summaries (inlined_edge, e->callee, info, |
3799 params_summary, callee_info, | |
2928 operand_map, offset_map, possible_truths, | 3800 operand_map, offset_map, possible_truths, |
2929 toplev_predicate); | 3801 toplev_predicate); |
2930 } | 3802 } |
2931 for (e = node->indirect_calls; e; e = next) | 3803 for (e = node->indirect_calls; e; e = next) |
2932 { | 3804 { |
2933 struct ipa_call_summary *es = ipa_call_summaries->get (e); | 3805 class ipa_call_summary *es = ipa_call_summaries->get (e); |
2934 predicate p; | 3806 predicate p; |
2935 next = e->next_callee; | 3807 next = e->next_callee; |
2936 | 3808 |
2937 remap_edge_change_prob (inlined_edge, e); | 3809 remap_edge_change_prob (inlined_edge, e); |
2938 if (es->predicate) | 3810 if (es->predicate) |
2939 { | 3811 { |
2940 p = es->predicate->remap_after_inlining | 3812 p = es->predicate->remap_after_inlining |
2941 (info, callee_info, operand_map, offset_map, | 3813 (info, params_summary, |
3814 callee_info, operand_map, offset_map, | |
2942 possible_truths, *toplev_predicate); | 3815 possible_truths, *toplev_predicate); |
2943 edge_set_predicate (e, &p); | 3816 edge_set_predicate (e, &p); |
2944 } | 3817 } |
2945 else | 3818 else |
2946 edge_set_predicate (e, toplev_predicate); | 3819 edge_set_predicate (e, toplev_predicate); |
2948 } | 3821 } |
2949 | 3822 |
2950 /* Same as remap_predicate, but set result into hint *HINT. */ | 3823 /* Same as remap_predicate, but set result into hint *HINT. */ |
2951 | 3824 |
2952 static void | 3825 static void |
2953 remap_hint_predicate (struct ipa_fn_summary *info, | 3826 remap_hint_predicate (class ipa_fn_summary *info, |
2954 struct ipa_fn_summary *callee_info, | 3827 class ipa_node_params *params_summary, |
3828 class ipa_fn_summary *callee_info, | |
2955 predicate **hint, | 3829 predicate **hint, |
2956 vec<int> operand_map, | 3830 vec<int> operand_map, |
2957 vec<int> offset_map, | 3831 vec<int> offset_map, |
2958 clause_t possible_truths, | 3832 clause_t possible_truths, |
2959 predicate *toplev_predicate) | 3833 predicate *toplev_predicate) |
2961 predicate p; | 3835 predicate p; |
2962 | 3836 |
2963 if (!*hint) | 3837 if (!*hint) |
2964 return; | 3838 return; |
2965 p = (*hint)->remap_after_inlining | 3839 p = (*hint)->remap_after_inlining |
2966 (info, callee_info, | 3840 (info, params_summary, callee_info, |
2967 operand_map, offset_map, | 3841 operand_map, offset_map, |
2968 possible_truths, *toplev_predicate); | 3842 possible_truths, *toplev_predicate); |
2969 if (p != false && p != true) | 3843 if (p != false && p != true) |
2970 { | 3844 { |
2971 if (!*hint) | 3845 if (!*hint) |
2979 | 3853 |
2980 void | 3854 void |
2981 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) | 3855 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) |
2982 { | 3856 { |
2983 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee); | 3857 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee); |
2984 struct cgraph_node *to = (edge->caller->global.inlined_to | 3858 struct cgraph_node *to = (edge->caller->inlined_to |
2985 ? edge->caller->global.inlined_to : edge->caller); | 3859 ? edge->caller->inlined_to : edge->caller); |
2986 struct ipa_fn_summary *info = ipa_fn_summaries->get (to); | 3860 class ipa_fn_summary *info = ipa_fn_summaries->get (to); |
2987 clause_t clause = 0; /* not_inline is known to be false. */ | 3861 clause_t clause = 0; /* not_inline is known to be false. */ |
2988 size_time_entry *e; | 3862 size_time_entry *e; |
2989 vec<int> operand_map = vNULL; | 3863 auto_vec<int, 8> operand_map; |
2990 vec<int> offset_map = vNULL; | 3864 auto_vec<int, 8> offset_map; |
2991 int i; | 3865 int i; |
2992 predicate toplev_predicate; | 3866 predicate toplev_predicate; |
2993 predicate true_p = true; | 3867 class ipa_call_summary *es = ipa_call_summaries->get (edge); |
2994 struct ipa_call_summary *es = ipa_call_summaries->get (edge); | 3868 class ipa_node_params *params_summary = (ipa_node_params_sum |
3869 ? IPA_NODE_REF (to) : NULL); | |
2995 | 3870 |
2996 if (es->predicate) | 3871 if (es->predicate) |
2997 toplev_predicate = *es->predicate; | 3872 toplev_predicate = *es->predicate; |
2998 else | 3873 else |
2999 toplev_predicate = true; | 3874 toplev_predicate = true; |
3000 | 3875 |
3001 info->fp_expressions |= callee_info->fp_expressions; | 3876 info->fp_expressions |= callee_info->fp_expressions; |
3002 | 3877 |
3003 if (callee_info->conds) | 3878 if (callee_info->conds) |
3004 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL); | 3879 { |
3880 auto_vec<tree, 32> known_vals; | |
3881 auto_vec<ipa_agg_value_set, 32> known_aggs; | |
3882 evaluate_properties_for_edge (edge, true, &clause, NULL, | |
3883 &known_vals, NULL, &known_aggs); | |
3884 } | |
3005 if (ipa_node_params_sum && callee_info->conds) | 3885 if (ipa_node_params_sum && callee_info->conds) |
3006 { | 3886 { |
3007 struct ipa_edge_args *args = IPA_EDGE_REF (edge); | 3887 class ipa_edge_args *args = IPA_EDGE_REF (edge); |
3008 int count = ipa_get_cs_argument_count (args); | 3888 int count = args ? ipa_get_cs_argument_count (args) : 0; |
3009 int i; | 3889 int i; |
3010 | 3890 |
3011 if (count) | 3891 if (count) |
3012 { | 3892 { |
3013 operand_map.safe_grow_cleared (count); | 3893 operand_map.safe_grow_cleared (count); |
3036 offset = -1; | 3916 offset = -1; |
3037 offset_map[i] = offset; | 3917 offset_map[i] = offset; |
3038 } | 3918 } |
3039 } | 3919 } |
3040 operand_map[i] = map; | 3920 operand_map[i] = map; |
3041 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to))); | 3921 gcc_assert (map < ipa_get_param_count (params_summary)); |
3042 } | 3922 } |
3043 } | 3923 } |
3924 sreal freq = edge->sreal_frequency (); | |
3044 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++) | 3925 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++) |
3045 { | 3926 { |
3046 predicate p; | 3927 predicate p; |
3047 p = e->exec_predicate.remap_after_inlining | 3928 p = e->exec_predicate.remap_after_inlining |
3048 (info, callee_info, operand_map, | 3929 (info, params_summary, |
3930 callee_info, operand_map, | |
3049 offset_map, clause, | 3931 offset_map, clause, |
3050 toplev_predicate); | 3932 toplev_predicate); |
3051 predicate nonconstp; | 3933 predicate nonconstp; |
3052 nonconstp = e->nonconst_predicate.remap_after_inlining | 3934 nonconstp = e->nonconst_predicate.remap_after_inlining |
3053 (info, callee_info, operand_map, | 3935 (info, params_summary, |
3936 callee_info, operand_map, | |
3054 offset_map, clause, | 3937 offset_map, clause, |
3055 toplev_predicate); | 3938 toplev_predicate); |
3056 if (p != false && nonconstp != false) | 3939 if (p != false && nonconstp != false) |
3057 { | 3940 { |
3058 sreal add_time = ((sreal)e->time * edge->sreal_frequency ()); | 3941 sreal add_time = ((sreal)e->time * freq); |
3059 int prob = e->nonconst_predicate.probability (callee_info->conds, | 3942 int prob = e->nonconst_predicate.probability (callee_info->conds, |
3060 clause, es->param); | 3943 clause, es->param); |
3061 add_time = add_time * prob / REG_BR_PROB_BASE; | 3944 if (prob != REG_BR_PROB_BASE) |
3945 add_time = add_time * prob / REG_BR_PROB_BASE; | |
3062 if (prob != REG_BR_PROB_BASE | 3946 if (prob != REG_BR_PROB_BASE |
3063 && dump_file && (dump_flags & TDF_DETAILS)) | 3947 && dump_file && (dump_flags & TDF_DETAILS)) |
3064 { | 3948 { |
3065 fprintf (dump_file, "\t\tScaling time by probability:%f\n", | 3949 fprintf (dump_file, "\t\tScaling time by probability:%f\n", |
3066 (double) prob / REG_BR_PROB_BASE); | 3950 (double) prob / REG_BR_PROB_BASE); |
3067 } | 3951 } |
3068 info->account_size_time (e->size, add_time, p, nonconstp); | 3952 info->account_size_time (e->size, add_time, p, nonconstp); |
3069 } | 3953 } |
3070 } | 3954 } |
3071 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map, | 3955 remap_edge_summaries (edge, edge->callee, info, params_summary, |
3956 callee_info, operand_map, | |
3072 offset_map, clause, &toplev_predicate); | 3957 offset_map, clause, &toplev_predicate); |
3073 remap_hint_predicate (info, callee_info, | 3958 remap_hint_predicate (info, params_summary, callee_info, |
3074 &callee_info->loop_iterations, | 3959 &callee_info->loop_iterations, |
3075 operand_map, offset_map, clause, &toplev_predicate); | 3960 operand_map, offset_map, clause, &toplev_predicate); |
3076 remap_hint_predicate (info, callee_info, | 3961 remap_hint_predicate (info, params_summary, callee_info, |
3077 &callee_info->loop_stride, | 3962 &callee_info->loop_stride, |
3078 operand_map, offset_map, clause, &toplev_predicate); | 3963 operand_map, offset_map, clause, &toplev_predicate); |
3079 remap_hint_predicate (info, callee_info, | 3964 |
3080 &callee_info->array_index, | 3965 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee); |
3081 operand_map, offset_map, clause, &toplev_predicate); | 3966 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size; |
3082 | 3967 |
3083 ipa_call_summary *s = ipa_call_summaries->get (edge); | 3968 if (info->estimated_stack_size < peak) |
3084 inline_update_callee_summaries (edge->callee, s->loop_depth); | 3969 info->estimated_stack_size = peak; |
3085 | 3970 |
3086 /* We do not maintain predicates of inlined edges, free it. */ | 3971 inline_update_callee_summaries (edge->callee, es->loop_depth); |
3087 edge_set_predicate (edge, &true_p); | 3972 if (info->call_size_time_table) |
3088 /* Similarly remove param summaries. */ | 3973 { |
3089 es->param.release (); | 3974 int edge_size = 0; |
3090 operand_map.release (); | 3975 sreal edge_time = 0; |
3091 offset_map.release (); | 3976 |
3092 } | 3977 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, vNULL, |
3093 | 3978 vNULL, vNULL, 0); |
3094 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size | 3979 /* Unaccount size and time of the optimized out call. */ |
3095 and time. Recompute it. */ | 3980 info->account_size_time (-edge_size, -edge_time, |
3981 es->predicate ? *es->predicate : true, | |
3982 es->predicate ? *es->predicate : true, | |
3983 true); | |
3984 /* Account new calls. */ | |
3985 summarize_calls_size_and_time (edge->callee, info); | |
3986 } | |
3987 | |
3988 /* Free summaries that are not maintained for inline clones/edges. */ | |
3989 ipa_call_summaries->remove (edge); | |
3990 ipa_fn_summaries->remove (edge->callee); | |
3991 ipa_remove_from_growth_caches (edge); | |
3992 } | |
3993 | |
3994 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating | |
3995 overall size and time. Recompute it. | |
3996 If RESET is true also recompute call_time_size_table. */ | |
3096 | 3997 |
3097 void | 3998 void |
3098 ipa_update_overall_fn_summary (struct cgraph_node *node) | 3999 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset) |
3099 { | 4000 { |
3100 struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); | 4001 class ipa_fn_summary *info = ipa_fn_summaries->get (node); |
4002 class ipa_size_summary *size_info = ipa_size_summaries->get (node); | |
3101 size_time_entry *e; | 4003 size_time_entry *e; |
3102 int i; | 4004 int i; |
3103 | 4005 |
3104 info->size = 0; | 4006 size_info->size = 0; |
3105 info->time = 0; | 4007 info->time = 0; |
3106 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) | 4008 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) |
3107 { | 4009 { |
3108 info->size += e->size; | 4010 size_info->size += e->size; |
3109 info->time += e->time; | 4011 info->time += e->time; |
3110 } | 4012 } |
3111 estimate_calls_size_and_time (node, &info->size, &info->min_size, | 4013 info->min_size = (*info->size_time_table)[0].size; |
3112 &info->time, NULL, | 4014 if (reset) |
3113 ~(clause_t) (1 << predicate::false_condition), | 4015 vec_free (info->call_size_time_table); |
3114 vNULL, vNULL, vNULL); | 4016 if (node->callees || node->indirect_calls) |
3115 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale; | 4017 estimate_calls_size_and_time (node, &size_info->size, &info->min_size, |
4018 &info->time, NULL, | |
4019 ~(clause_t) (1 << predicate::false_condition), | |
4020 vNULL, vNULL, vNULL); | |
4021 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale); | |
4022 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale); | |
3116 } | 4023 } |
3117 | 4024 |
3118 | 4025 |
3119 /* This function performs intraprocedural analysis in NODE that is required to | 4026 /* This function performs intraprocedural analysis in NODE that is required to |
3120 inline indirect calls. */ | 4027 inline indirect calls. */ |
3137 inline_analyze_function (struct cgraph_node *node) | 4044 inline_analyze_function (struct cgraph_node *node) |
3138 { | 4045 { |
3139 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); | 4046 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
3140 | 4047 |
3141 if (dump_file) | 4048 if (dump_file) |
3142 fprintf (dump_file, "\nAnalyzing function: %s/%u\n", | 4049 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ()); |
3143 node->name (), node->order); | |
3144 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p) | 4050 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p) |
3145 inline_indirect_intraprocedural_analysis (node); | 4051 inline_indirect_intraprocedural_analysis (node); |
3146 compute_fn_summary (node, false); | 4052 compute_fn_summary (node, false); |
3147 if (!optimize) | 4053 if (!optimize) |
3148 { | 4054 { |
3172 { | 4078 { |
3173 struct cgraph_node *node; | 4079 struct cgraph_node *node; |
3174 | 4080 |
3175 FOR_EACH_DEFINED_FUNCTION (node) | 4081 FOR_EACH_DEFINED_FUNCTION (node) |
3176 if (DECL_STRUCT_FUNCTION (node->decl)) | 4082 if (DECL_STRUCT_FUNCTION (node->decl)) |
3177 node->local.versionable = tree_versionable_function_p (node->decl); | 4083 node->versionable = tree_versionable_function_p (node->decl); |
3178 | 4084 |
3179 ipa_fn_summary_alloc (); | 4085 ipa_fn_summary_alloc (); |
3180 | 4086 |
3181 ipa_fn_summaries->enable_insertion_hook (); | 4087 ipa_fn_summaries->enable_insertion_hook (); |
3182 | 4088 |
3191 | 4097 |
3192 | 4098 |
3193 /* Write inline summary for edge E to OB. */ | 4099 /* Write inline summary for edge E to OB. */ |
3194 | 4100 |
3195 static void | 4101 static void |
3196 read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e) | 4102 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e, |
3197 { | 4103 bool prevails) |
3198 struct ipa_call_summary *es = ipa_call_summaries->get_create (e); | 4104 { |
4105 class ipa_call_summary *es = prevails | |
4106 ? ipa_call_summaries->get_create (e) : NULL; | |
3199 predicate p; | 4107 predicate p; |
3200 int length, i; | 4108 int length, i; |
3201 | 4109 |
3202 es->call_stmt_size = streamer_read_uhwi (ib); | 4110 int size = streamer_read_uhwi (ib); |
3203 es->call_stmt_time = streamer_read_uhwi (ib); | 4111 int time = streamer_read_uhwi (ib); |
3204 es->loop_depth = streamer_read_uhwi (ib); | 4112 int depth = streamer_read_uhwi (ib); |
4113 | |
4114 if (es) | |
4115 { | |
4116 es->call_stmt_size = size; | |
4117 es->call_stmt_time = time; | |
4118 es->loop_depth = depth; | |
4119 } | |
3205 | 4120 |
3206 bitpack_d bp = streamer_read_bitpack (ib); | 4121 bitpack_d bp = streamer_read_bitpack (ib); |
3207 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1); | 4122 if (es) |
4123 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1); | |
4124 else | |
4125 bp_unpack_value (&bp, 1); | |
3208 | 4126 |
3209 p.stream_in (ib); | 4127 p.stream_in (ib); |
3210 edge_set_predicate (e, &p); | 4128 if (es) |
4129 edge_set_predicate (e, &p); | |
3211 length = streamer_read_uhwi (ib); | 4130 length = streamer_read_uhwi (ib); |
3212 if (length) | 4131 if (length && es && e->possibly_call_in_translation_unit_p ()) |
3213 { | 4132 { |
3214 es->param.safe_grow_cleared (length); | 4133 es->param.safe_grow_cleared (length); |
3215 for (i = 0; i < length; i++) | 4134 for (i = 0; i < length; i++) |
3216 es->param[i].change_prob = streamer_read_uhwi (ib); | 4135 es->param[i].change_prob = streamer_read_uhwi (ib); |
4136 } | |
4137 else | |
4138 { | |
4139 for (i = 0; i < length; i++) | |
4140 streamer_read_uhwi (ib); | |
3217 } | 4141 } |
3218 } | 4142 } |
3219 | 4143 |
3220 | 4144 |
3221 /* Stream in inline summaries from the section. */ | 4145 /* Stream in inline summaries from the section. */ |
3227 const struct lto_function_header *header = | 4151 const struct lto_function_header *header = |
3228 (const struct lto_function_header *) data; | 4152 (const struct lto_function_header *) data; |
3229 const int cfg_offset = sizeof (struct lto_function_header); | 4153 const int cfg_offset = sizeof (struct lto_function_header); |
3230 const int main_offset = cfg_offset + header->cfg_size; | 4154 const int main_offset = cfg_offset + header->cfg_size; |
3231 const int string_offset = main_offset + header->main_size; | 4155 const int string_offset = main_offset + header->main_size; |
3232 struct data_in *data_in; | 4156 class data_in *data_in; |
3233 unsigned int i, count2, j; | 4157 unsigned int i, count2, j; |
3234 unsigned int f_count; | 4158 unsigned int f_count; |
3235 | 4159 |
3236 lto_input_block ib ((const char *) data + main_offset, header->main_size, | 4160 lto_input_block ib ((const char *) data + main_offset, header->main_size, |
3237 file_data->mode_table); | 4161 file_data->mode_table); |
3242 f_count = streamer_read_uhwi (&ib); | 4166 f_count = streamer_read_uhwi (&ib); |
3243 for (i = 0; i < f_count; i++) | 4167 for (i = 0; i < f_count; i++) |
3244 { | 4168 { |
3245 unsigned int index; | 4169 unsigned int index; |
3246 struct cgraph_node *node; | 4170 struct cgraph_node *node; |
3247 struct ipa_fn_summary *info; | 4171 class ipa_fn_summary *info; |
4172 class ipa_node_params *params_summary; | |
4173 class ipa_size_summary *size_info; | |
3248 lto_symtab_encoder_t encoder; | 4174 lto_symtab_encoder_t encoder; |
3249 struct bitpack_d bp; | 4175 struct bitpack_d bp; |
3250 struct cgraph_edge *e; | 4176 struct cgraph_edge *e; |
3251 predicate p; | 4177 predicate p; |
3252 | 4178 |
3253 index = streamer_read_uhwi (&ib); | 4179 index = streamer_read_uhwi (&ib); |
3254 encoder = file_data->symtab_node_encoder; | 4180 encoder = file_data->symtab_node_encoder; |
3255 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, | 4181 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, |
3256 index)); | 4182 index)); |
3257 info = ipa_fn_summaries->get_create (node); | 4183 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL; |
3258 | 4184 params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL; |
3259 info->estimated_stack_size | 4185 size_info = node->prevailing_p () |
3260 = info->estimated_self_stack_size = streamer_read_uhwi (&ib); | 4186 ? ipa_size_summaries->get_create (node) : NULL; |
3261 info->size = info->self_size = streamer_read_uhwi (&ib); | 4187 |
3262 info->time = sreal::stream_in (&ib); | 4188 int stack_size = streamer_read_uhwi (&ib); |
4189 int size = streamer_read_uhwi (&ib); | |
4190 sreal time = sreal::stream_in (&ib); | |
4191 | |
4192 if (info) | |
4193 { | |
4194 info->estimated_stack_size | |
4195 = size_info->estimated_self_stack_size = stack_size; | |
4196 size_info->size = size_info->self_size = size; | |
4197 info->time = time; | |
4198 } | |
3263 | 4199 |
3264 bp = streamer_read_bitpack (&ib); | 4200 bp = streamer_read_bitpack (&ib); |
3265 info->inlinable = bp_unpack_value (&bp, 1); | 4201 if (info) |
3266 info->fp_expressions = bp_unpack_value (&bp, 1); | 4202 { |
4203 info->inlinable = bp_unpack_value (&bp, 1); | |
4204 info->fp_expressions = bp_unpack_value (&bp, 1); | |
4205 } | |
4206 else | |
4207 { | |
4208 bp_unpack_value (&bp, 1); | |
4209 bp_unpack_value (&bp, 1); | |
4210 } | |
3267 | 4211 |
3268 count2 = streamer_read_uhwi (&ib); | 4212 count2 = streamer_read_uhwi (&ib); |
3269 gcc_assert (!info->conds); | 4213 gcc_assert (!info || !info->conds); |
4214 if (info) | |
4215 vec_safe_reserve_exact (info->conds, count2); | |
3270 for (j = 0; j < count2; j++) | 4216 for (j = 0; j < count2; j++) |
3271 { | 4217 { |
3272 struct condition c; | 4218 struct condition c; |
4219 unsigned int k, count3; | |
3273 c.operand_num = streamer_read_uhwi (&ib); | 4220 c.operand_num = streamer_read_uhwi (&ib); |
3274 c.size = streamer_read_uhwi (&ib); | |
3275 c.code = (enum tree_code) streamer_read_uhwi (&ib); | 4221 c.code = (enum tree_code) streamer_read_uhwi (&ib); |
4222 c.type = stream_read_tree (&ib, data_in); | |
3276 c.val = stream_read_tree (&ib, data_in); | 4223 c.val = stream_read_tree (&ib, data_in); |
3277 bp = streamer_read_bitpack (&ib); | 4224 bp = streamer_read_bitpack (&ib); |
3278 c.agg_contents = bp_unpack_value (&bp, 1); | 4225 c.agg_contents = bp_unpack_value (&bp, 1); |
3279 c.by_ref = bp_unpack_value (&bp, 1); | 4226 c.by_ref = bp_unpack_value (&bp, 1); |
3280 if (c.agg_contents) | 4227 if (c.agg_contents) |
3281 c.offset = streamer_read_uhwi (&ib); | 4228 c.offset = streamer_read_uhwi (&ib); |
3282 vec_safe_push (info->conds, c); | 4229 count3 = streamer_read_uhwi (&ib); |
4230 c.param_ops = NULL; | |
4231 if (info) | |
4232 vec_safe_reserve_exact (c.param_ops, count3); | |
4233 if (params_summary) | |
4234 ipa_set_param_used_by_ipa_predicates | |
4235 (params_summary, c.operand_num, true); | |
4236 for (k = 0; k < count3; k++) | |
4237 { | |
4238 struct expr_eval_op op; | |
4239 enum gimple_rhs_class rhs_class; | |
4240 op.code = (enum tree_code) streamer_read_uhwi (&ib); | |
4241 op.type = stream_read_tree (&ib, data_in); | |
4242 switch (rhs_class = get_gimple_rhs_class (op.code)) | |
4243 { | |
4244 case GIMPLE_UNARY_RHS: | |
4245 op.index = 0; | |
4246 op.val[0] = NULL_TREE; | |
4247 op.val[1] = NULL_TREE; | |
4248 break; | |
4249 | |
4250 case GIMPLE_BINARY_RHS: | |
4251 case GIMPLE_TERNARY_RHS: | |
4252 bp = streamer_read_bitpack (&ib); | |
4253 op.index = bp_unpack_value (&bp, 2); | |
4254 op.val[0] = stream_read_tree (&ib, data_in); | |
4255 if (rhs_class == GIMPLE_BINARY_RHS) | |
4256 op.val[1] = NULL_TREE; | |
4257 else | |
4258 op.val[1] = stream_read_tree (&ib, data_in); | |
4259 break; | |
4260 | |
4261 default: | |
4262 fatal_error (UNKNOWN_LOCATION, | |
4263 "invalid fnsummary in LTO stream"); | |
4264 } | |
4265 if (info) | |
4266 c.param_ops->quick_push (op); | |
4267 } | |
4268 if (info) | |
4269 info->conds->quick_push (c); | |
3283 } | 4270 } |
3284 count2 = streamer_read_uhwi (&ib); | 4271 count2 = streamer_read_uhwi (&ib); |
3285 gcc_assert (!info->size_time_table); | 4272 gcc_assert (!info || !info->size_time_table); |
4273 if (info && count2) | |
4274 vec_safe_reserve_exact (info->size_time_table, count2); | |
3286 for (j = 0; j < count2; j++) | 4275 for (j = 0; j < count2; j++) |
3287 { | 4276 { |
3288 struct size_time_entry e; | 4277 class size_time_entry e; |
3289 | 4278 |
3290 e.size = streamer_read_uhwi (&ib); | 4279 e.size = streamer_read_uhwi (&ib); |
3291 e.time = sreal::stream_in (&ib); | 4280 e.time = sreal::stream_in (&ib); |
3292 e.exec_predicate.stream_in (&ib); | 4281 e.exec_predicate.stream_in (&ib); |
3293 e.nonconst_predicate.stream_in (&ib); | 4282 e.nonconst_predicate.stream_in (&ib); |
3294 | 4283 |
3295 vec_safe_push (info->size_time_table, e); | 4284 if (info) |
4285 info->size_time_table->quick_push (e); | |
3296 } | 4286 } |
3297 | 4287 |
3298 p.stream_in (&ib); | 4288 p.stream_in (&ib); |
3299 set_hint_predicate (&info->loop_iterations, p); | 4289 if (info) |
4290 set_hint_predicate (&info->loop_iterations, p); | |
3300 p.stream_in (&ib); | 4291 p.stream_in (&ib); |
3301 set_hint_predicate (&info->loop_stride, p); | 4292 if (info) |
3302 p.stream_in (&ib); | 4293 set_hint_predicate (&info->loop_stride, p); |
3303 set_hint_predicate (&info->array_index, p); | |
3304 for (e = node->callees; e; e = e->next_callee) | 4294 for (e = node->callees; e; e = e->next_callee) |
3305 read_ipa_call_summary (&ib, e); | 4295 read_ipa_call_summary (&ib, e, info != NULL); |
3306 for (e = node->indirect_calls; e; e = e->next_callee) | 4296 for (e = node->indirect_calls; e; e = e->next_callee) |
3307 read_ipa_call_summary (&ib, e); | 4297 read_ipa_call_summary (&ib, e, info != NULL); |
3308 } | 4298 } |
3309 | 4299 |
3310 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data, | 4300 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data, |
3311 len); | 4301 len); |
3312 lto_data_in_delete (data_in); | 4302 lto_data_in_delete (data_in); |
3327 ipa_fn_summary_alloc (); | 4317 ipa_fn_summary_alloc (); |
3328 | 4318 |
3329 while ((file_data = file_data_vec[j++])) | 4319 while ((file_data = file_data_vec[j++])) |
3330 { | 4320 { |
3331 size_t len; | 4321 size_t len; |
3332 const char *data = lto_get_section_data (file_data, | 4322 const char *data |
3333 LTO_section_ipa_fn_summary, | 4323 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary, |
3334 NULL, &len); | 4324 &len); |
3335 if (data) | 4325 if (data) |
3336 inline_read_section (file_data, data, len); | 4326 inline_read_section (file_data, data, len); |
3337 else | 4327 else |
3338 /* Fatal error here. We do not want to support compiling ltrans units | 4328 /* Fatal error here. We do not want to support compiling ltrans units |
3339 with different version of compiler or different flags than the WPA | 4329 with different version of compiler or different flags than the WPA |
3353 /* Write inline summary for edge E to OB. */ | 4343 /* Write inline summary for edge E to OB. */ |
3354 | 4344 |
3355 static void | 4345 static void |
3356 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e) | 4346 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e) |
3357 { | 4347 { |
3358 struct ipa_call_summary *es = ipa_call_summaries->get (e); | 4348 class ipa_call_summary *es = ipa_call_summaries->get (e); |
3359 int i; | 4349 int i; |
3360 | 4350 |
3361 streamer_write_uhwi (ob, es->call_stmt_size); | 4351 streamer_write_uhwi (ob, es->call_stmt_size); |
3362 streamer_write_uhwi (ob, es->call_stmt_time); | 4352 streamer_write_uhwi (ob, es->call_stmt_time); |
3363 streamer_write_uhwi (ob, es->loop_depth); | 4353 streamer_write_uhwi (ob, es->loop_depth); |
3382 | 4372 |
3383 static void | 4373 static void |
3384 ipa_fn_summary_write (void) | 4374 ipa_fn_summary_write (void) |
3385 { | 4375 { |
3386 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary); | 4376 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary); |
4377 lto_symtab_encoder_iterator lsei; | |
3387 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder; | 4378 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder; |
3388 unsigned int count = 0; | 4379 unsigned int count = 0; |
3389 int i; | 4380 |
3390 | 4381 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); |
3391 for (i = 0; i < lto_symtab_encoder_size (encoder); i++) | 4382 lsei_next_function_in_partition (&lsei)) |
3392 { | 4383 { |
3393 symtab_node *snode = lto_symtab_encoder_deref (encoder, i); | 4384 cgraph_node *cnode = lsei_cgraph_node (lsei); |
3394 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode); | 4385 if (cnode->definition && !cnode->alias) |
3395 if (cnode && cnode->definition && !cnode->alias) | |
3396 count++; | 4386 count++; |
3397 } | 4387 } |
3398 streamer_write_uhwi (ob, count); | 4388 streamer_write_uhwi (ob, count); |
3399 | 4389 |
3400 for (i = 0; i < lto_symtab_encoder_size (encoder); i++) | 4390 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); |
3401 { | 4391 lsei_next_function_in_partition (&lsei)) |
3402 symtab_node *snode = lto_symtab_encoder_deref (encoder, i); | 4392 { |
3403 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode); | 4393 cgraph_node *cnode = lsei_cgraph_node (lsei); |
3404 if (cnode && cnode->definition && !cnode->alias) | 4394 if (cnode->definition && !cnode->alias) |
3405 { | 4395 { |
3406 struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode); | 4396 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode); |
4397 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode); | |
3407 struct bitpack_d bp; | 4398 struct bitpack_d bp; |
3408 struct cgraph_edge *edge; | 4399 struct cgraph_edge *edge; |
3409 int i; | 4400 int i; |
3410 size_time_entry *e; | 4401 size_time_entry *e; |
3411 struct condition *c; | 4402 struct condition *c; |
3412 | 4403 |
3413 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode)); | 4404 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode)); |
3414 streamer_write_hwi (ob, info->estimated_self_stack_size); | 4405 streamer_write_hwi (ob, size_info->estimated_self_stack_size); |
3415 streamer_write_hwi (ob, info->self_size); | 4406 streamer_write_hwi (ob, size_info->self_size); |
3416 info->time.stream_out (ob); | 4407 info->time.stream_out (ob); |
3417 bp = bitpack_create (ob->main_stream); | 4408 bp = bitpack_create (ob->main_stream); |
3418 bp_pack_value (&bp, info->inlinable, 1); | 4409 bp_pack_value (&bp, info->inlinable, 1); |
3419 bp_pack_value (&bp, false, 1); | 4410 bp_pack_value (&bp, false, 1); |
3420 bp_pack_value (&bp, info->fp_expressions, 1); | 4411 bp_pack_value (&bp, info->fp_expressions, 1); |
3421 streamer_write_bitpack (&bp); | 4412 streamer_write_bitpack (&bp); |
3422 streamer_write_uhwi (ob, vec_safe_length (info->conds)); | 4413 streamer_write_uhwi (ob, vec_safe_length (info->conds)); |
3423 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++) | 4414 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++) |
3424 { | 4415 { |
4416 int j; | |
4417 struct expr_eval_op *op; | |
4418 | |
3425 streamer_write_uhwi (ob, c->operand_num); | 4419 streamer_write_uhwi (ob, c->operand_num); |
3426 streamer_write_uhwi (ob, c->size); | |
3427 streamer_write_uhwi (ob, c->code); | 4420 streamer_write_uhwi (ob, c->code); |
4421 stream_write_tree (ob, c->type, true); | |
3428 stream_write_tree (ob, c->val, true); | 4422 stream_write_tree (ob, c->val, true); |
3429 bp = bitpack_create (ob->main_stream); | 4423 bp = bitpack_create (ob->main_stream); |
3430 bp_pack_value (&bp, c->agg_contents, 1); | 4424 bp_pack_value (&bp, c->agg_contents, 1); |
3431 bp_pack_value (&bp, c->by_ref, 1); | 4425 bp_pack_value (&bp, c->by_ref, 1); |
3432 streamer_write_bitpack (&bp); | 4426 streamer_write_bitpack (&bp); |
3433 if (c->agg_contents) | 4427 if (c->agg_contents) |
3434 streamer_write_uhwi (ob, c->offset); | 4428 streamer_write_uhwi (ob, c->offset); |
4429 streamer_write_uhwi (ob, vec_safe_length (c->param_ops)); | |
4430 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) | |
4431 { | |
4432 streamer_write_uhwi (ob, op->code); | |
4433 stream_write_tree (ob, op->type, true); | |
4434 if (op->val[0]) | |
4435 { | |
4436 bp = bitpack_create (ob->main_stream); | |
4437 bp_pack_value (&bp, op->index, 2); | |
4438 streamer_write_bitpack (&bp); | |
4439 stream_write_tree (ob, op->val[0], true); | |
4440 if (op->val[1]) | |
4441 stream_write_tree (ob, op->val[1], true); | |
4442 } | |
4443 } | |
3435 } | 4444 } |
3436 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table)); | 4445 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table)); |
3437 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) | 4446 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) |
3438 { | 4447 { |
3439 streamer_write_uhwi (ob, e->size); | 4448 streamer_write_uhwi (ob, e->size); |
3447 streamer_write_uhwi (ob, 0); | 4456 streamer_write_uhwi (ob, 0); |
3448 if (info->loop_stride) | 4457 if (info->loop_stride) |
3449 info->loop_stride->stream_out (ob); | 4458 info->loop_stride->stream_out (ob); |
3450 else | 4459 else |
3451 streamer_write_uhwi (ob, 0); | 4460 streamer_write_uhwi (ob, 0); |
3452 if (info->array_index) | |
3453 info->array_index->stream_out (ob); | |
3454 else | |
3455 streamer_write_uhwi (ob, 0); | |
3456 for (edge = cnode->callees; edge; edge = edge->next_callee) | 4461 for (edge = cnode->callees; edge; edge = edge->next_callee) |
3457 write_ipa_call_summary (ob, edge); | 4462 write_ipa_call_summary (ob, edge); |
3458 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee) | 4463 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee) |
3459 write_ipa_call_summary (ob, edge); | 4464 write_ipa_call_summary (ob, edge); |
3460 } | 4465 } |
3466 if (!flag_ipa_cp) | 4471 if (!flag_ipa_cp) |
3467 ipa_prop_write_jump_functions (); | 4472 ipa_prop_write_jump_functions (); |
3468 } | 4473 } |
3469 | 4474 |
3470 | 4475 |
3471 /* Release inline summary. */ | 4476 /* Release function summary. */ |
3472 | 4477 |
3473 void | 4478 void |
3474 ipa_free_fn_summary (void) | 4479 ipa_free_fn_summary (void) |
3475 { | 4480 { |
3476 struct cgraph_node *node; | |
3477 if (!ipa_call_summaries) | 4481 if (!ipa_call_summaries) |
3478 return; | 4482 return; |
3479 FOR_EACH_DEFINED_FUNCTION (node) | 4483 ggc_delete (ipa_fn_summaries); |
3480 if (!node->alias) | |
3481 ipa_fn_summaries->remove (node); | |
3482 ipa_fn_summaries->release (); | |
3483 ipa_fn_summaries = NULL; | 4484 ipa_fn_summaries = NULL; |
3484 ipa_call_summaries->release (); | |
3485 delete ipa_call_summaries; | 4485 delete ipa_call_summaries; |
3486 ipa_call_summaries = NULL; | 4486 ipa_call_summaries = NULL; |
3487 edge_predicate_pool.release (); | 4487 edge_predicate_pool.release (); |
4488 /* During IPA this is one of largest datastructures to release. */ | |
4489 if (flag_wpa) | |
4490 ggc_trim (); | |
4491 } | |
4492 | |
4493 /* Release function summary. */ | |
4494 | |
4495 void | |
4496 ipa_free_size_summary (void) | |
4497 { | |
4498 if (!ipa_size_summaries) | |
4499 return; | |
4500 delete ipa_size_summaries; | |
4501 ipa_size_summaries = NULL; | |
3488 } | 4502 } |
3489 | 4503 |
3490 namespace { | 4504 namespace { |
3491 | 4505 |
3492 const pass_data pass_data_local_fn_summary = | 4506 const pass_data pass_data_local_fn_summary = |
3557 void set_pass_param (unsigned int n, bool param) | 4571 void set_pass_param (unsigned int n, bool param) |
3558 { | 4572 { |
3559 gcc_assert (n == 0); | 4573 gcc_assert (n == 0); |
3560 small_p = param; | 4574 small_p = param; |
3561 } | 4575 } |
3562 virtual bool gate (function *) { return small_p || !flag_wpa; } | 4576 virtual bool gate (function *) { return true; } |
3563 virtual unsigned int execute (function *) | 4577 virtual unsigned int execute (function *) |
3564 { | 4578 { |
3565 ipa_free_fn_summary (); | 4579 ipa_free_fn_summary (); |
3566 /* Early optimizations may make function unreachable. We can not | 4580 if (!flag_wpa) |
3567 remove unreachable functions as part of the early opts pass because | 4581 ipa_free_size_summary (); |
3568 TODOs are run before subpasses. Do it here. */ | 4582 return 0; |
3569 return small_p ? TODO_remove_functions | TODO_dump_symtab : 0; | |
3570 } | 4583 } |
3571 | 4584 |
3572 private: | 4585 private: |
3573 bool small_p; | 4586 bool small_p; |
3574 }; // class pass_ipa_free_fn_summary | 4587 }; // class pass_ipa_free_fn_summary |