Mercurial > hg > CbC > CbC_gcc
diff gcc/ipa-fnsummary.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
line wrap: on
line diff
--- a/gcc/ipa-fnsummary.c Fri Oct 27 22:46:09 2017 +0900 +++ b/gcc/ipa-fnsummary.c Thu Oct 25 07:37:49 2018 +0900 @@ -1,5 +1,5 @@ /* Function summary pass. - Copyright (C) 2003-2017 Free Software Foundation, Inc. + Copyright (C) 2003-2018 Free Software Foundation, Inc. Contributed by Jan Hubicka This file is part of GCC. @@ -79,7 +79,6 @@ #include "cfgloop.h" #include "tree-scalar-evolution.h" #include "ipa-utils.h" -#include "cilk.h" #include "cfgexpand.h" #include "gimplify.h" #include "stringpool.h" @@ -244,7 +243,6 @@ e->redirect_callee (target); struct ipa_call_summary *es = ipa_call_summaries->get (e); e->inline_failed = CIF_UNREACHABLE; - e->frequency = 0; e->count = profile_count::zero (); es->call_stmt_size = 0; es->call_stmt_time = 0; @@ -445,15 +443,16 @@ && !e->call_stmt_cannot_inline_p && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)) { - struct ipa_node_params *parms_info; + struct ipa_node_params *caller_parms_info, *callee_pi; struct ipa_edge_args *args = IPA_EDGE_REF (e); struct ipa_call_summary *es = ipa_call_summaries->get (e); int i, count = ipa_get_cs_argument_count (args); if (e->caller->global.inlined_to) - parms_info = IPA_NODE_REF (e->caller->global.inlined_to); + caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to); else - parms_info = IPA_NODE_REF (e->caller); + caller_parms_info = IPA_NODE_REF (e->caller); + callee_pi = IPA_NODE_REF (e->callee); if (count && (info->conds || known_vals_ptr)) known_vals.safe_grow_cleared (count); @@ -465,7 +464,8 @@ for (i = 0; i < count; i++) { struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); - tree cst = ipa_value_from_jfunc (parms_info, jf); + tree cst = ipa_value_from_jfunc (caller_parms_info, jf, + ipa_get_type (callee_pi, i)); if (!cst && e->call_stmt && i < (int)gimple_call_num_args (e->call_stmt)) @@ -484,8 +484,8 @@ known_vals[i] = error_mark_node; if (known_contexts_ptr) - (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e, - i, jf); + (*known_contexts_ptr)[i] + = ipa_context_from_jfunc (caller_parms_info, e, i, jf); /* TODO: When IPA-CP starts propagating and merging aggregate jump functions, use its knowledge of the caller too, just like the scalar case above. */ @@ -535,65 +535,34 @@ ipa_call_summaries = new ipa_call_summary_t (symtab, false); } -/* We are called multiple time for given function; clear - data from previous run so they are not cumulated. */ - -void -ipa_call_summary::reset () +ipa_call_summary::~ipa_call_summary () { - call_stmt_size = call_stmt_time = 0; if (predicate) edge_predicate_pool.remove (predicate); - predicate = NULL; + param.release (); } -/* We are called multiple time for given function; clear - data from previous run so they are not cumulated. */ +ipa_fn_summary::~ipa_fn_summary () +{ + if (loop_iterations) + edge_predicate_pool.remove (loop_iterations); + if (loop_stride) + edge_predicate_pool.remove (loop_stride); + if (array_index) + edge_predicate_pool.remove (array_index); + vec_free (conds); + vec_free (size_time_table); +} void -ipa_fn_summary::reset (struct cgraph_node *node) +ipa_fn_summary_t::remove_callees (cgraph_node *node) { - struct cgraph_edge *e; - - self_size = 0; - estimated_stack_size = 0; - estimated_self_stack_size = 0; - stack_frame_offset = 0; - size = 0; - time = 0; - growth = 0; - scc_no = 0; - if (loop_iterations) - { - edge_predicate_pool.remove (loop_iterations); - loop_iterations = NULL; - } - if (loop_stride) - { - edge_predicate_pool.remove (loop_stride); - loop_stride = NULL; - } - if (array_index) - { - edge_predicate_pool.remove (array_index); - array_index = NULL; - } - vec_free (conds); - vec_free (size_time_table); + cgraph_edge *e; for (e = node->callees; e; e = e->next_callee) - ipa_call_summaries->get (e)->reset (); + ipa_call_summaries->remove (e); for (e = node->indirect_calls; e; e = e->next_callee) - ipa_call_summaries->get (e)->reset (); - fp_expressions = false; -} - -/* Hook that is called by cgraph.c when a node is removed. */ - -void -ipa_fn_summary_t::remove (cgraph_node *node, ipa_fn_summary *info) -{ - info->reset (node); + ipa_call_summaries->remove (e); } /* Same as remap_predicate_after_duplication but handle hint predicate *P. @@ -624,7 +593,7 @@ ipa_fn_summary *, ipa_fn_summary *info) { - memcpy (info, ipa_fn_summaries->get (src), sizeof (ipa_fn_summary)); + new (info) ipa_fn_summary (*ipa_fn_summaries->get (src)); /* TODO: as an optimization, we may avoid copying conditions that are known to be false or true. */ info->conds = vec_safe_copy (info->conds); @@ -699,7 +668,7 @@ for (edge = dst->callees; edge; edge = next) { predicate new_predicate; - struct ipa_call_summary *es = ipa_call_summaries->get (edge); + struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); next = edge->next_callee; if (!edge->inline_failed) @@ -718,7 +687,7 @@ for (edge = dst->indirect_calls; edge; edge = next) { predicate new_predicate; - struct ipa_call_summary *es = ipa_call_summaries->get (edge); + struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); next = edge->next_callee; gcc_checking_assert (edge->inline_failed); @@ -778,7 +747,7 @@ struct ipa_call_summary *srcinfo, struct ipa_call_summary *info) { - *info = *srcinfo; + new (info) ipa_call_summary (*srcinfo); info->predicate = NULL; edge_set_predicate (dst, srcinfo->predicate); info->param = srcinfo->param.copy (); @@ -791,17 +760,6 @@ } } - -/* Keep edge cache consistent across edge removal. */ - -void -ipa_call_summary_t::remove (struct cgraph_edge *, - struct ipa_call_summary *sum) -{ - sum->reset (); -} - - /* Dump edge summaries associated to NODE and recursively to all clones. Indent by INDENT. */ @@ -817,15 +775,18 @@ int i; fprintf (f, - "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i" - " time: %2i callee size:%2i stack:%2i", + "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i", indent, "", callee->name (), callee->order, !edge->inline_failed ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed), - indent, "", es->loop_depth, edge->frequency, - es->call_stmt_size, es->call_stmt_time, - (int) ipa_fn_summaries->get (callee)->size / ipa_fn_summary::size_scale, - (int) ipa_fn_summaries->get (callee)->estimated_stack_size); + indent, "", es->loop_depth, edge->sreal_frequency ().to_double (), + es->call_stmt_size, es->call_stmt_time); + + ipa_fn_summary *s = ipa_fn_summaries->get (callee); + if (s != NULL) + fprintf (f, "callee size:%2i stack:%2i", + (int) (s->size / ipa_fn_summary::size_scale), + (int) s->estimated_stack_size); if (es->predicate) { @@ -848,23 +809,25 @@ } if (!edge->inline_failed) { + ipa_fn_summary *s = ipa_fn_summaries->get (callee); fprintf (f, "%*sStack frame offset %i, callee self size %i," " callee size %i\n", indent + 2, "", - (int) ipa_fn_summaries->get (callee)->stack_frame_offset, - (int) ipa_fn_summaries->get (callee)->estimated_self_stack_size, - (int) ipa_fn_summaries->get (callee)->estimated_stack_size); + (int) s->stack_frame_offset, + (int) s->estimated_self_stack_size, + (int) s->estimated_stack_size); dump_ipa_call_summary (f, indent + 2, callee, info); } } for (edge = node->indirect_calls; edge; edge = edge->next_callee) { struct ipa_call_summary *es = ipa_call_summaries->get (edge); - fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i" + fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i" " time: %2i", indent, "", es->loop_depth, - edge->frequency, es->call_stmt_size, es->call_stmt_time); + edge->sreal_frequency ().to_double (), es->call_stmt_size, + es->call_stmt_time); if (es->predicate) { fprintf (f, "predicate: "); @@ -882,64 +845,66 @@ if (node->definition) { struct ipa_fn_summary *s = ipa_fn_summaries->get (node); - size_time_entry *e; - int i; - fprintf (f, "IPA function summary for %s/%i", node->name (), - node->order); - if (DECL_DISREGARD_INLINE_LIMITS (node->decl)) - fprintf (f, " always_inline"); - if (s->inlinable) - fprintf (f, " inlinable"); - if (s->contains_cilk_spawn) - fprintf (f, " contains_cilk_spawn"); - if (s->fp_expressions) - fprintf (f, " fp_expression"); - fprintf (f, "\n global time: %f\n", s->time.to_double ()); - fprintf (f, " self size: %i\n", s->self_size); - fprintf (f, " global size: %i\n", s->size); - fprintf (f, " min size: %i\n", s->min_size); - fprintf (f, " self stack: %i\n", - (int) s->estimated_self_stack_size); - fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); - if (s->growth) - fprintf (f, " estimated growth:%i\n", (int) s->growth); - if (s->scc_no) - fprintf (f, " In SCC: %i\n", (int) s->scc_no); - for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++) + if (s != NULL) { - fprintf (f, " size:%f, time:%f", - (double) e->size / ipa_fn_summary::size_scale, - e->time.to_double ()); - if (e->exec_predicate != true) + size_time_entry *e; + int i; + fprintf (f, "IPA function summary for %s", node->dump_name ()); + if (DECL_DISREGARD_INLINE_LIMITS (node->decl)) + fprintf (f, " always_inline"); + if (s->inlinable) + fprintf (f, " inlinable"); + if (s->fp_expressions) + fprintf (f, " fp_expression"); + fprintf (f, "\n global time: %f\n", s->time.to_double ()); + fprintf (f, " self size: %i\n", s->self_size); + fprintf (f, " global size: %i\n", s->size); + fprintf (f, " min size: %i\n", s->min_size); + fprintf (f, " self stack: %i\n", + (int) s->estimated_self_stack_size); + fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); + if (s->growth) + fprintf (f, " estimated growth:%i\n", (int) s->growth); + if (s->scc_no) + fprintf (f, " In SCC: %i\n", (int) s->scc_no); + for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++) { - fprintf (f, ", executed if:"); - e->exec_predicate.dump (f, s->conds, 0); + fprintf (f, " size:%f, time:%f", + (double) e->size / ipa_fn_summary::size_scale, + e->time.to_double ()); + if (e->exec_predicate != true) + { + fprintf (f, ", executed if:"); + e->exec_predicate.dump (f, s->conds, 0); + } + if (e->exec_predicate != e->nonconst_predicate) + { + fprintf (f, ", nonconst if:"); + e->nonconst_predicate.dump (f, s->conds, 0); + } + fprintf (f, "\n"); } - if (e->exec_predicate != e->nonconst_predicate) + if (s->loop_iterations) + { + fprintf (f, " loop iterations:"); + s->loop_iterations->dump (f, s->conds); + } + if (s->loop_stride) { - fprintf (f, ", nonconst if:"); - e->nonconst_predicate.dump (f, s->conds, 0); + fprintf (f, " loop stride:"); + s->loop_stride->dump (f, s->conds); } + if (s->array_index) + { + fprintf (f, " array index:"); + s->array_index->dump (f, s->conds); + } + fprintf (f, " calls:\n"); + dump_ipa_call_summary (f, 4, node, s); fprintf (f, "\n"); } - if (s->loop_iterations) - { - fprintf (f, " loop iterations:"); - s->loop_iterations->dump (f, s->conds); - } - if (s->loop_stride) - { - fprintf (f, " loop stride:"); - s->loop_stride->dump (f, s->conds); - } - if (s->array_index) - { - fprintf (f, " array index:"); - s->array_index->dump (f, s->conds); - } - fprintf (f, " calls:\n"); - dump_ipa_call_summary (f, 4, node, s); - fprintf (f, "\n"); + else + fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ()); } } @@ -1326,7 +1291,7 @@ tree min, max; predicate p; - e = find_edge (bb, label_to_block (CASE_LABEL (cl))); + e = gimple_switch_edge (cfun, last, case_idx); min = CASE_LOW (cl); max = CASE_HIGH (cl); @@ -1485,6 +1450,8 @@ nonconstant_names); return p2.or_with (summary->conds, p1); } + else if (TREE_CODE (expr) == CALL_EXPR) + return true; else { debug_tree (expr); @@ -1591,13 +1558,14 @@ struct record_modified_bb_info { + tree op; bitmap bb_set; gimple *stmt; }; /* Value is initialized in INIT_BB and used in USE_BB. We want to copute probability how often it changes between USE_BB. - INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB + INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB is in different loop nest, we can do better. This is all just estimate. In theory we look for minimal cut separating INIT_BB and USE_BB, but we only want to anticipate loop invariant motion @@ -1607,7 +1575,7 @@ get_minimal_bb (basic_block init_bb, basic_block use_bb) { struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); - if (l && l->header->frequency < init_bb->frequency) + if (l && l->header->count < init_bb->count) return l->header; return init_bb; } @@ -1622,12 +1590,25 @@ (struct record_modified_bb_info *) data; if (SSA_NAME_DEF_STMT (vdef) == info->stmt) return false; + if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef))) + return false; bitmap_set_bit (info->bb_set, SSA_NAME_IS_DEFAULT_DEF (vdef) ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index : get_minimal_bb (gimple_bb (SSA_NAME_DEF_STMT (vdef)), gimple_bb (info->stmt))->index); + if (dump_file) + { + fprintf (dump_file, " Param "); + print_generic_expr (dump_file, info->op, TDF_SLIM); + fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ", + gimple_bb (SSA_NAME_DEF_STMT (vdef))->index, + get_minimal_bb + (gimple_bb (SSA_NAME_DEF_STMT (vdef)), + gimple_bb (info->stmt))->index); + print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0); + } return false; } @@ -1661,65 +1642,75 @@ than the statement defining value, we take the frequency 1/N. */ if (TREE_CODE (base) == SSA_NAME) { - int init_freq; - - if (!bb->frequency) + profile_count init_count; + + if (!bb->count.nonzero_p ()) return REG_BR_PROB_BASE; if (SSA_NAME_IS_DEFAULT_DEF (base)) - init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; + init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; else - init_freq = get_minimal_bb + init_count = get_minimal_bb (gimple_bb (SSA_NAME_DEF_STMT (base)), - gimple_bb (stmt))->frequency; - - if (!init_freq) - init_freq = 1; - if (init_freq < bb->frequency) - return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1); - else - return REG_BR_PROB_BASE; + gimple_bb (stmt))->count; + + if (init_count < bb->count) + return MAX ((init_count.to_sreal_scale (bb->count) + * REG_BR_PROB_BASE).to_int (), 1); + return REG_BR_PROB_BASE; } else { ao_ref refd; - int max; + profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; struct record_modified_bb_info info; - bitmap_iterator bi; - unsigned index; tree init = ctor_for_folding (base); if (init != error_mark_node) return 0; - if (!bb->frequency) + if (!bb->count.nonzero_p ()) return REG_BR_PROB_BASE; + if (dump_file) + { + fprintf (dump_file, " Analyzing param change probablity of "); + print_generic_expr (dump_file, op, TDF_SLIM); + fprintf (dump_file, "\n"); + } ao_ref_init (&refd, op); + info.op = op; info.stmt = stmt; info.bb_set = BITMAP_ALLOC (NULL); walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info, NULL); if (bitmap_bit_p (info.bb_set, bb->index)) { + if (dump_file) + fprintf (dump_file, " Set in same BB as used.\n"); BITMAP_FREE (info.bb_set); return REG_BR_PROB_BASE; } - /* Assume that every memory is initialized at entry. - TODO: Can we easilly determine if value is always defined - and thus we may skip entry block? */ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency) - max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; - else - max = 1; - + bitmap_iterator bi; + unsigned index; + /* Lookup the most frequent update of the value and believe that + it dominates all the other; precise analysis here is difficult. */ EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi) - max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency); + max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count); + if (dump_file) + { + fprintf (dump_file, " Set with count "); + max.dump (dump_file); + fprintf (dump_file, " and used with count "); + bb->count.dump (dump_file); + fprintf (dump_file, " freq %f\n", + max.to_sreal_scale (bb->count).to_double ()); + } BITMAP_FREE (info.bb_set); - if (max < bb->frequency) - return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1); - else - return REG_BR_PROB_BASE; + if (max < bb->count) + return MAX ((max.to_sreal_scale (bb->count) + * REG_BR_PROB_BASE).to_int (), 1); + return REG_BR_PROB_BASE; } } @@ -1860,6 +1851,7 @@ { gimple *stmt = gsi_stmt (bsi); if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT) + || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY) || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT)) { tree var = gimple_call_lhs (stmt); @@ -1984,8 +1976,8 @@ <0,2>. */ basic_block bb; struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); - int freq; - struct ipa_fn_summary *info = ipa_fn_summaries->get (node); + sreal freq; + struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); predicate bb_predicate; struct ipa_func_body_info fbi; vec<predicate> nonconstant_names = vNULL; @@ -2050,7 +2042,7 @@ for (n = 0; n < nblocks; n++) { bb = BASIC_BLOCK_FOR_FN (cfun, order[n]); - freq = compute_call_stmt_bb_frequency (node->decl, bb); + freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count); if (clobber_only_eh_bb_p (bb)) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2125,7 +2117,7 @@ fprintf (dump_file, " "); print_gimple_stmt (dump_file, stmt, 0); fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n", - ((double) freq) / CGRAPH_FREQ_BASE, this_size, + freq.to_double (), this_size, this_time); } @@ -2153,7 +2145,7 @@ && !gimple_call_internal_p (stmt)) { struct cgraph_edge *edge = node->get_edge (stmt); - struct ipa_call_summary *es = ipa_call_summaries->get (edge); + ipa_call_summary *es = ipa_call_summaries->get_create (edge); /* Special case: results of BUILT_IN_CONSTANT_P will be always resolved as constant. We however don't want to optimize @@ -2199,7 +2191,7 @@ will_be_nonconstant = true; if (this_time || this_size) { - this_time *= freq; + sreal final_time = (sreal)this_time * freq; prob = eliminated_by_inlining_prob (stmt); if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS)) @@ -2216,7 +2208,7 @@ if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false) { - time += this_time; + time += final_time; size += this_size; } @@ -2229,14 +2221,12 @@ { predicate ip = bb_predicate & predicate::not_inlined (); info->account_size_time (this_size * prob, - (sreal)(this_time * prob) - / (CGRAPH_FREQ_BASE * 2), ip, + (this_time * prob) / 2, ip, p); } if (prob != 2) info->account_size_time (this_size * (2 - prob), - (sreal)(this_time * (2 - prob)) - / (CGRAPH_FREQ_BASE * 2), + (this_time * (2 - prob) / 2), bb_predicate, p); } @@ -2253,8 +2243,8 @@ } } } - set_hint_predicate (&ipa_fn_summaries->get (node)->array_index, array_index); - time = time / CGRAPH_FREQ_BASE; + set_hint_predicate (&ipa_fn_summaries->get_create (node)->array_index, + array_index); free (order); if (nonconstant_names.exists () && !early) @@ -2339,10 +2329,9 @@ } free (body); } - set_hint_predicate (&ipa_fn_summaries->get (node)->loop_iterations, - loop_iterations); - set_hint_predicate (&ipa_fn_summaries->get (node)->loop_stride, - loop_stride); + ipa_fn_summary *s = ipa_fn_summaries->get (node); + set_hint_predicate (&s->loop_iterations, loop_iterations); + set_hint_predicate (&s->loop_stride, loop_stride); scev_finalize (); } FOR_ALL_BB_FN (bb, my_function) @@ -2360,8 +2349,9 @@ e->aux = NULL; } } - ipa_fn_summaries->get (node)->time = time; - ipa_fn_summaries->get (node)->self_size = size; + ipa_fn_summary *s = ipa_fn_summaries->get (node); + s->time = time; + s->self_size = size; nonconstant_names.release (); ipa_release_body_info (&fbi); if (opt_for_fn (node->decl, optimize)) @@ -2395,8 +2385,10 @@ if (!ipa_fn_summaries) ipa_fn_summary_alloc (); - info = ipa_fn_summaries->get (node); - info->reset (node); + /* Create a new ipa_fn_summary. */ + ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node); + ipa_fn_summaries->remove (node); + info = ipa_fn_summaries->get_create (node); /* Estimate the stack size for the function if we're optimizing. */ self_stack_size = optimize && !node->thunk.thunk_p @@ -2407,7 +2399,7 @@ if (node->thunk.thunk_p) { - struct ipa_call_summary *es = ipa_call_summaries->get (node->callees); + ipa_call_summary *es = ipa_call_summaries->get_create (node->callees); predicate t = true; node->local.can_change_signature = false; @@ -2424,6 +2416,11 @@ info->inlinable = false; node->callees->inline_failed = CIF_CHKP; } + else if (stdarg_p (TREE_TYPE (node->decl))) + { + info->inlinable = false; + node->callees->inline_failed = CIF_VARIADIC_THUNK; + } else info->inlinable = true; } @@ -2440,10 +2437,12 @@ else info->inlinable = tree_inlinable_function_p (node->decl); - info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun); - /* Type attributes can use parameter indices to describe them. */ - if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))) + if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)) + /* Likewise for #pragma omp declare simd functions or functions + with simd attribute. */ + || lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (node->decl))) node->local.can_change_signature = false; else { @@ -2456,10 +2455,8 @@ for (e = node->callees; e; e = e->next_callee) { tree cdecl = e->callee->decl; - if (DECL_BUILT_IN (cdecl) - && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL - && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS - || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START)) + if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS) + || fndecl_built_in_p (cdecl, BUILT_IN_VA_START)) break; } node->local.can_change_signature = !e; @@ -2577,10 +2574,9 @@ if (min_size) *min_size += cur_size; if (prob == REG_BR_PROB_BASE) - *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE; + *time += ((sreal)call_time) * e->sreal_frequency (); else - *time += ((sreal)call_time) * (prob * e->frequency) - / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE); + *time += ((sreal)call_time * prob) * e->sreal_frequency (); } @@ -2601,7 +2597,7 @@ struct cgraph_edge *e; for (e = node->callees; e; e = e->next_callee) { - struct ipa_call_summary *es = ipa_call_summaries->get (e); + struct ipa_call_summary *es = ipa_call_summaries->get_create (e); /* Do not care about zero sized builtins. */ if (e->inline_failed && !es->call_stmt_size) @@ -2632,7 +2628,7 @@ } for (e = node->indirect_calls; e; e = e->next_callee) { - struct ipa_call_summary *es = ipa_call_summaries->get (e); + struct ipa_call_summary *es = ipa_call_summaries->get_create (e); if (!es->predicate || es->predicate->evaluate (possible_truths)) estimate_edge_size_and_time (e, size, @@ -2667,7 +2663,7 @@ vec<inline_param_summary> inline_param_summary) { - struct ipa_fn_summary *info = ipa_fn_summaries->get (node); + struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); size_time_entry *e; int size = 0; sreal time = 0; @@ -2746,7 +2742,7 @@ gcc_checking_assert (time >= 0); /* nonspecialized_time should be always bigger than specialized time. Roundoff issues however may get into the way. */ - gcc_checking_assert ((nonspecialized_time - time) >= -1); + gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1); /* Roundoff issues may make specialized time bigger than nonspecialized time. We do not really want that to happen because some heurstics @@ -2821,8 +2817,8 @@ inline_update_callee_summaries (struct cgraph_node *node, int depth) { struct cgraph_edge *e; - struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (node); - struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller); + ipa_fn_summary *callee_info = ipa_fn_summaries->get (node); + ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller); HOST_WIDE_INT peak; callee_info->stack_frame_offset @@ -2830,8 +2826,10 @@ + caller_info->estimated_self_stack_size; peak = callee_info->stack_frame_offset + callee_info->estimated_self_stack_size; - if (ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size < peak) - ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size = peak; + + ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to); + if (s->estimated_stack_size < peak) + s->estimated_stack_size = peak; ipa_propagate_frequency (node); for (e = node->callees; e; e = e->next_callee) { @@ -2982,7 +2980,7 @@ void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) { - struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee); + ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee); struct cgraph_node *to = (edge->caller->global.inlined_to ? edge->caller->global.inlined_to : edge->caller); struct ipa_fn_summary *info = ipa_fn_summaries->get (to); @@ -3057,7 +3055,7 @@ toplev_predicate); if (p != false && nonconstp != false) { - sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE; + sreal add_time = ((sreal)e->time * edge->sreal_frequency ()); int prob = e->nonconst_predicate.probability (callee_info->conds, clause, es->param); add_time = add_time * prob / REG_BR_PROB_BASE; @@ -3082,8 +3080,8 @@ &callee_info->array_index, operand_map, offset_map, clause, &toplev_predicate); - inline_update_callee_summaries (edge->callee, - ipa_call_summaries->get (edge)->loop_depth); + ipa_call_summary *s = ipa_call_summaries->get (edge); + inline_update_callee_summaries (edge->callee, s->loop_depth); /* We do not maintain predicates of inlined edges, free it. */ edge_set_predicate (edge, &true_p); @@ -3099,7 +3097,7 @@ void ipa_update_overall_fn_summary (struct cgraph_node *node) { - struct ipa_fn_summary *info = ipa_fn_summaries->get (node); + struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); size_time_entry *e; int i; @@ -3197,13 +3195,17 @@ static void read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e) { - struct ipa_call_summary *es = ipa_call_summaries->get (e); + struct ipa_call_summary *es = ipa_call_summaries->get_create (e); predicate p; int length, i; es->call_stmt_size = streamer_read_uhwi (ib); es->call_stmt_time = streamer_read_uhwi (ib); es->loop_depth = streamer_read_uhwi (ib); + + bitpack_d bp = streamer_read_bitpack (ib); + es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1); + p.stream_in (ib); edge_set_predicate (e, &p); length = streamer_read_uhwi (ib); @@ -3252,7 +3254,7 @@ encoder = file_data->symtab_node_encoder; node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index)); - info = ipa_fn_summaries->get (node); + info = ipa_fn_summaries->get_create (node); info->estimated_stack_size = info->estimated_self_stack_size = streamer_read_uhwi (&ib); @@ -3261,7 +3263,6 @@ bp = streamer_read_bitpack (&ib); info->inlinable = bp_unpack_value (&bp, 1); - info->contains_cilk_spawn = bp_unpack_value (&bp, 1); info->fp_expressions = bp_unpack_value (&bp, 1); count2 = streamer_read_uhwi (&ib); @@ -3360,6 +3361,11 @@ streamer_write_uhwi (ob, es->call_stmt_size); streamer_write_uhwi (ob, es->call_stmt_time); streamer_write_uhwi (ob, es->loop_depth); + + bitpack_d bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, es->is_return_callee_uncaptured, 1); + streamer_write_bitpack (&bp); + if (es->predicate) es->predicate->stream_out (ob); else @@ -3410,7 +3416,7 @@ info->time.stream_out (ob); bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, info->inlinable, 1); - bp_pack_value (&bp, info->contains_cilk_spawn, 1); + bp_pack_value (&bp, false, 1); bp_pack_value (&bp, info->fp_expressions, 1); streamer_write_bitpack (&bp); streamer_write_uhwi (ob, vec_safe_length (info->conds)); @@ -3472,7 +3478,7 @@ return; FOR_EACH_DEFINED_FUNCTION (node) if (!node->alias) - ipa_fn_summaries->get (node)->reset (node); + ipa_fn_summaries->remove (node); ipa_fn_summaries->release (); ipa_fn_summaries = NULL; ipa_call_summaries->release (); @@ -3535,26 +3541,36 @@ 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - /* Early optimizations may make function unreachable. We can not - remove unreachable functions as part of the ealry opts pass because - TODOs are run before subpasses. Do it here. */ - ( TODO_remove_functions | TODO_dump_symtab ), /* todo_flags_finish */ + 0, /* todo_flags_finish */ }; class pass_ipa_free_fn_summary : public simple_ipa_opt_pass { public: pass_ipa_free_fn_summary (gcc::context *ctxt) - : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt) + : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt), + small_p (false) {} /* opt_pass methods: */ + opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); } + void set_pass_param (unsigned int n, bool param) + { + gcc_assert (n == 0); + small_p = param; + } + virtual bool gate (function *) { return small_p || !flag_wpa; } virtual unsigned int execute (function *) { ipa_free_fn_summary (); - return 0; + /* Early optimizations may make function unreachable. We can not + remove unreachable functions as part of the early opts pass because + TODOs are run before subpasses. Do it here. */ + return small_p ? TODO_remove_functions | TODO_dump_symtab : 0; } +private: + bool small_p; }; // class pass_ipa_free_fn_summary } // anon namespace @@ -3608,3 +3624,12 @@ { return new pass_ipa_fn_summary (ctxt); } + +/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler + within the same process. For use by toplev::finalize. */ + +void +ipa_fnsummary_c_finalize (void) +{ + ipa_free_fn_summary (); +}