diff gcc/ipa-profile.c @ 111:04ced10e8804
gcc 7
| author   | kono                            |
|----------|---------------------------------|
| date     | Fri, 27 Oct 2017 22:46:09 +0900 |
| parents  |                                 |
| children | 84e7813d76e9                    |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gcc/ipa-profile.c	Fri Oct 27 22:46:09 2017 +0900
@@ -0,0 +1,790 @@
+/* Basic IPA optimizations based on profile.
+   Copyright (C) 2003-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* The ipa-profile pass implements the following analyses, propagating
+   profile information inter-procedurally.
+
+   - Count histogram construction.  This is a histogram analyzing how much
+     time is spent executing statements with a given execution count read
+     from profile feedback.  This histogram is complete only with LTO,
+     otherwise it contains information only about the current unit.
+
+     A similar histogram is also estimated by the coverage runtime.  That
+     histogram is not dependent on LTO, but it suffers from various defects:
+     first, the gcov runtime does not weight individual basic blocks by
+     estimated execution time, and second, the merging of multiple runs
+     assumes that the histogram distribution did not change.  Consequently
+     the histogram constructed here may be more precise.
+
+     The information is used to set hot/cold thresholds.
+   - Next, speculative indirect call resolution is performed: the local
+     profile pass assigns a profile-id to each function and provides us with
+     a histogram specifying the most common target.  We look up the callgraph
+     node corresponding to the target and produce a speculative call.
+
+     This call may or may not survive through IPA optimization, based on the
+     decision of the inliner.
+   - Finally we propagate the following flags: unlikely executed, executed
+     once, executed at startup and executed at exit.  These flags are used to
+     control code size/performance thresholds and code placement (by producing
+     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "predict.h"
+#include "alloc-pool.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "data-streamer.h"
+#include "gimple-iterator.h"
+#include "ipa-utils.h"
+#include "profile.h"
+#include "params.h"
+#include "value-prof.h"
+#include "tree-inline.h"
+#include "symbol-summary.h"
+#include "tree-vrp.h"
+#include "ipa-prop.h"
+#include "ipa-fnsummary.h"
+
+/* Entry in the histogram.  */
+
+struct histogram_entry
+{
+  gcov_type count;
+  int time;
+  int size;
+};
+
+/* Histogram of profile values.
+   The histogram is represented as an ordered vector of entries allocated via
+   histogram_pool.  During construction a separate hashtable is kept to look
+   up duplicate entries.  */
+
+vec<histogram_entry *> histogram;
+static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
+
+/* Hashtable support for looking up histogram entries by execution count.  */
+
+struct histogram_hash : nofree_ptr_hash <histogram_entry>
+{
+  static inline hashval_t hash (const histogram_entry *);
+  static inline int equal (const histogram_entry *, const histogram_entry *);
+};
+
+inline hashval_t
+histogram_hash::hash (const histogram_entry *val)
+{
+  return val->count;
+}
+
+inline int
+histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
+{
+  return val->count == val2->count;
+}
+
+/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
+   HASHTABLE is the on-the-side hash kept to avoid duplicates.  */
+
+static void
+account_time_size (hash_table<histogram_hash> *hashtable,
+                   vec<histogram_entry *> &histogram,
+                   gcov_type count, int time, int size)
+{
+  histogram_entry key = {count, 0, 0};
+  histogram_entry **val = hashtable->find_slot (&key, INSERT);
+
+  if (!*val)
+    {
+      *val = histogram_pool.allocate ();
+      **val = key;
+      histogram.safe_push (*val);
+    }
+  (*val)->time += time;
+  (*val)->size += size;
+}
+
+int
+cmp_counts (const void *v1, const void *v2)
+{
+  const histogram_entry *h1 = *(const histogram_entry * const *) v1;
+  const histogram_entry *h2 = *(const histogram_entry * const *) v2;
+  if (h1->count < h2->count)
+    return 1;
+  if (h1->count > h2->count)
+    return -1;
+  return 0;
+}
+
+/* Dump HISTOGRAM to FILE.  */
+
+static void
+dump_histogram (FILE *file, vec<histogram_entry *> histogram)
+{
+  unsigned int i;
+  gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
+
+  fprintf (dump_file, "Histogram:\n");
+  for (i = 0; i < histogram.length (); i++)
+    {
+      overall_time += histogram[i]->count * histogram[i]->time;
+      overall_size += histogram[i]->size;
+    }
+  if (!overall_time)
+    overall_time = 1;
+  if (!overall_size)
+    overall_size = 1;
+  for (i = 0; i < histogram.length (); i++)
+    {
+      cumulated_time += histogram[i]->count * histogram[i]->time;
+      cumulated_size += histogram[i]->size;
+      fprintf (file, "  %" PRId64 ": time:%i (%2.2f) size:%i (%2.2f)\n",
+               (int64_t) histogram[i]->count,
+               histogram[i]->time,
+               cumulated_time * 100.0 / overall_time,
+               histogram[i]->size,
+               cumulated_size * 100.0 / overall_size);
+    }
+}
+
+/* Collect histogram from CFG profiles.  */
+
+static void
+ipa_profile_generate_summary (void)
+{
+  struct cgraph_node *node;
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+
+  hash_table<histogram_hash> hashtable (10);
+
+  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+    FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
+      {
+        int time = 0;
+        int size = 0;
+        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+          {
+            gimple *stmt = gsi_stmt (gsi);
+            if (gimple_code (stmt) == GIMPLE_CALL
+                && !gimple_call_fndecl (stmt))
+              {
+                histogram_value h;
+                h = gimple_histogram_value_of_type
+                      (DECL_STRUCT_FUNCTION (node->decl),
+                       stmt, HIST_TYPE_INDIR_CALL);
+                /* No need to do sanity check: gimple_ic_transform already
+                   takes away bad histograms.  */
+                if (h)
+                  {
+                    /* Counter 0 is the target, counter 1 is the number of
+                       executions that called the target, counter 2 is the
+                       total number of executions.  */
+                    if (h->hvalue.counters[2])
+                      {
+                        struct cgraph_edge * e = node->get_edge (stmt);
+                        if (e && !e->indirect_unknown_callee)
+                          continue;
+                        e->indirect_info->common_target_id
+                          = h->hvalue.counters [0];
+                        e->indirect_info->common_target_probability
+                          = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
+                        if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
+                          {
+                            if (dump_file)
+                              fprintf (dump_file, "Probability capped to 1\n");
+                            e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
+                          }
+                      }
+                    gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
+                                                   stmt, h);
+                  }
+              }
+            time += estimate_num_insns (stmt, &eni_time_weights);
+            size += estimate_num_insns (stmt, &eni_size_weights);
+          }
+        if (bb->count.initialized_p ())
+          account_time_size (&hashtable, histogram, bb->count.to_gcov_type (),
+                             time, size);
+      }
+  histogram.qsort (cmp_counts);
+}
+
+/* Serialize the ipa info for lto.  */
+
+static void
+ipa_profile_write_summary (void)
+{
+  struct lto_simple_output_block *ob
+    = lto_create_simple_output_block (LTO_section_ipa_profile);
+  unsigned int i;
+
+  streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
+  for (i = 0; i < histogram.length (); i++)
+    {
+      streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
+      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
+      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
+    }
+  lto_destroy_simple_output_block (ob);
+}
+
+/* Deserialize the ipa info for lto.  */
+
+static void
+ipa_profile_read_summary (void)
+{
+  struct lto_file_decl_data ** file_data_vec
+    = lto_get_file_decl_data ();
+  struct lto_file_decl_data * file_data;
+  int j = 0;
+
+  hash_table<histogram_hash> hashtable (10);
+
+  while ((file_data = file_data_vec[j++]))
+    {
+      const char *data;
+      size_t len;
+      struct lto_input_block *ib
+        = lto_create_simple_input_block (file_data,
+                                         LTO_section_ipa_profile,
+                                         &data, &len);
+      if (ib)
+        {
+          unsigned int num = streamer_read_uhwi (ib);
+          unsigned int n;
+          for (n = 0; n < num; n++)
+            {
+              gcov_type count = streamer_read_gcov_count (ib);
+              int time = streamer_read_uhwi (ib);
+              int size = streamer_read_uhwi (ib);
+              account_time_size (&hashtable, histogram,
+                                 count, time, size);
+            }
+          lto_destroy_simple_input_block (file_data,
+                                          LTO_section_ipa_profile,
+                                          ib, data, len);
+        }
+    }
+  histogram.qsort (cmp_counts);
+}
+
+/* Data used by ipa_propagate_frequency.  */
+
+struct ipa_propagate_frequency_data
+{
+  cgraph_node *function_symbol;
+  bool maybe_unlikely_executed;
+  bool maybe_executed_once;
+  bool only_called_at_startup;
+  bool only_called_at_exit;
+};
+
+/* Worker for ipa_propagate_frequency.  */
+
+static bool
+ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
+{
+  struct ipa_propagate_frequency_data *d;
+  struct cgraph_edge *edge;
+
+  d = (struct ipa_propagate_frequency_data *)data;
+  for (edge = node->callers;
+       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
+                || d->only_called_at_startup || d->only_called_at_exit);
+       edge = edge->next_caller)
+    {
+      if (edge->caller != d->function_symbol)
+        {
+          d->only_called_at_startup &= edge->caller->only_called_at_startup;
+          /* It makes sense to put main() together with the static
+             constructors.  It will be executed for sure, but the rest of
+             the functions called from main are definitely not startup
+             only.  */
+          if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
+            d->only_called_at_startup = 0;
+          d->only_called_at_exit &= edge->caller->only_called_at_exit;
+        }
+
+      /* When profile feedback is available, do not try to propagate too hard;
+         counts are already a good guide to function frequencies, and roundoff
+         errors can push a function into the unlikely section even when it is
+         executed by the train run.  Transfer the function only if all callers
+         are unlikely executed.  */
+      if (profile_info
+          && edge->callee->count.initialized_p ()
+          /* Thunks are not profiled.  This is more or less an implementation
+             bug.  */
+          && !d->function_symbol->thunk.thunk_p
+          && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
+              || (edge->caller->global.inlined_to
+                  && edge->caller->global.inlined_to->frequency
+                     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
+        d->maybe_unlikely_executed = false;
+      if (!edge->frequency)
+        continue;
+      switch (edge->caller->frequency)
+        {
+        case NODE_FREQUENCY_UNLIKELY_EXECUTED:
+          break;
+        case NODE_FREQUENCY_EXECUTED_ONCE:
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            fprintf (dump_file, "  Called by %s that is executed once\n",
+                     edge->caller->name ());
+          d->maybe_unlikely_executed = false;
+          if (ipa_call_summaries->get (edge)->loop_depth)
+            {
+              d->maybe_executed_once = false;
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                fprintf (dump_file, "  Called in loop\n");
+            }
+          break;
+        case NODE_FREQUENCY_HOT:
+        case NODE_FREQUENCY_NORMAL:
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            fprintf (dump_file, "  Called by %s that is normal or hot\n",
+                     edge->caller->name ());
+          d->maybe_unlikely_executed = false;
+          d->maybe_executed_once = false;
+          break;
+        }
+    }
+  return edge != NULL;
+}
+
+/* Return true if NODE contains hot calls.  */
+
+bool
+contains_hot_call_p (struct cgraph_node *node)
+{
+  struct cgraph_edge *e;
+  for (e = node->callees; e; e = e->next_callee)
+    if (e->maybe_hot_p ())
+      return true;
+    else if (!e->inline_failed
+             && contains_hot_call_p (e->callee))
+      return true;
+  for (e = node->indirect_calls; e; e = e->next_callee)
+    if (e->maybe_hot_p ())
+      return true;
+  return false;
+}
+
+/* See if the frequency of NODE can be updated based on frequencies of its
+   callers.  */
+bool
+ipa_propagate_frequency (struct cgraph_node *node)
+{
+  struct ipa_propagate_frequency_data d = {node, true, true, true, true};
+  bool changed = false;
+
+  /* We cannot propagate anything useful about externally visible functions
+     nor about virtuals.  */
+  if (!node->local.local
+      || node->alias
+      || (opt_for_fn (node->decl, flag_devirtualize)
+          && DECL_VIRTUAL_P (node->decl)))
+    return false;
+  gcc_assert (node->analyzed);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Processing frequency %s\n", node->name ());
+
+  node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
+                                     true);
+
+  if ((d.only_called_at_startup && !d.only_called_at_exit)
+      && !node->only_called_at_startup)
+    {
+      node->only_called_at_startup = true;
+      if (dump_file)
+        fprintf (dump_file, "Node %s promoted to only called at startup.\n",
+                 node->name ());
+      changed = true;
+    }
+  if ((d.only_called_at_exit && !d.only_called_at_startup)
+      && !node->only_called_at_exit)
+    {
+      node->only_called_at_exit = true;
+      if (dump_file)
+        fprintf (dump_file, "Node %s promoted to only called at exit.\n",
+                 node->name ());
+      changed = true;
+    }
+
+  /* With a profile we can decide on hot/normal based on the count.  */
+  if (node->count.initialized_p ())
+    {
+      bool hot = false;
+      if (!(node->count == profile_count::zero ())
+          && node->count >= get_hot_bb_threshold ())
+        hot = true;
+      if (!hot)
+        hot |= contains_hot_call_p (node);
+      if (hot)
+        {
+          if (node->frequency != NODE_FREQUENCY_HOT)
+            {
+              if (dump_file)
+                fprintf (dump_file, "Node %s promoted to hot.\n",
+                         node->name ());
+              node->frequency = NODE_FREQUENCY_HOT;
+              return true;
+            }
+          return false;
+        }
+      else if (node->frequency == NODE_FREQUENCY_HOT)
+        {
+          if (dump_file)
+            fprintf (dump_file, "Node %s reduced to normal.\n",
+                     node->name ());
+          node->frequency = NODE_FREQUENCY_NORMAL;
+          changed = true;
+        }
+    }
+  /* These come either from profile or user hints; never update them.  */
+  if (node->frequency == NODE_FREQUENCY_HOT
+      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+    return changed;
+  if (d.maybe_unlikely_executed)
+    {
+      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
+      if (dump_file)
+        fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
+                 node->name ());
+      changed = true;
+    }
+  else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
+    {
+      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
+      if (dump_file)
+        fprintf (dump_file, "Node %s promoted to executed once.\n",
+                 node->name ());
+      changed = true;
+    }
+  return changed;
+}
+
+/* Simple ipa profile pass propagating frequencies across the callgraph.  */
+
+static unsigned int
+ipa_profile (void)
+{
+  struct cgraph_node **order;
+  struct cgraph_edge *e;
+  int order_pos;
+  bool something_changed = false;
+  int i;
+  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
+  struct cgraph_node *n, *n2;
+  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
+  int nmismatch = 0, nimpossible = 0;
+  bool node_map_initialized = false;
+
+  if (dump_file)
+    dump_histogram (dump_file, histogram);
+  for (i = 0; i < (int)histogram.length (); i++)
+    {
+      overall_time += histogram[i]->count * histogram[i]->time;
+      overall_size += histogram[i]->size;
+    }
+  if (overall_time)
+    {
+      gcov_type threshold;
+
+      gcc_assert (overall_size);
+      if (dump_file)
+        {
+          gcov_type min, cumulated_time = 0, cumulated_size = 0;
+
+          fprintf (dump_file, "Overall time: %" PRId64 "\n",
+                   (int64_t)overall_time);
+          min = get_hot_bb_threshold ();
+          for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
+               i++)
+            {
+              cumulated_time += histogram[i]->count * histogram[i]->time;
+              cumulated_size += histogram[i]->size;
+            }
+          fprintf (dump_file, "GCOV min count: %" PRId64
+                   " Time:%3.2f%% Size:%3.2f%%\n",
+                   (int64_t)min,
+                   cumulated_time * 100.0 / overall_time,
+                   cumulated_size * 100.0 / overall_size);
+        }
+      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
+      threshold = 0;
+      for (i = 0; cumulated < cutoff; i++)
+        {
+          cumulated += histogram[i]->count * histogram[i]->time;
+          threshold = histogram[i]->count;
+        }
+      if (!threshold)
+        threshold = 1;
+      if (dump_file)
+        {
+          gcov_type cumulated_time = 0, cumulated_size = 0;
+
+          for (i = 0;
+               i < (int)histogram.length () && histogram[i]->count >= threshold;
+               i++)
+            {
+              cumulated_time += histogram[i]->count * histogram[i]->time;
+              cumulated_size += histogram[i]->size;
+            }
+          fprintf (dump_file, "Determined min count: %" PRId64
+                   " Time:%3.2f%% Size:%3.2f%%\n",
+                   (int64_t)threshold,
+                   cumulated_time * 100.0 / overall_time,
+                   cumulated_size * 100.0 / overall_size);
+        }
+      if (threshold > get_hot_bb_threshold ()
+          || in_lto_p)
+        {
+          if (dump_file)
+            fprintf (dump_file, "Threshold updated.\n");
+          set_hot_bb_threshold (threshold);
+        }
+    }
+  histogram.release ();
+  histogram_pool.release ();
+
+  /* Produce speculative calls: we saved the common target from profiling
+     into e->common_target_id.  Now, at link time, we can look up the
+     corresponding function node and produce a speculative call.  */
+
+  FOR_EACH_DEFINED_FUNCTION (n)
+    {
+      bool update = false;
+
+      if (!opt_for_fn (n->decl, flag_ipa_profile))
+        continue;
+
+      for (e = n->indirect_calls; e; e = e->next_callee)
+        {
+          if (n->count.initialized_p ())
+            nindirect++;
+          if (e->indirect_info->common_target_id)
+            {
+              if (!node_map_initialized)
+                init_node_map (false);
+              node_map_initialized = true;
+              ncommon++;
+              n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
+              if (n2)
+                {
+                  if (dump_file)
+                    {
+                      fprintf (dump_file, "Indirect call -> direct call from"
+                               " other module %s => %s, prob %3.2f\n",
+                               n->dump_name (),
+                               n2->dump_name (),
+                               e->indirect_info->common_target_probability
+                               / (float)REG_BR_PROB_BASE);
+                    }
+                  if (e->indirect_info->common_target_probability
+                      < REG_BR_PROB_BASE / 2)
+                    {
+                      nuseless++;
+                      if (dump_file)
+                        fprintf (dump_file,
+                                 "Not speculating: probability is too low.\n");
+                    }
+                  else if (!e->maybe_hot_p ())
+                    {
+                      nuseless++;
+                      if (dump_file)
+                        fprintf (dump_file,
+                                 "Not speculating: call is cold.\n");
+                    }
+                  else if (n2->get_availability () <= AVAIL_INTERPOSABLE
+                           && n2->can_be_discarded_p ())
+                    {
+                      nuseless++;
+                      if (dump_file)
+                        fprintf (dump_file,
+                                 "Not speculating: target is overwritable "
+                                 "and can be discarded.\n");
+                    }
+                  else if (ipa_node_params_sum && ipa_edge_args_sum
+                           && (!vec_safe_is_empty
+                               (IPA_NODE_REF (n2)->descriptors))
+                           && ipa_get_param_count (IPA_NODE_REF (n2))
+                              != ipa_get_cs_argument_count (IPA_EDGE_REF (e))
+                           && (ipa_get_param_count (IPA_NODE_REF (n2))
+                               >= ipa_get_cs_argument_count (IPA_EDGE_REF (e))
+                               || !stdarg_p (TREE_TYPE (n2->decl))))
+                    {
+                      nmismatch++;
+                      if (dump_file)
+                        fprintf (dump_file,
+                                 "Not speculating: "
+                                 "parameter count mismatch\n");
+                    }
+                  else if (e->indirect_info->polymorphic
+                           && !opt_for_fn (n->decl, flag_devirtualize)
+                           && !possible_polymorphic_call_target_p (e, n2))
+                    {
+                      nimpossible++;
+                      if (dump_file)
+                        fprintf (dump_file,
+                                 "Not speculating: "
+                                 "function is not in the polymorphic "
+                                 "call target list\n");
+                    }
+                  else
+                    {
+                      /* Target may be overwritable, but profile says that
+                         control flow goes to this particular implementation
+                         of N2.  Speculate on the local alias to allow
+                         inlining.  */
+                      if (!n2->can_be_discarded_p ())
+                        {
+                          cgraph_node *alias;
+                          alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
+                          if (alias)
+                            n2 = alias;
+                        }
+                      nconverted++;
+                      e->make_speculative
+                        (n2,
+                         e->count.apply_probability
+                           (e->indirect_info->common_target_probability),
+                         apply_scale (e->frequency,
+                                      e->indirect_info->common_target_probability));
+                      update = true;
+                    }
+                }
+              else
+                {
+                  if (dump_file)
+                    fprintf (dump_file, "Function with profile-id %i not found.\n",
+                             e->indirect_info->common_target_id);
+                  nunknown++;
+                }
+            }
+        }
+      if (update)
+        ipa_update_overall_fn_summary (n);
+    }
+  if (node_map_initialized)
+    del_node_map ();
+  if (dump_file && nindirect)
+    fprintf (dump_file,
+             "%i indirect calls trained.\n"
+             "%i (%3.2f%%) have common target.\n"
+             "%i (%3.2f%%) targets were not found.\n"
+             "%i (%3.2f%%) targets had parameter count mismatch.\n"
+             "%i (%3.2f%%) targets were not in polymorphic call target list.\n"
+             "%i (%3.2f%%) speculations seem useless.\n"
+             "%i (%3.2f%%) speculations produced.\n",
+             nindirect,
+             ncommon, ncommon * 100.0 / nindirect,
+             nunknown, nunknown * 100.0 / nindirect,
+             nmismatch, nmismatch * 100.0 / nindirect,
+             nimpossible, nimpossible * 100.0 / nindirect,
+             nuseless, nuseless * 100.0 / nindirect,
+             nconverted, nconverted * 100.0 / nindirect);
+
+  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
+  order_pos = ipa_reverse_postorder (order);
+  for (i = order_pos - 1; i >= 0; i--)
+    {
+      if (order[i]->local.local
+          && opt_for_fn (order[i]->decl, flag_ipa_profile)
+          && ipa_propagate_frequency (order[i]))
+        {
+          for (e = order[i]->callees; e; e = e->next_callee)
+            if (e->callee->local.local && !e->callee->aux)
+              {
+                something_changed = true;
+                e->callee->aux = (void *)1;
+              }
+        }
+      order[i]->aux = NULL;
+    }
+
+  while (something_changed)
+    {
+      something_changed = false;
+      for (i = order_pos - 1; i >= 0; i--)
+        {
+          if (order[i]->aux
+              && opt_for_fn (order[i]->decl, flag_ipa_profile)
+              && ipa_propagate_frequency (order[i]))
+            {
+              for (e = order[i]->callees; e; e = e->next_callee)
+                if (e->callee->local.local && !e->callee->aux)
+                  {
+                    something_changed = true;
+                    e->callee->aux = (void *)1;
+                  }
+            }
+          order[i]->aux = NULL;
+        }
+    }
+  free (order);
+  return 0;
+}
+
+namespace {
+
+const pass_data pass_data_ipa_profile =
+{
+  IPA_PASS, /* type */
+  "profile_estimate", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_IPA_PROFILE, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_ipa_profile : public ipa_opt_pass_d
+{
+public:
+  pass_ipa_profile (gcc::context *ctxt)
+    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
+                      ipa_profile_generate_summary, /* generate_summary */
+                      ipa_profile_write_summary, /* write_summary */
+                      ipa_profile_read_summary, /* read_summary */
+                      NULL, /* write_optimization_summary */
+                      NULL, /* read_optimization_summary */
+                      NULL, /* stmt_fixup */
+                      0, /* function_transform_todo_flags_start */
+                      NULL, /* function_transform */
+                      NULL) /* variable_transform */
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
+  virtual unsigned int execute (function *) { return ipa_profile (); }
+
+}; // class pass_ipa_profile
+
+} // anon namespace
+
+ipa_opt_pass_d *
+make_pass_ipa_profile (gcc::context *ctxt)
+{
+  return new pass_ipa_profile (ctxt);
+}
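
Editor's note: the hot/cold threshold selection in ipa_profile () above can be hard to follow inside the diff. The standalone C sketch below is not part of the changeset; it only mimics the working-set cutoff on invented histogram values, and it assumes 999 permille as the HOT_BB_COUNT_WS_PERMILLE default for this GCC version.

#include <stdio.h>
#include <stdlib.h>

struct entry { long count; int time; };

/* Sort by descending execution count, like cmp_counts in ipa-profile.c
   (that version compares pointers to entries; here entries are sorted
   directly).  */
static int
cmp_counts (const void *a, const void *b)
{
  const struct entry *e1 = (const struct entry *) a;
  const struct entry *e2 = (const struct entry *) b;
  if (e1->count < e2->count)
    return 1;
  if (e1->count > e2->count)
    return -1;
  return 0;
}

int
main (void)
{
  /* Invented (count, time) pairs standing in for the real histogram.  */
  struct entry hist[] = { {1000000, 2}, {500, 40}, {120000, 10}, {3, 300} };
  size_t n = sizeof (hist) / sizeof (hist[0]);
  long overall = 0, cumulated = 0, cutoff, threshold = 0;
  int ws_permille = 999;   /* Assumed HOT_BB_COUNT_WS_PERMILLE default.  */

  qsort (hist, n, sizeof (hist[0]), cmp_counts);
  for (size_t i = 0; i < n; i++)
    overall += hist[i].count * hist[i].time;

  /* Walk the sorted histogram until the cumulated time covers ws_permille
     permille of the overall time; the count reached there becomes the hot
     threshold, exactly as in the cutoff loop of ipa_profile ().  */
  cutoff = (overall * ws_permille + 500) / 1000;
  for (size_t i = 0; i < n && cumulated < cutoff; i++)
    {
      cumulated += hist[i].count * hist[i].time;
      threshold = hist[i].count;
    }
  if (!threshold)
    threshold = 1;

  printf ("hot count threshold: %ld\n", threshold);  /* Prints 500 here.  */
  return 0;
}

With these numbers, basic blocks executed at least 500 times cover the requested share of the total estimated runtime, so 500 would be handed to set_hot_bb_threshold.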