Mercurial > hg > CbC > CbC_gcc
diff gcc/lto-cgraph.c @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | b7f97abdc517 |
children | 04ced10e8804 |
line wrap: on
line diff
--- a/gcc/lto-cgraph.c Tue May 25 18:58:51 2010 +0900 +++ b/gcc/lto-cgraph.c Tue Mar 22 17:18:12 2011 +0900 @@ -1,7 +1,7 @@ /* Write and read the cgraph to the memory mapped representation of a .o file. - Copyright 2009 Free Software Foundation, Inc. + Copyright 2009, 2010 Free Software Foundation, Inc. Contributed by Kenneth Zadeck <zadeck@naturalbridge.com> This file is part of GCC. @@ -24,7 +24,6 @@ #include "system.h" #include "coretypes.h" #include "tm.h" -#include "toplev.h" #include "tree.h" #include "expr.h" #include "flags.h" @@ -37,7 +36,7 @@ #include "cgraph.h" #include "function.h" #include "ggc.h" -#include "diagnostic.h" +#include "diagnostic-core.h" #include "except.h" #include "vec.h" #include "timevar.h" @@ -47,7 +46,7 @@ #include "gcov-io.h" static void output_varpool (cgraph_node_set, varpool_node_set); -static void output_cgraph_opt_summary (void); +static void output_cgraph_opt_summary (cgraph_node_set set); static void input_cgraph_opt_summary (VEC (cgraph_node_ptr, heap) * nodes); @@ -234,14 +233,6 @@ } -/* Return number of encoded nodes in ENCODER. */ - -static int -lto_varpool_encoder_size (lto_varpool_encoder_t encoder) -{ - return VEC_length (varpool_node_ptr, encoder->nodes); -} - /* Return TRUE if we should encode initializer of NODE (if any). */ bool @@ -268,7 +259,7 @@ { unsigned int uid; intptr_t ref; - struct bitpack_d *bp; + struct bitpack_d bp; if (edge->indirect_unknown_callee) lto_output_uleb128_stream (ob->main_stream, LTO_cgraph_indirect_edge); @@ -288,32 +279,33 @@ lto_output_sleb128_stream (ob->main_stream, edge->count); - bp = bitpack_create (); - uid = flag_wpa ? 
edge->lto_stmt_uid : gimple_uid (edge->call_stmt); - bp_pack_value (bp, uid, HOST_BITS_PER_INT); - bp_pack_value (bp, edge->inline_failed, HOST_BITS_PER_INT); - bp_pack_value (bp, edge->frequency, HOST_BITS_PER_INT); - bp_pack_value (bp, edge->loop_nest, 30); - bp_pack_value (bp, edge->indirect_inlining_edge, 1); - bp_pack_value (bp, edge->call_stmt_cannot_inline_p, 1); - bp_pack_value (bp, edge->can_throw_external, 1); + bp = bitpack_create (ob->main_stream); + uid = (!gimple_has_body_p (edge->caller->decl) + ? edge->lto_stmt_uid : gimple_uid (edge->call_stmt)); + bp_pack_value (&bp, uid, HOST_BITS_PER_INT); + bp_pack_value (&bp, edge->inline_failed, HOST_BITS_PER_INT); + bp_pack_value (&bp, edge->frequency, HOST_BITS_PER_INT); + bp_pack_value (&bp, edge->loop_nest, 30); + bp_pack_value (&bp, edge->indirect_inlining_edge, 1); + bp_pack_value (&bp, edge->call_stmt_cannot_inline_p, 1); + bp_pack_value (&bp, edge->can_throw_external, 1); if (edge->indirect_unknown_callee) { int flags = edge->indirect_info->ecf_flags; - bp_pack_value (bp, (flags & ECF_CONST) != 0, 1); - bp_pack_value (bp, (flags & ECF_PURE) != 0, 1); - bp_pack_value (bp, (flags & ECF_NORETURN) != 0, 1); - bp_pack_value (bp, (flags & ECF_MALLOC) != 0, 1); - bp_pack_value (bp, (flags & ECF_NOTHROW) != 0, 1); - bp_pack_value (bp, (flags & ECF_RETURNS_TWICE) != 0, 1); + bp_pack_value (&bp, (flags & ECF_CONST) != 0, 1); + bp_pack_value (&bp, (flags & ECF_PURE) != 0, 1); + bp_pack_value (&bp, (flags & ECF_NORETURN) != 0, 1); + bp_pack_value (&bp, (flags & ECF_MALLOC) != 0, 1); + bp_pack_value (&bp, (flags & ECF_NOTHROW) != 0, 1); + bp_pack_value (&bp, (flags & ECF_RETURNS_TWICE) != 0, 1); /* Flags that should not appear on indirect calls. 
*/ gcc_assert (!(flags & (ECF_LOOPING_CONST_OR_PURE | ECF_MAY_BE_ALLOCA | ECF_SIBCALL + | ECF_LEAF | ECF_NOVOPS))); } - lto_output_bitpack (ob->main_stream, bp); - bitpack_delete (bp); + lto_output_bitpack (&bp); } /* Return if LIST contain references from other partitions. */ @@ -328,13 +320,15 @@ { if (ref->refering_type == IPA_REF_CGRAPH) { - if (!cgraph_node_in_set_p (ipa_ref_refering_node (ref), set)) + if (ipa_ref_refering_node (ref)->in_other_partition + || !cgraph_node_in_set_p (ipa_ref_refering_node (ref), set)) return true; } else { - if (!varpool_node_in_set_p (ipa_ref_refering_varpool_node (ref), - vset)) + if (ipa_ref_refering_varpool_node (ref)->in_other_partition + || !varpool_node_in_set_p (ipa_ref_refering_varpool_node (ref), + vset)) return true; } } @@ -352,7 +346,8 @@ if (node->global.inlined_to) return false; for (e = node->callers; e; e = e->next_caller) - if (!cgraph_node_in_set_p (e->caller, set)) + if (e->caller->in_other_partition + || !cgraph_node_in_set_p (e->caller, set)) return true; return false; } @@ -388,10 +383,6 @@ reachable_from_this_partition_p (struct cgraph_node *node, cgraph_node_set set) { struct cgraph_edge *e; - if (!node->analyzed) - return false; - if (node->global.inlined_to) - return false; for (e = node->callers; e; e = e->next_caller) if (cgraph_node_in_set_p (e->caller, set)) return true; @@ -412,7 +403,7 @@ varpool_node_set vset) { unsigned int tag; - struct bitpack_d *bp; + struct bitpack_d bp; bool boundary_p; intptr_t ref; bool in_other_partition = false; @@ -452,11 +443,14 @@ clone_of = node->clone_of; while (clone_of - && (ref = lto_cgraph_encoder_lookup (encoder, node->clone_of)) == LCC_NOT_FOUND) + && (ref = lto_cgraph_encoder_lookup (encoder, clone_of)) == LCC_NOT_FOUND) if (clone_of->prev_sibling_clone) clone_of = clone_of->prev_sibling_clone; else clone_of = clone_of->clone_of; + + if (LTO_cgraph_analyzed_node) + gcc_assert (clone_of || !node->clone_of); if (!clone_of) lto_output_sleb128_stream 
(ob->main_stream, LCC_NOT_FOUND); else @@ -465,30 +459,7 @@ lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl); lto_output_sleb128_stream (ob->main_stream, node->count); - - bp = bitpack_create (); - bp_pack_value (bp, node->local.local, 1); - bp_pack_value (bp, node->local.externally_visible, 1); - bp_pack_value (bp, node->local.finalized, 1); - bp_pack_value (bp, node->local.inlinable, 1); - bp_pack_value (bp, node->local.versionable, 1); - bp_pack_value (bp, node->local.disregard_inline_limits, 1); - bp_pack_value (bp, node->local.redefined_extern_inline, 1); - bp_pack_value (bp, node->local.vtable_method, 1); - bp_pack_value (bp, node->needed, 1); - bp_pack_value (bp, node->address_taken, 1); - bp_pack_value (bp, node->abstract_and_needed, 1); - bp_pack_value (bp, tag == LTO_cgraph_analyzed_node - && !DECL_EXTERNAL (node->decl) - && (reachable_from_other_partition_p (node, set) - || referenced_from_other_partition_p (&node->ref_list, set, vset)), 1); - bp_pack_value (bp, node->lowered, 1); - bp_pack_value (bp, in_other_partition, 1); - bp_pack_value (bp, node->alias, 1); - bp_pack_value (bp, node->finalized_by_frontend, 1); - bp_pack_value (bp, node->frequency, 2); - lto_output_bitpack (ob->main_stream, bp); - bitpack_delete (bp); + lto_output_sleb128_stream (ob->main_stream, node->count_materialization_scale); if (tag == LTO_cgraph_analyzed_node) { @@ -522,6 +493,34 @@ ref = LCC_NOT_FOUND; lto_output_sleb128_stream (ob->main_stream, ref); + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, node->local.local, 1); + bp_pack_value (&bp, node->local.externally_visible, 1); + bp_pack_value (&bp, node->local.finalized, 1); + bp_pack_value (&bp, node->local.inlinable, 1); + bp_pack_value (&bp, node->local.versionable, 1); + bp_pack_value (&bp, node->local.can_change_signature, 1); + bp_pack_value (&bp, node->local.disregard_inline_limits, 1); + bp_pack_value (&bp, node->local.redefined_extern_inline, 1); + bp_pack_value (&bp, 
node->local.vtable_method, 1); + bp_pack_value (&bp, node->needed, 1); + bp_pack_value (&bp, node->address_taken, 1); + bp_pack_value (&bp, node->abstract_and_needed, 1); + bp_pack_value (&bp, tag == LTO_cgraph_analyzed_node + && !DECL_EXTERNAL (node->decl) + && !DECL_COMDAT (node->decl) + && (reachable_from_other_partition_p (node, set) + || referenced_from_other_partition_p (&node->ref_list, set, vset)), 1); + bp_pack_value (&bp, node->lowered, 1); + bp_pack_value (&bp, in_other_partition, 1); + bp_pack_value (&bp, node->alias, 1); + bp_pack_value (&bp, node->finalized_by_frontend, 1); + bp_pack_value (&bp, node->frequency, 2); + bp_pack_value (&bp, node->only_called_at_startup, 1); + bp_pack_value (&bp, node->only_called_at_exit, 1); + lto_output_bitpack (&bp); + lto_output_uleb128_stream (ob->main_stream, node->resolution); + if (node->same_body) { struct cgraph_node *alias; @@ -552,6 +551,8 @@ lto_output_fn_decl_index (ob->decl_state, ob->main_stream, alias->thunk.alias); } + gcc_assert (cgraph_get_node (alias->thunk.alias) == node); + lto_output_uleb128_stream (ob->main_stream, alias->resolution); alias = alias->previous; } while (alias); @@ -569,41 +570,41 @@ cgraph_node_set set, varpool_node_set vset) { bool boundary_p = !varpool_node_in_set_p (node, vset) && node->analyzed; - struct bitpack_d *bp; + struct bitpack_d bp; struct varpool_node *alias; int count = 0; int ref; lto_output_var_decl_index (ob->decl_state, ob->main_stream, node->decl); - bp = bitpack_create (); - bp_pack_value (bp, node->externally_visible, 1); - bp_pack_value (bp, node->force_output, 1); - bp_pack_value (bp, node->finalized, 1); - bp_pack_value (bp, node->alias, 1); + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, node->externally_visible, 1); + bp_pack_value (&bp, node->force_output, 1); + bp_pack_value (&bp, node->finalized, 1); + bp_pack_value (&bp, node->alias, 1); gcc_assert (!node->alias || !node->extra_name); gcc_assert (node->finalized || !node->analyzed); 
gcc_assert (node->needed); /* Constant pool initializers can be de-unified into individual ltrans units. FIXME: Alternatively at -Os we may want to avoid generating for them the local labels and share them across LTRANS partitions. */ - if (DECL_IN_CONSTANT_POOL (node->decl)) + if (DECL_IN_CONSTANT_POOL (node->decl) + && !DECL_COMDAT (node->decl)) { - bp_pack_value (bp, 0, 1); /* used_from_other_parition. */ - bp_pack_value (bp, 0, 1); /* in_other_partition. */ + bp_pack_value (&bp, 0, 1); /* used_from_other_parition. */ + bp_pack_value (&bp, 0, 1); /* in_other_partition. */ } else { - bp_pack_value (bp, node->analyzed + bp_pack_value (&bp, node->analyzed && referenced_from_other_partition_p (&node->ref_list, set, vset), 1); - bp_pack_value (bp, boundary_p, 1); /* in_other_partition. */ + bp_pack_value (&bp, boundary_p, 1); /* in_other_partition. */ } /* Also emit any extra name aliases. */ for (alias = node->extra_name; alias; alias = alias->next) count++; - bp_pack_value (bp, count != 0, 1); - lto_output_bitpack (ob->main_stream, bp); - bitpack_delete (bp); + bp_pack_value (&bp, count != 0, 1); + lto_output_bitpack (&bp); if (node->same_comdat_group && !boundary_p) { ref = lto_varpool_encoder_lookup (varpool_encoder, node->same_comdat_group); @@ -612,12 +613,16 @@ else ref = LCC_NOT_FOUND; lto_output_sleb128_stream (ob->main_stream, ref); + lto_output_uleb128_stream (ob->main_stream, node->resolution); if (count) { lto_output_uleb128_stream (ob->main_stream, count); for (alias = node->extra_name; alias; alias = alias->next) - lto_output_var_decl_index (ob->decl_state, ob->main_stream, alias->decl); + { + lto_output_var_decl_index (ob->decl_state, ob->main_stream, alias->decl); + lto_output_uleb128_stream (ob->main_stream, alias->resolution); + } } } @@ -629,11 +634,11 @@ lto_cgraph_encoder_t encoder, lto_varpool_encoder_t varpool_encoder) { - struct bitpack_d *bp = bitpack_create (); - bp_pack_value (bp, ref->refered_type, 1); - bp_pack_value (bp, ref->use, 2); - 
lto_output_bitpack (ob->main_stream, bp); - bitpack_delete (bp); + struct bitpack_d bp; + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, ref->refered_type, 1); + bp_pack_value (&bp, ref->use, 2); + lto_output_bitpack (&bp); if (ref->refered_type == IPA_REF_CGRAPH) { int nref = lto_cgraph_encoder_lookup (encoder, ipa_ref_node (ref)); @@ -656,12 +661,12 @@ { if (profile_info) { - /* We do not output num, it is not terribly useful. */ + /* We do not output num, sum_all and run_max, they are not used by + GCC profile feedback and they are difficult to merge from multiple + units. */ gcc_assert (profile_info->runs); lto_output_uleb128_stream (ob->main_stream, profile_info->runs); - lto_output_sleb128_stream (ob->main_stream, profile_info->sum_all); - lto_output_sleb128_stream (ob->main_stream, profile_info->run_max); - lto_output_sleb128_stream (ob->main_stream, profile_info->sum_max); + lto_output_uleb128_stream (ob->main_stream, profile_info->sum_max); } else lto_output_uleb128_stream (ob->main_stream, 0); @@ -814,8 +819,7 @@ if (DECL_INITIAL (vnode->decl) && !lto_varpool_encoder_encode_initializer_p (varpool_encoder, vnode) - && (DECL_IN_CONSTANT_POOL (vnode->decl) - || TREE_READONLY (vnode->decl))) + && const_value_known_p (vnode->decl)) { lto_set_varpool_encoder_encode_initializer (varpool_encoder, vnode); add_references (encoder, varpool_encoder, &vnode->ref_list); @@ -852,9 +856,10 @@ lto_cgraph_encoder_t encoder; lto_varpool_encoder_t varpool_encoder; struct cgraph_asm_node *can; + static bool asm_nodes_output = false; if (flag_wpa) - output_cgraph_opt_summary (); + output_cgraph_opt_summary (set); ob = lto_create_simple_output_block (LTO_section_cgraph); @@ -887,14 +892,21 @@ lto_output_uleb128_stream (ob->main_stream, 0); - /* Emit toplevel asms. */ - for (can = cgraph_asm_nodes; can; can = can->next) + /* Emit toplevel asms. + When doing WPA we must output every asm just once. 
Since we do not partition asm + nodes at all, output them to first output. This is kind of hack, but should work + well. */ + if (!asm_nodes_output) { - int len = TREE_STRING_LENGTH (can->asm_str); - lto_output_uleb128_stream (ob->main_stream, len); - for (i = 0; i < len; ++i) - lto_output_1_stream (ob->main_stream, - TREE_STRING_POINTER (can->asm_str)[i]); + asm_nodes_output = true; + for (can = cgraph_asm_nodes; can; can = can->next) + { + int len = TREE_STRING_LENGTH (can->asm_str); + lto_output_uleb128_stream (ob->main_stream, len); + for (i = 0; i < len; ++i) + lto_output_1_stream (ob->main_stream, + TREE_STRING_POINTER (can->asm_str)[i]); + } } lto_output_uleb128_stream (ob->main_stream, 0); @@ -920,7 +932,8 @@ unsigned int self_time, unsigned int time_inlining_benefit, unsigned int self_size, - unsigned int size_inlining_benefit) + unsigned int size_inlining_benefit, + enum ld_plugin_symbol_resolution resolution) { node->aux = (void *) tag; node->local.inline_summary.estimated_self_stack_size = stack_size; @@ -939,6 +952,7 @@ node->local.finalized = bp_unpack_value (bp, 1); node->local.inlinable = bp_unpack_value (bp, 1); node->local.versionable = bp_unpack_value (bp, 1); + node->local.can_change_signature = bp_unpack_value (bp, 1); node->local.disregard_inline_limits = bp_unpack_value (bp, 1); node->local.redefined_extern_inline = bp_unpack_value (bp, 1); node->local.vtable_method = bp_unpack_value (bp, 1); @@ -949,9 +963,26 @@ node->lowered = bp_unpack_value (bp, 1); node->analyzed = tag == LTO_cgraph_analyzed_node; node->in_other_partition = bp_unpack_value (bp, 1); + if (node->in_other_partition + /* Avoid updating decl when we are seeing just inline clone. + When inlining function that has functions already inlined into it, + we produce clones of inline clones. + + WPA partitioning might put each clone into different unit and + we might end up streaming inline clone from other partition + to support clone we are interested in. 
*/ + && (!node->clone_of + || node->clone_of->decl != node->decl)) + { + DECL_EXTERNAL (node->decl) = 1; + TREE_STATIC (node->decl) = 0; + } node->alias = bp_unpack_value (bp, 1); node->finalized_by_frontend = bp_unpack_value (bp, 1); node->frequency = (enum node_frequency)bp_unpack_value (bp, 2); + node->only_called_at_startup = bp_unpack_value (bp, 1); + node->only_called_at_exit = bp_unpack_value (bp, 1); + node->resolution = resolution; } /* Output the part of the cgraph in SET. */ @@ -989,7 +1020,7 @@ { tree fn_decl; struct cgraph_node *node; - struct bitpack_d *bp; + struct bitpack_d bp; int stack_size = 0; unsigned decl_index; int ref = LCC_NOT_FOUND, ref2 = LCC_NOT_FOUND; @@ -999,6 +1030,7 @@ int size_inlining_benefit = 0; unsigned long same_body_count = 0; int clone_ref; + enum ld_plugin_symbol_resolution resolution; clone_ref = lto_input_sleb128 (ib); @@ -1014,7 +1046,7 @@ node = cgraph_node (fn_decl); node->count = lto_input_sleb128 (ib); - bp = lto_input_bitpack (ib); + node->count_materialization_scale = lto_input_sleb128 (ib); if (tag == LTO_cgraph_analyzed_node) { @@ -1028,7 +1060,6 @@ } ref2 = lto_input_sleb128 (ib); - same_body_count = lto_input_uleb128 (ib); /* Make sure that we have not read this node before. Nodes that have already been read will have their tag stored in the 'aux' @@ -1038,10 +1069,11 @@ internal_error ("bytecode stream: found multiple instances of cgraph " "node %d", node->uid); - input_overwrite_node (file_data, node, tag, bp, stack_size, self_time, + bp = lto_input_bitpack (ib); + resolution = (enum ld_plugin_symbol_resolution)lto_input_uleb128 (ib); + input_overwrite_node (file_data, node, tag, &bp, stack_size, self_time, time_inlining_benefit, self_size, - size_inlining_benefit); - bitpack_delete (bp); + size_inlining_benefit, resolution); /* Store a reference for now, and fix up later to be a pointer. 
*/ node->global.inlined_to = (cgraph_node_ptr) (intptr_t) ref; @@ -1049,10 +1081,12 @@ /* Store a reference for now, and fix up later to be a pointer. */ node->same_comdat_group = (cgraph_node_ptr) (intptr_t) ref2; + same_body_count = lto_input_uleb128 (ib); while (same_body_count-- > 0) { tree alias_decl; int type; + struct cgraph_node *alias; decl_index = lto_input_uleb128 (ib); alias_decl = lto_file_decl_data_get_fn_decl (file_data, decl_index); type = lto_input_uleb128 (ib); @@ -1061,7 +1095,7 @@ tree real_alias; decl_index = lto_input_uleb128 (ib); real_alias = lto_file_decl_data_get_fn_decl (file_data, decl_index); - cgraph_same_body_alias (alias_decl, real_alias); + alias = cgraph_same_body_alias (node, alias_decl, real_alias); } else { @@ -1070,11 +1104,13 @@ tree real_alias; decl_index = lto_input_uleb128 (ib); real_alias = lto_file_decl_data_get_fn_decl (file_data, decl_index); - cgraph_add_thunk (alias_decl, fn_decl, type & 2, fixed_offset, - virtual_value, - (type & 4) ? size_int (virtual_value) : NULL_TREE, - real_alias); + alias = cgraph_add_thunk (node, alias_decl, fn_decl, type & 2, fixed_offset, + virtual_value, + (type & 4) ? 
size_int (virtual_value) : NULL_TREE, + real_alias); } + gcc_assert (alias); + alias->resolution = (enum ld_plugin_symbol_resolution)lto_input_uleb128 (ib); } return node; } @@ -1089,7 +1125,7 @@ int decl_index; tree var_decl; struct varpool_node *node; - struct bitpack_d *bp; + struct bitpack_d bp; bool aliases_p; int count; int ref = LCC_NOT_FOUND; @@ -1097,22 +1133,28 @@ decl_index = lto_input_uleb128 (ib); var_decl = lto_file_decl_data_get_var_decl (file_data, decl_index); node = varpool_node (var_decl); + node->lto_file_data = file_data; bp = lto_input_bitpack (ib); - node->externally_visible = bp_unpack_value (bp, 1); - node->force_output = bp_unpack_value (bp, 1); - node->finalized = bp_unpack_value (bp, 1); - node->alias = bp_unpack_value (bp, 1); + node->externally_visible = bp_unpack_value (&bp, 1); + node->force_output = bp_unpack_value (&bp, 1); + node->finalized = bp_unpack_value (&bp, 1); + node->alias = bp_unpack_value (&bp, 1); node->analyzed = node->finalized; - node->used_from_other_partition = bp_unpack_value (bp, 1); - node->in_other_partition = bp_unpack_value (bp, 1); - aliases_p = bp_unpack_value (bp, 1); + node->used_from_other_partition = bp_unpack_value (&bp, 1); + node->in_other_partition = bp_unpack_value (&bp, 1); + if (node->in_other_partition) + { + DECL_EXTERNAL (node->decl) = 1; + TREE_STATIC (node->decl) = 0; + } + aliases_p = bp_unpack_value (&bp, 1); if (node->finalized) varpool_mark_needed_node (node); - bitpack_delete (bp); ref = lto_input_sleb128 (ib); /* Store a reference for now, and fix up later to be a pointer. 
*/ node->same_comdat_group = (struct varpool_node *) (intptr_t) ref; + node->resolution = (enum ld_plugin_symbol_resolution)lto_input_uleb128 (ib); if (aliases_p) { count = lto_input_uleb128 (ib); @@ -1120,7 +1162,9 @@ { tree decl = lto_file_decl_data_get_var_decl (file_data, lto_input_uleb128 (ib)); - varpool_extra_name_alias (decl, var_decl); + struct varpool_node *alias; + alias = varpool_extra_name_alias (decl, var_decl); + alias->resolution = (enum ld_plugin_symbol_resolution)lto_input_uleb128 (ib); } } return node; @@ -1138,14 +1182,13 @@ { struct cgraph_node *node = NULL; struct varpool_node *varpool_node = NULL; - struct bitpack_d *bp; + struct bitpack_d bp; enum ipa_ref_type type; enum ipa_ref_use use; bp = lto_input_bitpack (ib); - type = (enum ipa_ref_type) bp_unpack_value (bp, 1); - use = (enum ipa_ref_use) bp_unpack_value (bp, 2); - bitpack_delete (bp); + type = (enum ipa_ref_type) bp_unpack_value (&bp, 1); + use = (enum ipa_ref_use) bp_unpack_value (&bp, 2); if (type == IPA_REF_CGRAPH) node = VEC_index (cgraph_node_ptr, nodes, lto_input_sleb128 (ib)); else @@ -1170,8 +1213,7 @@ int freq; unsigned int nest; cgraph_inline_failed_t inline_failed; - struct bitpack_d *bp; - enum ld_plugin_symbol_resolution caller_resolution; + struct bitpack_d bp; int ecf_flags = 0; caller = VEC_index (cgraph_node_ptr, nodes, lto_input_sleb128 (ib)); @@ -1190,46 +1232,38 @@ count = (gcov_type) lto_input_sleb128 (ib); bp = lto_input_bitpack (ib); - stmt_id = (unsigned int) bp_unpack_value (bp, HOST_BITS_PER_INT); - inline_failed = (cgraph_inline_failed_t) bp_unpack_value (bp, + stmt_id = (unsigned int) bp_unpack_value (&bp, HOST_BITS_PER_INT); + inline_failed = (cgraph_inline_failed_t) bp_unpack_value (&bp, HOST_BITS_PER_INT); - freq = (int) bp_unpack_value (bp, HOST_BITS_PER_INT); - nest = (unsigned) bp_unpack_value (bp, 30); - - /* If the caller was preempted, don't create the edge. - ??? Should we ever have edges from a preempted caller? 
*/ - caller_resolution = lto_symtab_get_resolution (caller->decl); - if (caller_resolution == LDPR_PREEMPTED_REG - || caller_resolution == LDPR_PREEMPTED_IR) - return; + freq = (int) bp_unpack_value (&bp, HOST_BITS_PER_INT); + nest = (unsigned) bp_unpack_value (&bp, 30); if (indirect) edge = cgraph_create_indirect_edge (caller, NULL, 0, count, freq, nest); else edge = cgraph_create_edge (caller, callee, NULL, count, freq, nest); - edge->indirect_inlining_edge = bp_unpack_value (bp, 1); + edge->indirect_inlining_edge = bp_unpack_value (&bp, 1); edge->lto_stmt_uid = stmt_id; edge->inline_failed = inline_failed; - edge->call_stmt_cannot_inline_p = bp_unpack_value (bp, 1); - edge->can_throw_external = bp_unpack_value (bp, 1); + edge->call_stmt_cannot_inline_p = bp_unpack_value (&bp, 1); + edge->can_throw_external = bp_unpack_value (&bp, 1); if (indirect) { - if (bp_unpack_value (bp, 1)) + if (bp_unpack_value (&bp, 1)) ecf_flags |= ECF_CONST; - if (bp_unpack_value (bp, 1)) + if (bp_unpack_value (&bp, 1)) ecf_flags |= ECF_PURE; - if (bp_unpack_value (bp, 1)) + if (bp_unpack_value (&bp, 1)) ecf_flags |= ECF_NORETURN; - if (bp_unpack_value (bp, 1)) + if (bp_unpack_value (&bp, 1)) ecf_flags |= ECF_MALLOC; - if (bp_unpack_value (bp, 1)) + if (bp_unpack_value (&bp, 1)) ecf_flags |= ECF_NOTHROW; - if (bp_unpack_value (bp, 1)) + if (bp_unpack_value (&bp, 1)) ecf_flags |= ECF_RETURNS_TWICE; edge->indirect_info->ecf_flags = ecf_flags; } - bitpack_delete (bp); } @@ -1276,11 +1310,20 @@ len = lto_input_uleb128 (ib); } - - for (i = 0; VEC_iterate (cgraph_node_ptr, nodes, i, node); i++) + /* AUX pointers should be all non-zero for nodes read from the stream. */ +#ifdef ENABLE_CHECKING + FOR_EACH_VEC_ELT (cgraph_node_ptr, nodes, i, node) + gcc_assert (node->aux); +#endif + FOR_EACH_VEC_ELT (cgraph_node_ptr, nodes, i, node) { int ref = (int) (intptr_t) node->global.inlined_to; + /* We share declaration of builtins, so we may read same node twice. 
*/ + if (!node->aux) + continue; + node->aux = NULL; + /* Fixup inlined_to from reference to pointer. */ if (ref != LCC_NOT_FOUND) node->global.inlined_to = VEC_index (cgraph_node_ptr, nodes, ref); @@ -1295,6 +1338,8 @@ else node->same_comdat_group = NULL; } + FOR_EACH_VEC_ELT (cgraph_node_ptr, nodes, i, node) + node->aux = (void *)1; return nodes; } @@ -1316,9 +1361,17 @@ input_varpool_node (file_data, ib)); len--; } - for (i = 0; VEC_iterate (varpool_node_ptr, varpool, i, node); i++) +#ifdef ENABLE_CHECKING + FOR_EACH_VEC_ELT (varpool_node_ptr, varpool, i, node) + gcc_assert (!node->aux); +#endif + FOR_EACH_VEC_ELT (varpool_node_ptr, varpool, i, node) { int ref = (int) (intptr_t) node->same_comdat_group; + /* We share declaration of builtins, so we may read same node twice. */ + if (node->aux) + continue; + node->aux = (void *)1; /* Fixup same_comdat_group from reference to pointer. */ if (ref != LCC_NOT_FOUND) @@ -1326,6 +1379,8 @@ else node->same_comdat_group = NULL; } + FOR_EACH_VEC_ELT (varpool_node_ptr, varpool, i, node) + node->aux = NULL; return varpool; } @@ -1372,30 +1427,101 @@ /* Input profile_info from IB. */ static void -input_profile_summary (struct lto_input_block *ib) +input_profile_summary (struct lto_input_block *ib, + struct lto_file_decl_data *file_data) { unsigned int runs = lto_input_uleb128 (ib); if (runs) { - if (!profile_info) - { - profile_info = &lto_gcov_summary; - lto_gcov_summary.runs = runs; - lto_gcov_summary.sum_all = lto_input_sleb128 (ib); - lto_gcov_summary.run_max = lto_input_sleb128 (ib); - lto_gcov_summary.sum_max = lto_input_sleb128 (ib); - } - /* We can support this by scaling all counts to nearest common multiple - of all different runs, but it is perhaps not worth the effort. 
*/ - else if (profile_info->runs != runs - || profile_info->sum_all != lto_input_sleb128 (ib) - || profile_info->run_max != lto_input_sleb128 (ib) - || profile_info->sum_max != lto_input_sleb128 (ib)) - sorry ("Combining units with different profiles is not supported."); - /* We allow some units to have profile and other to not have one. This will - just make unprofiled units to be size optimized that is sane. */ + file_data->profile_info.runs = runs; + file_data->profile_info.sum_max = lto_input_uleb128 (ib); + } + +} + +/* Rescale profile summaries to the same number of runs in the whole unit. */ + +static void +merge_profile_summaries (struct lto_file_decl_data **file_data_vec) +{ + struct lto_file_decl_data *file_data; + unsigned int j; + gcov_unsigned_t max_runs = 0; + struct cgraph_node *node; + struct cgraph_edge *edge; + + /* Find unit with maximal number of runs. If we ever get serious about + roundoff errors, we might also consider computing smallest common + multiply. */ + for (j = 0; (file_data = file_data_vec[j]) != NULL; j++) + if (max_runs < file_data->profile_info.runs) + max_runs = file_data->profile_info.runs; + + if (!max_runs) + return; + + /* Simple overflow check. We probably don't need to support that many train + runs. Such a large value probably imply data corruption anyway. */ + if (max_runs > INT_MAX / REG_BR_PROB_BASE) + { + sorry ("At most %i profile runs is supported. Perhaps corrupted profile?", + INT_MAX / REG_BR_PROB_BASE); + return; } + profile_info = &lto_gcov_summary; + lto_gcov_summary.runs = max_runs; + lto_gcov_summary.sum_max = 0; + + /* Rescale all units to the maximal number of runs. + sum_max can not be easily merged, as we have no idea what files come from + the same run. We do not use the info anyway, so leave it 0. 
*/ + for (j = 0; (file_data = file_data_vec[j]) != NULL; j++) + if (file_data->profile_info.runs) + { + int scale = ((REG_BR_PROB_BASE * max_runs + + file_data->profile_info.runs / 2) + / file_data->profile_info.runs); + lto_gcov_summary.sum_max = MAX (lto_gcov_summary.sum_max, + (file_data->profile_info.sum_max + * scale + + REG_BR_PROB_BASE / 2) + / REG_BR_PROB_BASE); + } + + /* Watch roundoff errors. */ + if (lto_gcov_summary.sum_max < max_runs) + lto_gcov_summary.sum_max = max_runs; + + /* If merging already happent at WPA time, we are done. */ + if (flag_ltrans) + return; + + /* Now compute count_materialization_scale of each node. + During LTRANS we already have values of count_materialization_scale + computed, so just update them. */ + for (node = cgraph_nodes; node; node = node->next) + if (node->local.lto_file_data->profile_info.runs) + { + int scale; + + scale = + ((node->count_materialization_scale * max_runs + + node->local.lto_file_data->profile_info.runs / 2) + / node->local.lto_file_data->profile_info.runs); + node->count_materialization_scale = scale; + if (scale < 0) + fatal_error ("Profile information in %s corrupted", + file_data->file_name); + + if (scale == REG_BR_PROB_BASE) + continue; + for (edge = node->callees; edge; edge = edge->next_callee) + edge->count = ((edge->count * scale + REG_BR_PROB_BASE / 2) + / REG_BR_PROB_BASE); + node->count = ((node->count * scale + REG_BR_PROB_BASE / 2) + / REG_BR_PROB_BASE); + } } /* Input and merge the cgraph from each of the .o files passed to @@ -1419,7 +1545,9 @@ ib = lto_create_simple_input_block (file_data, LTO_section_cgraph, &data, &len); - input_profile_summary (ib); + if (!ib) + fatal_error ("cannot find LTO cgraph in %s", file_data->file_name); + input_profile_summary (ib, file_data); file_data->cgraph_node_encoder = lto_cgraph_encoder_new (); nodes = input_cgraph_1 (file_data, ib); lto_destroy_simple_input_block (file_data, LTO_section_cgraph, @@ -1427,12 +1555,16 @@ ib = 
lto_create_simple_input_block (file_data, LTO_section_varpool, &data, &len); + if (!ib) + fatal_error ("cannot find LTO varpool in %s", file_data->file_name); varpool = input_varpool_1 (file_data, ib); lto_destroy_simple_input_block (file_data, LTO_section_varpool, ib, data, len); ib = lto_create_simple_input_block (file_data, LTO_section_refs, &data, &len); + if (!ib) + fatal_error("cannot find LTO section refs in %s", file_data->file_name); input_refs (ib, nodes, varpool); lto_destroy_simple_input_block (file_data, LTO_section_refs, ib, data, len); @@ -1441,6 +1573,8 @@ VEC_free (cgraph_node_ptr, heap, nodes); VEC_free (varpool_node_ptr, heap, varpool); } + merge_profile_summaries (file_data_vec); + /* Clear out the aux field that was used to store enough state to tell which nodes should be overwritten. */ @@ -1459,26 +1593,53 @@ /* True when we need optimization summary for NODE. */ static int -output_cgraph_opt_summary_p (struct cgraph_node *node) +output_cgraph_opt_summary_p (struct cgraph_node *node, cgraph_node_set set) { - if (!node->clone_of) - return false; - return (node->clone.tree_map - || node->clone.args_to_skip - || node->clone.combined_args_to_skip); + struct cgraph_edge *e; + + if (cgraph_node_in_set_p (node, set)) + { + for (e = node->callees; e; e = e->next_callee) + if (e->indirect_info + && e->indirect_info->thunk_delta != 0) + return true; + + for (e = node->indirect_calls; e; e = e->next_callee) + if (e->indirect_info->thunk_delta != 0) + return true; + } + + return (node->clone_of + && (node->clone.tree_map + || node->clone.args_to_skip + || node->clone.combined_args_to_skip)); +} + +/* Output optimization summary for EDGE to OB. */ +static void +output_edge_opt_summary (struct output_block *ob, + struct cgraph_edge *edge) +{ + if (edge->indirect_info) + lto_output_sleb128_stream (ob->main_stream, + edge->indirect_info->thunk_delta); + else + lto_output_sleb128_stream (ob->main_stream, 0); } /* Output optimization summary for NODE to OB. 
*/ static void output_node_opt_summary (struct output_block *ob, - struct cgraph_node *node) + struct cgraph_node *node, + cgraph_node_set set) { unsigned int index; bitmap_iterator bi; struct ipa_replace_map *map; - struct bitpack_d *bp; + struct bitpack_d bp; int i; + struct cgraph_edge *e; lto_output_uleb128_stream (ob->main_stream, bitmap_count_bits (node->clone.args_to_skip)); @@ -1490,13 +1651,13 @@ lto_output_uleb128_stream (ob->main_stream, index); lto_output_uleb128_stream (ob->main_stream, VEC_length (ipa_replace_map_p, node->clone.tree_map)); - for (i = 0; VEC_iterate (ipa_replace_map_p, node->clone.tree_map, i, map); i++) + FOR_EACH_VEC_ELT (ipa_replace_map_p, node->clone.tree_map, i, map) { int parm_num; tree parm; for (parm_num = 0, parm = DECL_ARGUMENTS (node->decl); parm; - parm = TREE_CHAIN (parm), parm_num++) + parm = DECL_CHAIN (parm), parm_num++) if (map->old_tree == parm) break; /* At the moment we assume all old trees to be PARM_DECLs, because we have no @@ -1504,11 +1665,18 @@ gcc_assert (parm); lto_output_uleb128_stream (ob->main_stream, parm_num); lto_output_tree (ob, map->new_tree, true); - bp = bitpack_create (); - bp_pack_value (bp, map->replace_p, 1); - bp_pack_value (bp, map->ref_p, 1); - lto_output_bitpack (ob->main_stream, bp); - bitpack_delete (bp); + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, map->replace_p, 1); + bp_pack_value (&bp, map->ref_p, 1); + lto_output_bitpack (&bp); + } + + if (cgraph_node_in_set_p (node, set)) + { + for (e = node->callees; e; e = e->next_callee) + output_edge_opt_summary (ob, e); + for (e = node->indirect_calls; e; e = e->next_callee) + output_edge_opt_summary (ob, e); } } @@ -1516,7 +1684,7 @@ At the moment it is the clone info structure. 
*/ static void -output_cgraph_opt_summary (void) +output_cgraph_opt_summary (cgraph_node_set set) { struct cgraph_node *node; int i, n_nodes; @@ -1528,23 +1696,40 @@ encoder = ob->decl_state->cgraph_node_encoder; n_nodes = lto_cgraph_encoder_size (encoder); for (i = 0; i < n_nodes; i++) - if (output_cgraph_opt_summary_p (lto_cgraph_encoder_deref (encoder, i))) + if (output_cgraph_opt_summary_p (lto_cgraph_encoder_deref (encoder, i), + set)) count++; lto_output_uleb128_stream (ob->main_stream, count); for (i = 0; i < n_nodes; i++) { node = lto_cgraph_encoder_deref (encoder, i); - if (output_cgraph_opt_summary_p (node)) + if (output_cgraph_opt_summary_p (node, set)) { lto_output_uleb128_stream (ob->main_stream, i); - output_node_opt_summary (ob, node); + output_node_opt_summary (ob, node, set); } } produce_asm (ob, NULL); destroy_output_block (ob); } -/* Input optimiation summary of NODE. */ +/* Input optimisation summary of EDGE. */ + +static void +input_edge_opt_summary (struct cgraph_edge *edge, + struct lto_input_block *ib_main) +{ + HOST_WIDE_INT thunk_delta; + thunk_delta = lto_input_sleb128 (ib_main); + if (thunk_delta != 0) + { + gcc_assert (!edge->indirect_info); + edge->indirect_info = cgraph_allocate_init_indirect_info (); + edge->indirect_info->thunk_delta = thunk_delta; + } +} + +/* Input optimisation summary of NODE. 
*/ static void input_node_opt_summary (struct cgraph_node *node, @@ -1554,7 +1739,8 @@ int i; int count; int bit; - struct bitpack_d *bp; + struct bitpack_d bp; + struct cgraph_edge *e; count = lto_input_uleb128 (ib_main); if (count) @@ -1577,20 +1763,23 @@ { int parm_num; tree parm; - struct ipa_replace_map *map = GGC_NEW (struct ipa_replace_map); + struct ipa_replace_map *map = ggc_alloc_ipa_replace_map (); VEC_safe_push (ipa_replace_map_p, gc, node->clone.tree_map, map); for (parm_num = 0, parm = DECL_ARGUMENTS (node->decl); parm_num; - parm = TREE_CHAIN (parm)) + parm = DECL_CHAIN (parm)) parm_num --; map->parm_num = lto_input_uleb128 (ib_main); map->old_tree = NULL; map->new_tree = lto_input_tree (ib_main, data_in); bp = lto_input_bitpack (ib_main); - map->replace_p = bp_unpack_value (bp, 1); - map->ref_p = bp_unpack_value (bp, 1); - bitpack_delete (bp); + map->replace_p = bp_unpack_value (&bp, 1); + map->ref_p = bp_unpack_value (&bp, 1); } + for (e = node->callees; e; e = e->next_callee) + input_edge_opt_summary (e, ib_main); + for (e = node->indirect_calls; e; e = e->next_callee) + input_edge_opt_summary (e, ib_main); } /* Read section in file FILE_DATA of length LEN with data DATA. */ @@ -1624,7 +1813,7 @@ input_node_opt_summary (VEC_index (cgraph_node_ptr, nodes, ref), &ib_main, data_in); } - lto_free_section_data (file_data, LTO_section_jump_functions, NULL, data, + lto_free_section_data (file_data, LTO_section_cgraph_opt_sum, NULL, data, len); lto_data_in_delete (data_in); }