comparison gcc/ipa-profile.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
comparison
equal deleted inserted replaced
111:04ced10e8804 131:84e7813d76e9
1 /* Basic IPA optimizations based on profile. 1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc. 2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 3
4 This file is part of GCC. 4 This file is part of GCC.
5 5
6 GCC is free software; you can redistribute it and/or modify it under 6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free 7 the terms of the GNU General Public License as published by the Free
22 22
23 - Count histogram construction. This is a histogram analyzing how much 23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read 24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO, 25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit. 26 otherwise it contains information only about the current unit.
27
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequentely histogram constructed
33 here may be more precise.
34 27
35 The information is used to set hot/cold thresholds. 28 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local 29 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a 30 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph 31 histogram specifying the most common target. We look up the callgraph
177 basic_block bb; 170 basic_block bb;
178 171
179 hash_table<histogram_hash> hashtable (10); 172 hash_table<histogram_hash> hashtable (10);
180 173
181 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) 174 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
182 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) 175 if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
183 { 176 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
184 int time = 0; 177 {
185 int size = 0; 178 int time = 0;
186 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 179 int size = 0;
187 { 180 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
188 gimple *stmt = gsi_stmt (gsi); 181 {
189 if (gimple_code (stmt) == GIMPLE_CALL 182 gimple *stmt = gsi_stmt (gsi);
190 && !gimple_call_fndecl (stmt)) 183 if (gimple_code (stmt) == GIMPLE_CALL
191 { 184 && !gimple_call_fndecl (stmt))
192 histogram_value h; 185 {
193 h = gimple_histogram_value_of_type 186 histogram_value h;
194 (DECL_STRUCT_FUNCTION (node->decl), 187 h = gimple_histogram_value_of_type
195 stmt, HIST_TYPE_INDIR_CALL); 188 (DECL_STRUCT_FUNCTION (node->decl),
196 /* No need to do sanity check: gimple_ic_transform already 189 stmt, HIST_TYPE_INDIR_CALL);
197 takes away bad histograms. */ 190 /* No need to do sanity check: gimple_ic_transform already
198 if (h) 191 takes away bad histograms. */
199 { 192 if (h)
200 /* counter 0 is target, counter 1 is number of execution we called target, 193 {
201 counter 2 is total number of executions. */ 194 /* counter 0 is target, counter 1 is number of execution we called target,
202 if (h->hvalue.counters[2]) 195 counter 2 is total number of executions. */
203 { 196 if (h->hvalue.counters[2])
204 struct cgraph_edge * e = node->get_edge (stmt); 197 {
205 if (e && !e->indirect_unknown_callee) 198 struct cgraph_edge * e = node->get_edge (stmt);
206 continue; 199 if (e && !e->indirect_unknown_callee)
207 e->indirect_info->common_target_id 200 continue;
208 = h->hvalue.counters [0]; 201 e->indirect_info->common_target_id
209 e->indirect_info->common_target_probability 202 = h->hvalue.counters [0];
210 = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); 203 e->indirect_info->common_target_probability
211 if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) 204 = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
212 { 205 if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
213 if (dump_file) 206 {
214 fprintf (dump_file, "Probability capped to 1\n"); 207 if (dump_file)
215 e->indirect_info->common_target_probability = REG_BR_PROB_BASE; 208 fprintf (dump_file, "Probability capped to 1\n");
216 } 209 e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
217 } 210 }
218 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), 211 }
219 stmt, h); 212 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
220 } 213 stmt, h);
221 } 214 }
222 time += estimate_num_insns (stmt, &eni_time_weights); 215 }
223 size += estimate_num_insns (stmt, &eni_size_weights); 216 time += estimate_num_insns (stmt, &eni_time_weights);
224 } 217 size += estimate_num_insns (stmt, &eni_size_weights);
225 if (bb->count.initialized_p ()) 218 }
226 account_time_size (&hashtable, histogram, bb->count.to_gcov_type (), 219 if (bb->count.ipa_p () && bb->count.initialized_p ())
227 time, size); 220 account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (),
228 } 221 time, size);
222 }
229 histogram.qsort (cmp_counts); 223 histogram.qsort (cmp_counts);
230 } 224 }
231 225
232 /* Serialize the ipa info for lto. */ 226 /* Serialize the ipa info for lto. */
233 227
328 counts are already good guide on function frequencies and roundoff 322 counts are already good guide on function frequencies and roundoff
329 errors can make us to push function into unlikely section even when 323 errors can make us to push function into unlikely section even when
330 it is executed by the train run. Transfer the function only if all 324 it is executed by the train run. Transfer the function only if all
331 callers are unlikely executed. */ 325 callers are unlikely executed. */
332 if (profile_info 326 if (profile_info
333 && edge->callee->count.initialized_p () 327 && !(edge->callee->count.ipa () == profile_count::zero ())
334 /* Thunks are not profiled. This is more or less implementation
335 bug. */
336 && !d->function_symbol->thunk.thunk_p
337 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED 328 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
338 || (edge->caller->global.inlined_to 329 || (edge->caller->global.inlined_to
339 && edge->caller->global.inlined_to->frequency 330 && edge->caller->global.inlined_to->frequency
340 != NODE_FREQUENCY_UNLIKELY_EXECUTED))) 331 != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
341 d->maybe_unlikely_executed = false; 332 d->maybe_unlikely_executed = false;
342 if (!edge->frequency) 333 if (edge->count.ipa ().initialized_p ()
334 && !edge->count.ipa ().nonzero_p ())
343 continue; 335 continue;
344 switch (edge->caller->frequency) 336 switch (edge->caller->frequency)
345 { 337 {
346 case NODE_FREQUENCY_UNLIKELY_EXECUTED: 338 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
347 break; 339 break;
348 case NODE_FREQUENCY_EXECUTED_ONCE: 340 case NODE_FREQUENCY_EXECUTED_ONCE:
349 if (dump_file && (dump_flags & TDF_DETAILS)) 341 {
350 fprintf (dump_file, " Called by %s that is executed once\n", 342 if (dump_file && (dump_flags & TDF_DETAILS))
351 edge->caller->name ()); 343 fprintf (dump_file, " Called by %s that is executed once\n",
352 d->maybe_unlikely_executed = false; 344 edge->caller->name ());
353 if (ipa_call_summaries->get (edge)->loop_depth) 345 d->maybe_unlikely_executed = false;
354 { 346 ipa_call_summary *s = ipa_call_summaries->get (edge);
355 d->maybe_executed_once = false; 347 if (s != NULL && s->loop_depth)
356 if (dump_file && (dump_flags & TDF_DETAILS)) 348 {
357 fprintf (dump_file, " Called in loop\n"); 349 d->maybe_executed_once = false;
358 } 350 if (dump_file && (dump_flags & TDF_DETAILS))
359 break; 351 fprintf (dump_file, " Called in loop\n");
352 }
353 break;
354 }
360 case NODE_FREQUENCY_HOT: 355 case NODE_FREQUENCY_HOT:
361 case NODE_FREQUENCY_NORMAL: 356 case NODE_FREQUENCY_NORMAL:
362 if (dump_file && (dump_flags & TDF_DETAILS)) 357 if (dump_file && (dump_flags & TDF_DETAILS))
363 fprintf (dump_file, " Called by %s that is normal or hot\n", 358 fprintf (dump_file, " Called by %s that is normal or hot\n",
364 edge->caller->name ()); 359 edge->caller->name ());
428 node->name ()); 423 node->name ());
429 changed = true; 424 changed = true;
430 } 425 }
431 426
432 /* With profile we can decide on hot/normal based on count. */ 427 /* With profile we can decide on hot/normal based on count. */
433 if (node->count.initialized_p ()) 428 if (node->count. ipa().initialized_p ())
434 { 429 {
435 bool hot = false; 430 bool hot = false;
436 if (!(node->count == profile_count::zero ()) 431 if (!(node->count. ipa() == profile_count::zero ())
437 && node->count >= get_hot_bb_threshold ()) 432 && node->count. ipa() >= get_hot_bb_threshold ())
438 hot = true; 433 hot = true;
439 if (!hot) 434 if (!hot)
440 hot |= contains_hot_call_p (node); 435 hot |= contains_hot_call_p (node);
441 if (hot) 436 if (hot)
442 { 437 {
508 if (overall_time) 503 if (overall_time)
509 { 504 {
510 gcov_type threshold; 505 gcov_type threshold;
511 506
512 gcc_assert (overall_size); 507 gcc_assert (overall_size);
513 if (dump_file) 508
514 {
515 gcov_type min, cumulated_time = 0, cumulated_size = 0;
516
517 fprintf (dump_file, "Overall time: %" PRId64"\n",
518 (int64_t)overall_time);
519 min = get_hot_bb_threshold ();
520 for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
521 i++)
522 {
523 cumulated_time += histogram[i]->count * histogram[i]->time;
524 cumulated_size += histogram[i]->size;
525 }
526 fprintf (dump_file, "GCOV min count: %" PRId64
527 " Time:%3.2f%% Size:%3.2f%%\n",
528 (int64_t)min,
529 cumulated_time * 100.0 / overall_time,
530 cumulated_size * 100.0 / overall_size);
531 }
532 cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000; 509 cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
533 threshold = 0; 510 threshold = 0;
534 for (i = 0; cumulated < cutoff; i++) 511 for (i = 0; cumulated < cutoff; i++)
535 { 512 {
536 cumulated += histogram[i]->count * histogram[i]->time; 513 cumulated += histogram[i]->count * histogram[i]->time;
553 " Time:%3.2f%% Size:%3.2f%%\n", 530 " Time:%3.2f%% Size:%3.2f%%\n",
554 (int64_t)threshold, 531 (int64_t)threshold,
555 cumulated_time * 100.0 / overall_time, 532 cumulated_time * 100.0 / overall_time,
556 cumulated_size * 100.0 / overall_size); 533 cumulated_size * 100.0 / overall_size);
557 } 534 }
535
558 if (threshold > get_hot_bb_threshold () 536 if (threshold > get_hot_bb_threshold ()
559 || in_lto_p) 537 || in_lto_p)
560 { 538 {
561 if (dump_file) 539 if (dump_file)
562 fprintf (dump_file, "Threshold updated.\n"); 540 fprintf (dump_file, "Threshold updated.\n");
664 } 642 }
665 nconverted++; 643 nconverted++;
666 e->make_speculative 644 e->make_speculative
667 (n2, 645 (n2,
668 e->count.apply_probability 646 e->count.apply_probability
669 (e->indirect_info->common_target_probability), 647 (e->indirect_info->common_target_probability));
670 apply_scale (e->frequency,
671 e->indirect_info->common_target_probability));
672 update = true; 648 update = true;
673 } 649 }
674 } 650 }
675 else 651 else
676 { 652 {