Mercurial > hg > CbC > CbC_gcc
comparison gcc/ipa-profile.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Basic IPA optimizations based on profile. | 1 /* Basic IPA optimizations based on profile. |
2 Copyright (C) 2003-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2003-2018 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify it under | 6 GCC is free software; you can redistribute it and/or modify it under |
7 the terms of the GNU General Public License as published by the Free | 7 the terms of the GNU General Public License as published by the Free |
22 | 22 |
23 - Count histogram construction. This is a histogram analyzing how much | 23 - Count histogram construction. This is a histogram analyzing how much |
24 time is spent executing statements with a given execution count read | 24 time is spent executing statements with a given execution count read |
25 from profile feedback. This histogram is complete only with LTO, | 25 from profile feedback. This histogram is complete only with LTO, |
26 otherwise it contains information only about the current unit. | 26 otherwise it contains information only about the current unit. |
27 | |
28 Similar histogram is also estimated by coverage runtime. This histogram | |
29 is not dependent on LTO, but it suffers from various defects; first | |
30 gcov runtime is not weighting individual basic block by estimated execution | |
31 time and second the merging of multiple runs makes assumption that the | |
32 histogram distribution did not change. Consequentely histogram constructed | |
33 here may be more precise. | |
34 | 27 |
35 The information is used to set hot/cold thresholds. | 28 The information is used to set hot/cold thresholds. |
36 - Next speculative indirect call resolution is performed: the local | 29 - Next speculative indirect call resolution is performed: the local |
37 profile pass assigns profile-id to each function and provide us with a | 30 profile pass assigns profile-id to each function and provide us with a |
38 histogram specifying the most common target. We look up the callgraph | 31 histogram specifying the most common target. We look up the callgraph |
177 basic_block bb; | 170 basic_block bb; |
178 | 171 |
179 hash_table<histogram_hash> hashtable (10); | 172 hash_table<histogram_hash> hashtable (10); |
180 | 173 |
181 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) | 174 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
182 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) | 175 if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ()) |
183 { | 176 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) |
184 int time = 0; | 177 { |
185 int size = 0; | 178 int time = 0; |
186 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | 179 int size = 0; |
187 { | 180 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
188 gimple *stmt = gsi_stmt (gsi); | 181 { |
189 if (gimple_code (stmt) == GIMPLE_CALL | 182 gimple *stmt = gsi_stmt (gsi); |
190 && !gimple_call_fndecl (stmt)) | 183 if (gimple_code (stmt) == GIMPLE_CALL |
191 { | 184 && !gimple_call_fndecl (stmt)) |
192 histogram_value h; | 185 { |
193 h = gimple_histogram_value_of_type | 186 histogram_value h; |
194 (DECL_STRUCT_FUNCTION (node->decl), | 187 h = gimple_histogram_value_of_type |
195 stmt, HIST_TYPE_INDIR_CALL); | 188 (DECL_STRUCT_FUNCTION (node->decl), |
196 /* No need to do sanity check: gimple_ic_transform already | 189 stmt, HIST_TYPE_INDIR_CALL); |
197 takes away bad histograms. */ | 190 /* No need to do sanity check: gimple_ic_transform already |
198 if (h) | 191 takes away bad histograms. */ |
199 { | 192 if (h) |
200 /* counter 0 is target, counter 1 is number of execution we called target, | 193 { |
201 counter 2 is total number of executions. */ | 194 /* counter 0 is target, counter 1 is number of execution we called target, |
202 if (h->hvalue.counters[2]) | 195 counter 2 is total number of executions. */ |
203 { | 196 if (h->hvalue.counters[2]) |
204 struct cgraph_edge * e = node->get_edge (stmt); | 197 { |
205 if (e && !e->indirect_unknown_callee) | 198 struct cgraph_edge * e = node->get_edge (stmt); |
206 continue; | 199 if (e && !e->indirect_unknown_callee) |
207 e->indirect_info->common_target_id | 200 continue; |
208 = h->hvalue.counters [0]; | 201 e->indirect_info->common_target_id |
209 e->indirect_info->common_target_probability | 202 = h->hvalue.counters [0]; |
210 = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); | 203 e->indirect_info->common_target_probability |
211 if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) | 204 = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); |
212 { | 205 if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) |
213 if (dump_file) | 206 { |
214 fprintf (dump_file, "Probability capped to 1\n"); | 207 if (dump_file) |
215 e->indirect_info->common_target_probability = REG_BR_PROB_BASE; | 208 fprintf (dump_file, "Probability capped to 1\n"); |
216 } | 209 e->indirect_info->common_target_probability = REG_BR_PROB_BASE; |
217 } | 210 } |
218 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), | 211 } |
219 stmt, h); | 212 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), |
220 } | 213 stmt, h); |
221 } | 214 } |
222 time += estimate_num_insns (stmt, &eni_time_weights); | 215 } |
223 size += estimate_num_insns (stmt, &eni_size_weights); | 216 time += estimate_num_insns (stmt, &eni_time_weights); |
224 } | 217 size += estimate_num_insns (stmt, &eni_size_weights); |
225 if (bb->count.initialized_p ()) | 218 } |
226 account_time_size (&hashtable, histogram, bb->count.to_gcov_type (), | 219 if (bb->count.ipa_p () && bb->count.initialized_p ()) |
227 time, size); | 220 account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (), |
228 } | 221 time, size); |
222 } | |
229 histogram.qsort (cmp_counts); | 223 histogram.qsort (cmp_counts); |
230 } | 224 } |
231 | 225 |
232 /* Serialize the ipa info for lto. */ | 226 /* Serialize the ipa info for lto. */ |
233 | 227 |
328 counts are already good guide on function frequencies and roundoff | 322 counts are already good guide on function frequencies and roundoff |
329 errors can make us to push function into unlikely section even when | 323 errors can make us to push function into unlikely section even when |
330 it is executed by the train run. Transfer the function only if all | 324 it is executed by the train run. Transfer the function only if all |
331 callers are unlikely executed. */ | 325 callers are unlikely executed. */ |
332 if (profile_info | 326 if (profile_info |
333 && edge->callee->count.initialized_p () | 327 && !(edge->callee->count.ipa () == profile_count::zero ()) |
334 /* Thunks are not profiled. This is more or less implementation | |
335 bug. */ | |
336 && !d->function_symbol->thunk.thunk_p | |
337 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED | 328 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED |
338 || (edge->caller->global.inlined_to | 329 || (edge->caller->global.inlined_to |
339 && edge->caller->global.inlined_to->frequency | 330 && edge->caller->global.inlined_to->frequency |
340 != NODE_FREQUENCY_UNLIKELY_EXECUTED))) | 331 != NODE_FREQUENCY_UNLIKELY_EXECUTED))) |
341 d->maybe_unlikely_executed = false; | 332 d->maybe_unlikely_executed = false; |
342 if (!edge->frequency) | 333 if (edge->count.ipa ().initialized_p () |
334 && !edge->count.ipa ().nonzero_p ()) | |
343 continue; | 335 continue; |
344 switch (edge->caller->frequency) | 336 switch (edge->caller->frequency) |
345 { | 337 { |
346 case NODE_FREQUENCY_UNLIKELY_EXECUTED: | 338 case NODE_FREQUENCY_UNLIKELY_EXECUTED: |
347 break; | 339 break; |
348 case NODE_FREQUENCY_EXECUTED_ONCE: | 340 case NODE_FREQUENCY_EXECUTED_ONCE: |
349 if (dump_file && (dump_flags & TDF_DETAILS)) | 341 { |
350 fprintf (dump_file, " Called by %s that is executed once\n", | 342 if (dump_file && (dump_flags & TDF_DETAILS)) |
351 edge->caller->name ()); | 343 fprintf (dump_file, " Called by %s that is executed once\n", |
352 d->maybe_unlikely_executed = false; | 344 edge->caller->name ()); |
353 if (ipa_call_summaries->get (edge)->loop_depth) | 345 d->maybe_unlikely_executed = false; |
354 { | 346 ipa_call_summary *s = ipa_call_summaries->get (edge); |
355 d->maybe_executed_once = false; | 347 if (s != NULL && s->loop_depth) |
356 if (dump_file && (dump_flags & TDF_DETAILS)) | 348 { |
357 fprintf (dump_file, " Called in loop\n"); | 349 d->maybe_executed_once = false; |
358 } | 350 if (dump_file && (dump_flags & TDF_DETAILS)) |
359 break; | 351 fprintf (dump_file, " Called in loop\n"); |
352 } | |
353 break; | |
354 } | |
360 case NODE_FREQUENCY_HOT: | 355 case NODE_FREQUENCY_HOT: |
361 case NODE_FREQUENCY_NORMAL: | 356 case NODE_FREQUENCY_NORMAL: |
362 if (dump_file && (dump_flags & TDF_DETAILS)) | 357 if (dump_file && (dump_flags & TDF_DETAILS)) |
363 fprintf (dump_file, " Called by %s that is normal or hot\n", | 358 fprintf (dump_file, " Called by %s that is normal or hot\n", |
364 edge->caller->name ()); | 359 edge->caller->name ()); |
428 node->name ()); | 423 node->name ()); |
429 changed = true; | 424 changed = true; |
430 } | 425 } |
431 | 426 |
432 /* With profile we can decide on hot/normal based on count. */ | 427 /* With profile we can decide on hot/normal based on count. */ |
433 if (node->count.initialized_p ()) | 428 if (node->count. ipa().initialized_p ()) |
434 { | 429 { |
435 bool hot = false; | 430 bool hot = false; |
436 if (!(node->count == profile_count::zero ()) | 431 if (!(node->count. ipa() == profile_count::zero ()) |
437 && node->count >= get_hot_bb_threshold ()) | 432 && node->count. ipa() >= get_hot_bb_threshold ()) |
438 hot = true; | 433 hot = true; |
439 if (!hot) | 434 if (!hot) |
440 hot |= contains_hot_call_p (node); | 435 hot |= contains_hot_call_p (node); |
441 if (hot) | 436 if (hot) |
442 { | 437 { |
508 if (overall_time) | 503 if (overall_time) |
509 { | 504 { |
510 gcov_type threshold; | 505 gcov_type threshold; |
511 | 506 |
512 gcc_assert (overall_size); | 507 gcc_assert (overall_size); |
513 if (dump_file) | 508 |
514 { | |
515 gcov_type min, cumulated_time = 0, cumulated_size = 0; | |
516 | |
517 fprintf (dump_file, "Overall time: %" PRId64"\n", | |
518 (int64_t)overall_time); | |
519 min = get_hot_bb_threshold (); | |
520 for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min; | |
521 i++) | |
522 { | |
523 cumulated_time += histogram[i]->count * histogram[i]->time; | |
524 cumulated_size += histogram[i]->size; | |
525 } | |
526 fprintf (dump_file, "GCOV min count: %" PRId64 | |
527 " Time:%3.2f%% Size:%3.2f%%\n", | |
528 (int64_t)min, | |
529 cumulated_time * 100.0 / overall_time, | |
530 cumulated_size * 100.0 / overall_size); | |
531 } | |
532 cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000; | 509 cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000; |
533 threshold = 0; | 510 threshold = 0; |
534 for (i = 0; cumulated < cutoff; i++) | 511 for (i = 0; cumulated < cutoff; i++) |
535 { | 512 { |
536 cumulated += histogram[i]->count * histogram[i]->time; | 513 cumulated += histogram[i]->count * histogram[i]->time; |
553 " Time:%3.2f%% Size:%3.2f%%\n", | 530 " Time:%3.2f%% Size:%3.2f%%\n", |
554 (int64_t)threshold, | 531 (int64_t)threshold, |
555 cumulated_time * 100.0 / overall_time, | 532 cumulated_time * 100.0 / overall_time, |
556 cumulated_size * 100.0 / overall_size); | 533 cumulated_size * 100.0 / overall_size); |
557 } | 534 } |
535 | |
558 if (threshold > get_hot_bb_threshold () | 536 if (threshold > get_hot_bb_threshold () |
559 || in_lto_p) | 537 || in_lto_p) |
560 { | 538 { |
561 if (dump_file) | 539 if (dump_file) |
562 fprintf (dump_file, "Threshold updated.\n"); | 540 fprintf (dump_file, "Threshold updated.\n"); |
664 } | 642 } |
665 nconverted++; | 643 nconverted++; |
666 e->make_speculative | 644 e->make_speculative |
667 (n2, | 645 (n2, |
668 e->count.apply_probability | 646 e->count.apply_probability |
669 (e->indirect_info->common_target_probability), | 647 (e->indirect_info->common_target_probability)); |
670 apply_scale (e->frequency, | |
671 e->indirect_info->common_target_probability)); | |
672 update = true; | 648 update = true; |
673 } | 649 } |
674 } | 650 } |
675 else | 651 else |
676 { | 652 { |