comparison: gcc/predict.c @ changeset 111:04ced10e8804
description: gcc 7
author:      kono
date:        Fri, 27 Oct 2017 22:46:09 +0900
parents:     f6334be47118
children:    84e7813d76e9
compared:    68:561a7518be6b -> 111:04ced10e8804

--- gcc/predict.c	(68:561a7518be6b)
+++ gcc/predict.c	(111:04ced10e8804)
@@ -1 +1 @@
 /* Branch prediction routines for the GNU compiler.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
-   Free Software Foundation, Inc.
+   Copyright (C) 2000-2017 Free Software Foundation, Inc.

    This file is part of GCC.

    GCC is free software; you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
@@ -29 +28 @@


 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
 #include "tree.h"
-#include "rtl.h"
-#include "tm_p.h"
-#include "hard-reg-set.h"
-#include "basic-block.h"
-#include "insn-config.h"
-#include "regs.h"
-#include "flags.h"
-#include "output.h"
-#include "function.h"
-#include "except.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "cgraph.h"
+#include "coverage.h"
 #include "diagnostic-core.h"
-#include "recog.h"
-#include "expr.h"
-#include "predict.h"
-#include "coverage.h"
+#include "gimple-predict.h"
+#include "fold-const.h"
+#include "calls.h"
+#include "cfganal.h"
+#include "profile.h"
 #include "sreal.h"
 #include "params.h"
-#include "target.h"
 #include "cfgloop.h"
-#include "tree-flow.h"
-#include "ggc.h"
-#include "tree-dump.h"
-#include "tree-pass.h"
-#include "timevar.h"
+#include "gimple-iterator.h"
+#include "tree-cfg.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
 #include "tree-scalar-evolution.h"
-#include "cfgloop.h"
-#include "pointer-set.h"
+#include "ipa-utils.h"
+#include "gimple-pretty-print.h"
+#include "selftest.h"
+#include "cfgrtl.h"
+#include "stringpool.h"
+#include "attribs.h"
+
+/* Enum with reasons why a predictor is ignored.  */
+
+enum predictor_reason
+{
+  REASON_NONE,
+  REASON_IGNORED,
+  REASON_SINGLE_EDGE_DUPLICATE,
+  REASON_EDGE_PAIR_DUPLICATE
+};
+
+/* String messages for the aforementioned enum.  */
+
+static const char *reason_messages[] = {"", " (ignored)",
+    " (single edge duplicate)", " (edge pair duplicate)"};

 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
-static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
+static sreal real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;

-/* Random guesstimation given names.
-   PROV_VERY_UNLIKELY should be small enough so basic block predicted
-   by it gets bellow HOT_BB_FREQUENCY_FRANCTION.  */
-#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 2000 - 1)
-#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
-#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
-#define PROB_ALWAYS		(REG_BR_PROB_BASE)
-
-static void combine_predictions_for_insn (rtx, basic_block);
-static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
-static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
-static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
-static bool can_predict_insn_p (const_rtx);
+static void combine_predictions_for_insn (rtx_insn *, basic_block);
+static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
+			     enum predictor_reason, edge);
+static void predict_paths_leading_to (basic_block, enum br_predictor,
+				      enum prediction,
+				      struct loop *in_loop = NULL);
+static void predict_paths_leading_to_edge (edge, enum br_predictor,
+					   enum prediction,
+					   struct loop *in_loop = NULL);
+static bool can_predict_insn_p (const rtx_insn *);

 /* Information we hold about each branch predictor.
    Filled using information from predict.def.  */

 struct predictor_info
@@ -109 +122 @@
 #undef DEF_PREDICTOR

 /* Return TRUE if frequency FREQ is considered to be hot.  */

 static inline bool
-maybe_hot_frequency_p (int freq)
+maybe_hot_frequency_p (struct function *fun, int freq)
 {
-  struct cgraph_node *node = cgraph_node (current_function_decl);
-  if (!profile_info || !flag_branch_probabilities)
+  struct cgraph_node *node = cgraph_node::get (fun->decl);
+  if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
     {
       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	 return false;
       if (node->frequency == NODE_FREQUENCY_HOT)
	 return true;
     }
-  if (profile_status == PROFILE_ABSENT)
+  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
     return true;
   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
-      && freq <= (ENTRY_BLOCK_PTR->frequency * 2 / 3))
+      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
     return false;
-  if (freq < ENTRY_BLOCK_PTR->frequency / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
+  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
+    return false;
+  if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
+      < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
     return false;
   return true;
 }
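The rewritten test boils down to integer arithmetic against the entry block's frequency, with the old division replaced by a multiplication so small frequencies are not truncated away. A standalone sketch of just that arithmetic, with plain ints standing in for GCC's block frequencies (the fraction value 1000 is only an assumed example of PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)):

    #include <cassert>

    /* A block is "maybe hot" when freq * fraction >= entry_freq, i.e. it
       runs at least 1/fraction as often as the function entry.  */
    static bool
    maybe_hot_frequency_sketch (int freq, int entry_freq, int fraction)
    {
      if (fraction == 0)
        return false;        /* parameter 0 disables frequency hotness */
      return freq * fraction >= entry_freq;
    }

    int
    main ()
    {
      assert (maybe_hot_frequency_sketch (10, 10000, 1000));  /* exactly 1/1000 */
      assert (!maybe_hot_frequency_sketch (9, 10000, 1000));  /* just below */
    }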
@@ -133 +149 @@

+static gcov_type min_count = -1;
+
+/* Determine the threshold for hot BB counts.  */
+
+gcov_type
+get_hot_bb_threshold ()
+{
+  gcov_working_set_t *ws;
+  if (min_count == -1)
+    {
+      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
+      gcc_assert (ws);
+      min_count = ws->min_counter;
+    }
+  return min_count;
+}
+
+/* Set the threshold for hot BB counts.  */
+
+void
+set_hot_bb_threshold (gcov_type min)
+{
+  min_count = min;
+}
+
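get_hot_bb_threshold derives the hot-count cutoff lazily from the profile's working set and caches it in min_count; set_hot_bb_threshold lets a later pass override the cache. A minimal sketch of the same lazy-cache pattern, where working_set_min_counter is a hypothetical stand-in for find_working_set and its returned values are invented:

    #include <cstdint>

    typedef int64_t gcov_type_sketch;

    static gcov_type_sketch min_count_sketch = -1;

    /* Hypothetical stand-in for find_working_set (): report the smallest
       counter among the hot blocks covering the requested permille of
       execution.  */
    static gcov_type_sketch
    working_set_min_counter (int permille)
    {
      return permille >= 999 ? 2 : 1000;
    }

    static gcov_type_sketch
    get_hot_bb_threshold_sketch (int permille)
    {
      if (min_count_sketch == -1)      /* computed once, then cached */
        min_count_sketch = working_set_min_counter (permille);
      return min_count_sketch;
    }

    static void
    set_hot_bb_threshold_sketch (gcov_type_sketch min)
    {
      min_count_sketch = min;          /* a later pass can override it */
    }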
@@ -134 +175 @@
 /* Return TRUE if frequency FREQ is considered to be hot.  */

-static inline bool
-maybe_hot_count_p (gcov_type count)
+bool
+maybe_hot_count_p (struct function *, profile_count count)
 {
-  if (profile_status != PROFILE_READ)
+  if (!count.initialized_p ())
     return true;
   /* Code executed at most once is not hot.  */
-  if (profile_info->runs >= count)
+  if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
     return false;
-  return (count
-	  > profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION));
+  return (count.to_gcov_type () >= get_hot_bb_threshold ());
 }

 /* Return true in case BB can be CPU intensive and should be optimized
    for maximal performance.  */

 bool
-maybe_hot_bb_p (const_basic_block bb)
+maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 {
-  if (profile_status == PROFILE_READ)
-    return maybe_hot_count_p (bb->count);
-  return maybe_hot_frequency_p (bb->frequency);
-}
-
-/* Return true if the call can be hot.  */
-
-bool
-cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
-{
-  if (profile_info && flag_branch_probabilities
-      && (edge->count
-	  <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
+  gcc_checking_assert (fun);
+  if (!maybe_hot_count_p (fun, bb->count))
     return false;
-  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
-      || edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
-    return false;
-  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
-      && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE)
-    return false;
-  if (optimize_size)
-    return false;
-  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
-    return true;
-  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
-      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
-    return false;
-  if (flag_guess_branch_prob
-      && edge->frequency <= (CGRAPH_FREQ_BASE
-			     / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
-    return false;
-  return true;
+  return maybe_hot_frequency_p (fun, bb->frequency);
 }

 /* Return true in case BB can be CPU intensive and should be optimized
    for maximal performance.  */

 bool
 maybe_hot_edge_p (edge e)
 {
-  if (profile_status == PROFILE_READ)
-    return maybe_hot_count_p (e->count);
-  return maybe_hot_frequency_p (EDGE_FREQUENCY (e));
+  if (!maybe_hot_count_p (cfun, e->count ()))
+    return false;
+  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 }

-/* Return true in case BB is probably never executed.  */
-bool
-probably_never_executed_bb_p (const_basic_block bb)
-{
-  if (profile_info && flag_branch_probabilities)
-    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
-  if ((!profile_info || !flag_branch_probabilities)
-      && cgraph_node (current_function_decl)->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+/* Return true if profile COUNT and FREQUENCY, or function FUN static
+   node frequency reflects never being executed.  */
+
+static bool
+probably_never_executed (struct function *fun,
+			 profile_count count, int)
+{
+  gcc_checking_assert (fun);
+  if (count == profile_count::zero ())
+    return true;
+  if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ)
+    {
+      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
+      if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs)
+	return false;
+      return true;
+    }
+  if ((!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
+      && (cgraph_node::get (fun->decl)->frequency
+	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
     return true;
   return false;
 }
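With a read profile, the new code treats a count as never executed when scaling it up by UNLIKELY_BB_COUNT_FRACTION still leaves it short of the number of training runs. A worked sketch in plain integers (the fraction 20 below is only an assumed parameter value):

    #include <cstdint>

    static bool
    probably_never_executed_sketch (int64_t count, int64_t runs, int fraction)
    {
      /* Mirrors count.apply_scale (fraction, 1) >= runs above: a block
         executed at least once per 'fraction' runs is not "never".  */
      return count * fraction < runs;
    }

    /* Over 1000 training runs with fraction 20, a block needs at least 50
       recorded executions to escape the "probably never executed" bucket:
         probably_never_executed_sketch (49, 1000, 20) == true
         probably_never_executed_sketch (50, 1000, 20) == false  */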
@@ -210 +234 @@

+
+/* Return true in case BB is probably never executed.  */
+
+bool
+probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
+{
+  return probably_never_executed (fun, bb->count, bb->frequency);
+}
+
+
+/* Return true if E is unlikely executed for obvious reasons.  */
+
+static bool
+unlikely_executed_edge_p (edge e)
+{
+  return (e->count () == profile_count::zero ()
+	  || e->probability == profile_probability::never ())
+	 || (e->flags & (EDGE_EH | EDGE_FAKE));
+}
+
+/* Return true in case edge E is probably never executed.  */
+
+bool
+probably_never_executed_edge_p (struct function *fun, edge e)
+{
+  if (unlikely_executed_edge_p (e))
+    return true;
+  return probably_never_executed (fun, e->count (), EDGE_FREQUENCY (e));
+}
+
 /* Return true when current function should always be optimized for size.  */

 bool
 optimize_function_for_size_p (struct function *fun)
 {
-  return (optimize_size
-	  || (fun && fun->decl
-	      && (cgraph_node (fun->decl)->frequency
-		  == NODE_FREQUENCY_UNLIKELY_EXECUTED)));
+  if (!fun || !fun->decl)
+    return optimize_size;
+  cgraph_node *n = cgraph_node::get (fun->decl);
+  return n && n->optimize_for_size_p ();
 }

 /* Return true when current function should always be optimized for speed.  */

 bool
 optimize_function_for_speed_p (struct function *fun)
 {
   return !optimize_function_for_size_p (fun);
 }

+/* Return the optimization type that should be used for the function FUN.  */
+
+optimization_type
+function_optimization_type (struct function *fun)
+{
+  return (optimize_function_for_speed_p (fun)
+	  ? OPTIMIZE_FOR_SPEED
+	  : OPTIMIZE_FOR_SIZE);
+}
+
 /* Return TRUE when BB should be optimized for size.  */

 bool
 optimize_bb_for_size_p (const_basic_block bb)
 {
-  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb);
+  return (optimize_function_for_size_p (cfun)
+	  || (bb && !maybe_hot_bb_p (cfun, bb)));
 }

 /* Return TRUE when BB should be optimized for speed.  */

 bool
 optimize_bb_for_speed_p (const_basic_block bb)
 {
   return !optimize_bb_for_size_p (bb);
+}
+
+/* Return the optimization type that should be used for block BB.  */
+
+optimization_type
+bb_optimization_type (const_basic_block bb)
+{
+  return (optimize_bb_for_speed_p (bb)
+	  ? OPTIMIZE_FOR_SPEED
+	  : OPTIMIZE_FOR_SIZE);
 }

 /* Return TRUE when BB should be optimized for size.  */

 bool
@@ -331 +406 @@
    predictor.  */

 bool
 predictable_edge_p (edge e)
 {
-  if (profile_status == PROFILE_ABSENT)
+  if (!e->probability.initialized_p ())
     return false;
-  if ((e->probability
+  if ((e->probability.to_reg_br_prob_base ()
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
-      || (REG_BR_PROB_BASE - e->probability
+      || (REG_BR_PROB_BASE - e->probability.to_reg_br_prob_base ()
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
     return true;
   return false;
 }
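predictable_edge_p accepts an edge whose probability lies within PARAM_PREDICTABLE_BRANCH_OUTCOME percent of either end of the scale. The same cutoff test in plain integers (the 2 percent used below is an assumed parameter value):

    static const int BASE = 10000;  /* stand-in for REG_BR_PROB_BASE */

    static bool
    predictable_edge_sketch (int probability, int outcome_percent)
    {
      int cutoff = outcome_percent * BASE / 100;
      return probability <= cutoff || BASE - probability <= cutoff;
    }

    /* With outcome_percent == 2:
         predictable_edge_sketch (150, 2) == true    (1.5% taken)
         predictable_edge_sketch (9900, 2) == true   (99% taken)
         predictable_edge_sketch (5000, 2) == false  (coin flip)  */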
@@ -345 +420 @@

@@ -347 +422 @@
 /* Set RTL expansion for BB profile.  */

 void
 rtl_profile_for_bb (basic_block bb)
 {
-  crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb);
+  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 }

 /* Set RTL expansion for edge profile.  */

 void
@@ -381 +456 @@
	 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
       return true;
   return false;
 }

-/* This map contains for a basic block the list of predictions for the
-   outgoing edges.  */
-
-static struct pointer_map_t *bb_predictions;
-
 /* Structure representing predictions in tree level.  */

 struct edge_prediction {
   struct edge_prediction *ep_next;
   edge ep_edge;
   enum br_predictor ep_predictor;
   int ep_probability;
 };

+/* This map contains for a basic block the list of predictions for the
+   outgoing edges.  */
+
+static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
+
 /* Return true if the one of outgoing edges is already predicted by
    PREDICTOR.  */

 bool
 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 {
   struct edge_prediction *i;
-  void **preds = pointer_map_contains (bb_predictions, bb);
+  edge_prediction **preds = bb_predictions->get (bb);

   if (!preds)
     return false;

-  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
+  for (i = *preds; i; i = i->ep_next)
     if (i->ep_predictor == predictor)
       return true;
   return false;
 }

-/* Return true when the probability of edge is reliable.
-
-   The profile guessing code is good at predicting branch outcome (ie.
-   taken/not taken), that is predicted right slightly over 75% of time.
-   It is however notoriously poor on predicting the probability itself.
-   In general the profile appear a lot flatter (with probabilities closer
-   to 50%) than the reality so it is bad idea to use it to drive optimization
-   such as those disabling dynamic branch prediction for well predictable
-   branches.
-
-   There are two exceptions - edges leading to noreturn edges and edges
-   predicted by number of iterations heuristics are predicted well.  This macro
-   should be able to distinguish those, but at the moment it simply check for
-   noreturn heuristic that is only one giving probability over 99% or bellow
-   1%.  In future we might want to propagate reliability information across the
-   CFG if we find this information useful on multiple places.  */
-static bool
-probability_reliable_p (int prob)
-{
-  return (profile_status == PROFILE_READ
-	  || (profile_status == PROFILE_GUESSED
-	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
+/* Return true if the one of outgoing edges is already predicted by
+   PREDICTOR for edge E predicted as TAKEN.  */
+
+bool
+edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
+{
+  struct edge_prediction *i;
+  basic_block bb = e->src;
+  edge_prediction **preds = bb_predictions->get (bb);
+  if (!preds)
+    return false;
+
+  int probability = predictor_info[(int) predictor].hitrate;
+
+  if (taken != TAKEN)
+    probability = REG_BR_PROB_BASE - probability;
+
+  for (i = *preds; i; i = i->ep_next)
+    if (i->ep_predictor == predictor
+	&& i->ep_edge == e
+	&& i->ep_probability == probability)
+      return true;
+  return false;
 }

 /* Same predicate as above, working on edges.  */
 bool
 edge_probability_reliable_p (const_edge e)
 {
-  return probability_reliable_p (e->probability);
+  return e->probability.probably_reliable_p ();
 }

 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 bool
 br_prob_note_reliable_p (const_rtx note)
 {
   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
-  return probability_reliable_p (INTVAL (XEXP (note, 0)));
+  return profile_probability::from_reg_br_prob_note
+		 (XINT (note, 0)).probably_reliable_p ();
 }

 static void
-predict_insn (rtx insn, enum br_predictor predictor, int probability)
+predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 {
   gcc_assert (any_condjump_p (insn));
   if (!flag_guess_branch_prob)
     return;

@@ -468 +545 @@
 }

 /* Predict insn by given predictor.  */

 void
-predict_insn_def (rtx insn, enum br_predictor predictor,
+predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
		   enum prediction taken)
 {
   int probability = predictor_info[(int) predictor].hitrate;

   if (taken != TAKEN)
@@ -484 +561 @@
 /* Predict edge E with given probability if possible.  */

 void
 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 {
-  rtx last_insn;
+  rtx_insn *last_insn;
   last_insn = BB_END (e->src);

   /* We can store the branch prediction information only about
      conditional jumps.  */
   if (!any_condjump_p (last_insn))
@@ -503 +580 @@

 /* Predict edge E with the given PROBABILITY.  */
 void
 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 {
-  gcc_assert (profile_status != PROFILE_GUESSED);
-  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
-      && flag_guess_branch_prob && optimize)
+  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
+      && EDGE_COUNT (e->src->succs) > 1
+      && flag_guess_branch_prob
+      && optimize)
     {
       struct edge_prediction *i = XNEW (struct edge_prediction);
-      void **preds = pointer_map_insert (bb_predictions, e->src);
+      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

-      i->ep_next = (struct edge_prediction *) *preds;
-      *preds = i;
+      i->ep_next = preds;
+      preds = i;
       i->ep_probability = probability;
       i->ep_predictor = predictor;
       i->ep_edge = e;
     }
 }

-/* Remove all predictions on given basic block that are attached
-   to edge E.  */
+/* Filter edge predictions PREDS by a function FILTER.  DATA are passed
+   to the filter function.  */
+
 void
-remove_predictions_associated_with_edge (edge e)
-{
-  void **preds;
-
+filter_predictions (edge_prediction **preds,
+		    bool (*filter) (edge_prediction *, void *), void *data)
+{
   if (!bb_predictions)
     return;

-  preds = pointer_map_contains (bb_predictions, e->src);
-
   if (preds)
     {
-      struct edge_prediction **prediction = (struct edge_prediction **) preds;
+      struct edge_prediction **prediction = preds;
       struct edge_prediction *next;

       while (*prediction)
	 {
-	  if ((*prediction)->ep_edge == e)
-	    {
-	      next = (*prediction)->ep_next;
-	      free (*prediction);
-	      *prediction = next;
-	    }
-	  else
-	    prediction = &((*prediction)->ep_next);
+	  if ((*filter) (*prediction, data))
+	    prediction = &((*prediction)->ep_next);
+	  else
+	    {
+	      next = (*prediction)->ep_next;
+	      free (*prediction);
+	      *prediction = next;
+	    }
	 }
     }
 }

+/* Filter function predicate that returns true for an edge predicate P
+   if its edge is equal to DATA.  */
+
+bool
+equal_edge_p (edge_prediction *p, void *data)
+{
+  return p->ep_edge == (edge)data;
+}
+
+/* Remove all predictions on given basic block that are attached
+   to edge E.  */
+void
+remove_predictions_associated_with_edge (edge e)
+{
+  if (!bb_predictions)
+    return;
+
+  edge_prediction **preds = bb_predictions->get (e->src);
+  filter_predictions (preds, equal_edge_p, e);
+}

 /* Clears the list of predictions stored for BB.  */

 static void
 clear_bb_predictions (basic_block bb)
 {
-  void **preds = pointer_map_contains (bb_predictions, bb);
+  edge_prediction **preds = bb_predictions->get (bb);
   struct edge_prediction *pred, *next;

   if (!preds)
     return;

-  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
+  for (pred = *preds; pred; pred = next)
     {
       next = pred->ep_next;
       free (pred);
     }
   *preds = NULL;
@@ -572 +669 @@

 /* Return true when we can store prediction on insn INSN.
    At the moment we represent predictions only on conditional
    jumps, not at computed jump or other complicated cases.  */
 static bool
-can_predict_insn_p (const_rtx insn)
+can_predict_insn_p (const rtx_insn *insn)
 {
   return (JUMP_P (insn)
	   && any_condjump_p (insn)
	   && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 }
@@ -603 +700 @@
 {
   rtx note;

   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
     if (REG_NOTE_KIND (note) == REG_BR_PROB)
-      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
+      XINT (note, 0) = profile_probability::from_reg_br_prob_note
+			 (XINT (note, 0)).invert ().to_reg_br_prob_note ();
     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
       XEXP (XEXP (note, 0), 1)
	 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 }

 /* Dump information about the branch prediction to the output file.  */

 static void
 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
-		 basic_block bb, int used)
-{
-  edge e;
+		 basic_block bb, enum predictor_reason reason = REASON_NONE,
+		 edge ep_edge = NULL)
+{
+  edge e = ep_edge;
   edge_iterator ei;

   if (!file)
     return;

+  if (e == NULL)
+    FOR_EACH_EDGE (e, ei, bb->succs)
+      if (! (e->flags & EDGE_FALLTHRU))
+	break;
+
+  char edge_info_str[128];
+  if (ep_edge)
+    sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
+	     ep_edge->dest->index);
+  else
+    edge_info_str[0] = '\0';
+
+  fprintf (file, "  %s heuristics%s%s: %.1f%%",
+	   predictor_info[predictor].name,
+	   edge_info_str, reason_messages[reason],
+	   probability * 100.0 / REG_BR_PROB_BASE);
+
+  if (bb->count.initialized_p ())
+    {
+      fprintf (file, "  exec ");
+      bb->count.dump (file);
+      if (e)
+	{
+	  fprintf (file, " hit ");
+	  e->count ().dump (file);
+	  fprintf (file, " (%.1f%%)", e->count ().to_gcov_type() * 100.0
+		   / bb->count.to_gcov_type ());
+	}
+    }
+
+  fprintf (file, "\n");
+}
-  FOR_EACH_EDGE (e, ei, bb->succs)
-    if (! (e->flags & EDGE_FALLTHRU))
-      break;
-
-  fprintf (file, "  %s heuristics%s: %.1f%%",
-	   predictor_info[predictor].name,
-	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
-
-  if (bb->count)
-    {
-      fprintf (file, "  exec ");
-      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
-      if (e)
-	{
-	  fprintf (file, " hit ");
-	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
-	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
-	}
-    }
-
-  fprintf (file, "\n");
-}
-
-/* We can not predict the probabilities of outgoing edges of bb.  Set them
-   evenly and hope for the best.  */
-static void
-set_even_probabilities (basic_block bb)
-{
-  int nedges = 0;
-  edge e;
-  edge_iterator ei;
-
-  FOR_EACH_EDGE (e, ei, bb->succs)
-    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
-      nedges ++;
-  FOR_EACH_EDGE (e, ei, bb->succs)
-    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
-      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
-    else
-      e->probability = 0;
-}
+
+/* Return true if STMT is known to be unlikely executed.  */
+
+static bool
+unlikely_executed_stmt_p (gimple *stmt)
+{
+  if (!is_gimple_call (stmt))
+    return false;
+  /* NORETURN attribute alone is not strong enough: exit() may be quite
+     likely executed once during program run.  */
+  if (gimple_call_fntype (stmt)
+      && lookup_attribute ("cold",
+			   TYPE_ATTRIBUTES (gimple_call_fntype (stmt)))
+      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
+    return true;
+  tree decl = gimple_call_fndecl (stmt);
+  if (!decl)
+    return false;
+  if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl))
+      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
+    return true;
+
+  cgraph_node *n = cgraph_node::get (decl);
+  if (!n)
+    return false;
+
+  availability avail;
+  n = n->ultimate_alias_target (&avail);
+  if (avail < AVAIL_AVAILABLE)
+    return false;
+  if (!n->analyzed
+      || n->decl == current_function_decl)
+    return false;
+  return n->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED;
+}
+
+/* Return true if BB is unlikely executed.  */
+
+static bool
+unlikely_executed_bb_p (basic_block bb)
+{
+  if (bb->count == profile_count::zero ())
+    return true;
+  if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun) || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
+    return false;
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      if (unlikely_executed_stmt_p (gsi_stmt (gsi)))
+	return true;
+      if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
+	return false;
+    }
+  return false;
+}
+
+/* We can not predict the probabilities of outgoing edges of bb.  Set them
+   evenly and hope for the best.  If UNLIKELY_EDGES is not null, distribute
+   even probability for all edges not mentioned in the set.  These edges
+   are given PROB_VERY_UNLIKELY probability.  */
+
+static void
+set_even_probabilities (basic_block bb,
+			hash_set<edge> *unlikely_edges = NULL)
+{
+  unsigned nedges = 0, unlikely_count = 0;
+  edge e = NULL;
+  edge_iterator ei;
+  profile_probability all = profile_probability::always ();
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    if (e->probability.initialized_p ())
+      all -= e->probability;
+    else if (!unlikely_executed_edge_p (e))
+      {
+	nedges ++;
+	if (unlikely_edges != NULL && unlikely_edges->contains (e))
+	  {
+	    all -= profile_probability::very_unlikely ();
+	    unlikely_count++;
+	  }
+      }
+
+  /* Make the distribution even if all edges are unlikely.  */
+  if (unlikely_count == nedges)
+    {
+      unlikely_edges = NULL;
+      unlikely_count = 0;
+    }
+
+  unsigned c = nedges - unlikely_count;
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    if (e->probability.initialized_p ())
+      ;
+    else if (!unlikely_executed_edge_p (e))
+      {
+	if (unlikely_edges != NULL && unlikely_edges->contains (e))
+	  e->probability = profile_probability::very_unlikely ();
+	else
+	  e->probability = all.apply_scale (1, c).guessed ();
+      }
+    else
+      e->probability = profile_probability::never ();
+}
+
+/* Add REG_BR_PROB note to JUMP with PROB.  */
+
+void
+add_reg_br_prob_note (rtx_insn *jump, profile_probability prob)
+{
+  gcc_checking_assert (JUMP_P (jump) && !find_reg_note (jump, REG_BR_PROB, 0));
+  add_int_reg_note (jump, REG_BR_PROB, prob.to_reg_br_prob_note ());
+}

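In the new set_even_probabilities, the probability budget left after already-initialized edges and a PROB_VERY_UNLIKELY share per flagged edge is split evenly across the remaining successors. A standalone sketch of that distribution using double in place of profile_probability (it assumes at least one edge stays outside the unlikely set, as the "make the distribution even" fallback above guarantees; 0.0005 is a made-up stand-in for PROB_VERY_UNLIKELY):

    #include <cstdio>
    #include <vector>

    struct edge_sketch { bool unlikely; double probability; };

    static void
    set_even_probabilities_sketch (std::vector<edge_sketch> &succs)
    {
      const double very_unlikely = 0.0005;
      double all = 1.0;
      unsigned nedges = 0, unlikely_count = 0;

      for (const edge_sketch &e : succs)   /* reserve the unlikely shares */
        {
          nedges++;
          if (e.unlikely)
            {
              all -= very_unlikely;
              unlikely_count++;
            }
        }

      unsigned c = nedges - unlikely_count;  /* edges sharing the remainder */
      for (edge_sketch &e : succs)
        e.probability = e.unlikely ? very_unlikely : all / c;
    }

    int
    main ()
    {
      std::vector<edge_sketch> succs = { {false, 0}, {false, 0}, {true, 0} };
      set_even_probabilities_sketch (succs);
      for (const edge_sketch &e : succs)
        std::printf ("%.5f\n", e.probability);  /* 0.49975 0.49975 0.00050 */
    }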
@@ -668 +872 @@
 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
    note if not already present.  Remove now useless REG_BR_PRED notes.  */

 static void
-combine_predictions_for_insn (rtx insn, basic_block bb)
+combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
 {
   rtx prob_note;
   rtx *pnote;
   rtx note;
   int best_probability = PROB_EVEN;
@@ -701 +905 @@
       enum br_predictor predictor = ((enum br_predictor)
				      INTVAL (XEXP (XEXP (note, 0), 0)));
       int probability = INTVAL (XEXP (XEXP (note, 0), 1));

       found = true;
-      if (best_predictor > predictor)
+      if (best_predictor > predictor
+	  && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	 best_probability = probability, best_predictor = predictor;

       d = (combined_probability * probability
	    + (REG_BR_PROB_BASE - combined_probability)
	    * (REG_BR_PROB_BASE - probability));
@@ -721 +926 @@

   /* Decide which heuristic to use.  In case we didn't match anything,
      use no_prediction heuristic, in case we did match, use either
      first match or Dempster-Shaffer theory depending on the flags.  */

-  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
+  if (best_predictor != END_PREDICTORS)
     first_match = true;

   if (!found)
     dump_prediction (dump_file, PRED_NO_PREDICTION,
-		     combined_probability, bb, true);
+		     combined_probability, bb);
   else
     {
-      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
-		       bb, !first_match);
-      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
-		       bb, first_match);
+      if (!first_match)
+	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
+			 bb, !first_match ? REASON_NONE : REASON_IGNORED);
+      else
+	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
+			 bb, first_match ? REASON_NONE : REASON_IGNORED);
     }

   if (first_match)
     combined_probability = best_probability;
-  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
+  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

   while (*pnote)
     {
       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	 {
	   enum br_predictor predictor = ((enum br_predictor)
					  INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	   int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	   dump_prediction (dump_file, predictor, probability, bb,
-			    !first_match || best_predictor == predictor);
+			    (!first_match || best_predictor == predictor)
+			    ? REASON_NONE : REASON_IGNORED);
	   *pnote = XEXP (*pnote, 1);
	 }
       else
	 pnote = &XEXP (*pnote, 1);
     }

   if (!prob_note)
     {
-      add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));
+      profile_probability p
+	 = profile_probability::from_reg_br_prob_base (combined_probability);
+      add_reg_br_prob_note (insn, p);

       /* Save the prediction into CFG in case we are seeing non-degenerated
	  conditional jump.  */
       if (!single_succ_p (bb))
	 {
-	  BRANCH_EDGE (bb)->probability = combined_probability;
+	  BRANCH_EDGE (bb)->probability = p;
	   FALLTHRU_EDGE (bb)->probability
-	    = REG_BR_PROB_BASE - combined_probability;
+	    = BRANCH_EDGE (bb)->probability.invert ();
	 }
     }
   else if (!single_succ_p (bb))
     {
-      int prob = INTVAL (XEXP (prob_note, 0));
+      profile_probability prob = profile_probability::from_reg_br_prob_note
+	 (XINT (prob_note, 0));

       BRANCH_EDGE (bb)->probability = prob;
-      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
+      FALLTHRU_EDGE (bb)->probability = prob.invert ();
     }
   else
-    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
+    single_succ_edge (bb)->probability = profile_probability::always ();
+}
+
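The d expression in the hunk above implements the Dempster-Shafer style pooling: starting from an undecided 1/2, each REG_BR_PRED note's probability p updates the running value q to q*p / (q*p + (1-q)*(1-p)), all in REG_BR_PROB_BASE fixed point. A standalone sketch with a worked combination (the 78% and 84% hitrates are made-up example numbers):

    #include <cstdio>

    static const int BASE = 10000;  /* stand-in for REG_BR_PROB_BASE */

    /* Fold one predictor's probability into the running combination,
       mirroring the fixed-point update in combine_predictions_for_insn.  */
    static int
    ds_combine (int q, int p)
    {
      long long d = (long long) q * p + (long long) (BASE - q) * (BASE - p);
      return (int) (((long long) q * p * BASE + d / 2) / d);  /* rounded */
    }

    int
    main ()
    {
      int q = BASE / 2;          /* start undecided */
      q = ds_combine (q, 7800);  /* first predictor: 78% taken */
      q = ds_combine (q, 8400);  /* second predictor agrees: 84% taken */
      std::printf ("%d\n", q);   /* agreement reinforces: prints 9490 */
    }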
+/* Edge prediction hash traits.  */
+
+struct predictor_hash: pointer_hash <edge_prediction>
+{
+
+  static inline hashval_t hash (const edge_prediction *);
+  static inline bool equal (const edge_prediction *, const edge_prediction *);
+};
+
+/* Calculate hash value of an edge prediction P based on predictor and
+   normalized probability.  */
+
+inline hashval_t
+predictor_hash::hash (const edge_prediction *p)
+{
+  inchash::hash hstate;
+  hstate.add_int (p->ep_predictor);
+
+  int prob = p->ep_probability;
+  if (prob > REG_BR_PROB_BASE / 2)
+    prob = REG_BR_PROB_BASE - prob;
+
+  hstate.add_int (prob);
+
+  return hstate.end ();
+}
+
+/* Return true whether edge predictions P1 and P2 use the same predictor and
+   have equal (or opposed probability).  */
+
+inline bool
+predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
+{
+  return (p1->ep_predictor == p2->ep_predictor
+	  && (p1->ep_probability == p2->ep_probability
+	     || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
+}
+
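Folding the probability onto one half of the range makes a prediction and its mirror image hash identically, which is what lets equal () above treat "edge A taken with 90%" and "the sibling edge taken with 10%" as the same statement. The normalization in isolation:

    static const int BASE = 10000;  /* stand-in for REG_BR_PROB_BASE */

    /* Mirrors the body of predictor_hash::hash: p and BASE - p map to the
       same key, so mirrored predictions collide and reach equal ().  */
    static int
    normalized_prob (int prob)
    {
      return prob > BASE / 2 ? BASE - prob : prob;
    }

    /* normalized_prob (9000) == normalized_prob (1000) == 1000  */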
+struct predictor_hash_traits: predictor_hash,
+  typed_noop_remove <edge_prediction *> {};
+
+/* Return true if edge prediction P is not in DATA hash set.  */
+
+static bool
+not_removed_prediction_p (edge_prediction *p, void *data)
+{
+  hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
+  return !remove->contains (p);
+}
+
+/* Prune predictions for a basic block BB.  Currently we do following
+   clean-up steps:
+
+   1) remove duplicate prediction that is guessed with the same probability
+      (different than 1/2) to both edge
+   2) remove duplicates for a prediction that belongs with the same probability
+      to a single edge
+
+  */
+
+static void
+prune_predictions_for_bb (basic_block bb)
+{
+  edge_prediction **preds = bb_predictions->get (bb);
+
+  if (preds)
+    {
+      hash_table <predictor_hash_traits> s (13);
+      hash_set <edge_prediction *> remove;
+
+      /* Step 1: identify predictors that should be removed.  */
+      for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
+	{
+	  edge_prediction *existing = s.find (pred);
+	  if (existing)
+	    {
+	      if (pred->ep_edge == existing->ep_edge
+		  && pred->ep_probability == existing->ep_probability)
+		{
+		  /* Remove a duplicate predictor.  */
+		  dump_prediction (dump_file, pred->ep_predictor,
+				   pred->ep_probability, bb,
+				   REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);
+
+		  remove.add (pred);
+		}
+	      else if (pred->ep_edge != existing->ep_edge
+		       && pred->ep_probability == existing->ep_probability
+		       && pred->ep_probability != REG_BR_PROB_BASE / 2)
+		{
+		  /* Remove both predictors as they predict the same
+		     for both edges.  */
+		  dump_prediction (dump_file, existing->ep_predictor,
+				   pred->ep_probability, bb,
+				   REASON_EDGE_PAIR_DUPLICATE,
+				   existing->ep_edge);
+		  dump_prediction (dump_file, pred->ep_predictor,
+				   pred->ep_probability, bb,
+				   REASON_EDGE_PAIR_DUPLICATE,
+				   pred->ep_edge);
+
+		  remove.add (existing);
+		  remove.add (pred);
+		}
+	    }
+
+	  edge_prediction **slot2 = s.find_slot (pred, INSERT);
+	  *slot2 = pred;
+	}
+
+      /* Step 2: Remove predictors.  */
+      filter_predictions (preds, not_removed_prediction_p, &remove);
+    }
 }
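Restating the two pruning rules: a colliding prediction is dropped when it repeats the same predictor, edge, and probability, and both predictions are dropped when the same predictor assigns one non-1/2 probability to two different edges, which is self-contradictory on a two-way branch. A compact sketch of the classification, on plain stand-in records:

    struct pred_sketch { int predictor; int edge; int prob; };

    static const int BASE = 10000;  /* stand-in for REG_BR_PROB_BASE */

    enum prune_action { KEEP, DROP_SECOND, DROP_BOTH };

    /* Classify a pair that collided in the hash table above.  */
    static prune_action
    classify (const pred_sketch &a, const pred_sketch &b)
    {
      bool same_key = a.predictor == b.predictor
                      && (a.prob == b.prob || a.prob == BASE - b.prob);
      if (!same_key)
        return KEEP;
      if (a.edge == b.edge && a.prob == b.prob)
        return DROP_SECOND;            /* single edge duplicate */
      if (a.edge != b.edge && a.prob == b.prob && a.prob != BASE / 2)
        return DROP_BOTH;              /* edge pair duplicate */
      return KEEP;
    }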
@@ -783 +1109 @@

 /* Combine predictions into single probability and store them into CFG.
-   Remove now useless prediction entries.  */
+   Remove now useless prediction entries.
+   If DRY_RUN is set, only produce dumps and do not modify profile.  */

 static void
-combine_predictions_for_bb (basic_block bb)
+combine_predictions_for_bb (basic_block bb, bool dry_run)
 {
   int best_probability = PROB_EVEN;
   enum br_predictor best_predictor = END_PREDICTORS;
   int combined_probability = REG_BR_PROB_BASE / 2;
   int d;
@@ -795 +1122 @@
   bool found = false;
   struct edge_prediction *pred;
   int nedges = 0;
   edge e, first = NULL, second = NULL;
   edge_iterator ei;
-  void **preds;

   FOR_EACH_EDGE (e, ei, bb->succs)
-    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
+    if (!unlikely_executed_edge_p (e))
       {
	 nedges ++;
	 if (first && !second)
	   second = e;
	 if (!first)
	   first = e;
       }
+    else if (!e->probability.initialized_p ())
+      e->probability = profile_probability::never ();

   /* When there is no successor or only one choice, prediction is easy.

-     We are lazy for now and predict only basic blocks with two outgoing
-     edges.  It is possible to predict generic case too, but we have to
-     ignore first match heuristics and do more involved combining.  Implement
-     this later.  */
+     When we have a basic block with more than 2 successors, the situation
+     is more complicated as DS theory cannot be used literally.
+     More precisely, let's assume we predicted edge e1 with probability p1,
+     thus: m1({b1}) = p1.  As we're going to combine more than 2 edges, we
+     need to find probability of e.g. m1({b2}), which we don't know.
+     The only approximation is to equally distribute 1-p1 to all edges
+     different from b1.
+
+     According to numbers we've got from SPEC2006 benchmark, there's only
+     one interesting reliable predictor (noreturn call), which can be
+     handled with a bit easier approach.  */
   if (nedges != 2)
     {
-      if (!bb->count)
-	set_even_probabilities (bb);
+      hash_set<edge> unlikely_edges (4);
+
+      /* Identify all edges that have a probability close to very unlikely.
+	 Doing this for merely unlikely edges is not worth it, as no such
+	 probability shows up in the SPEC2006 benchmark.  */
+      edge_prediction **preds = bb_predictions->get (bb);
+      if (preds)
+	for (pred = *preds; pred; pred = pred->ep_next)
+	  if (pred->ep_probability <= PROB_VERY_UNLIKELY)
+	    unlikely_edges.add (pred->ep_edge);
+
+      if (!dry_run)
+	set_even_probabilities (bb, &unlikely_edges);
       clear_bb_predictions (bb);
       if (dump_file)
-	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
-		 nedges, bb->index);
+	{
+	  fprintf (dump_file, "Predictions for bb %i\n", bb->index);
+	  if (unlikely_edges.elements () == 0)
+	    fprintf (dump_file,
+		     "%i edges in bb %i predicted to even probabilities\n",
+		     nedges, bb->index);
+	  else
+	    {
+	      fprintf (dump_file,
+		       "%i edges in bb %i predicted with some unlikely edges\n",
+		       nedges, bb->index);
+	      FOR_EACH_EDGE (e, ei, bb->succs)
+		if (!unlikely_executed_edge_p (e))
+		  dump_prediction (dump_file, PRED_COMBINED,
+		       e->probability.to_reg_br_prob_base (), bb, REASON_NONE, e);
+	    }
+	}
       return;
     }

   if (dump_file)
     fprintf (dump_file, "Predictions for bb %i\n", bb->index);

-  preds = pointer_map_contains (bb_predictions, bb);
+  prune_predictions_for_bb (bb);
+
+  edge_prediction **preds = bb_predictions->get (bb);
+
   if (preds)
     {
       /* We implement "first match" heuristics and use probability guessed
	  by predictor with smallest index.  */
-      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
+      for (pred = *preds; pred; pred = pred->ep_next)
	 {
	   enum br_predictor predictor = pred->ep_predictor;
	   int probability = pred->ep_probability;

	   if (pred->ep_edge != first)
	     probability = REG_BR_PROB_BASE - probability;

	   found = true;
	   /* First match heuristics would be widly confused if we predicted
	      both directions.  */
-	  if (best_predictor > predictor)
+	  if (best_predictor > predictor
+	      && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	     {
	       struct edge_prediction *pred2;
	       int prob = probability;

-	      for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
+	      for (pred2 = (struct edge_prediction *) *preds;
+		   pred2; pred2 = pred2->ep_next)
		 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		   {
-		    int probability2 = pred->ep_probability;
+		    int probability2 = pred2->ep_probability;

		     if (pred2->ep_edge != first)
		       probability2 = REG_BR_PROB_BASE - probability2;

		     if ((probability < REG_BR_PROB_BASE / 2) !=
@@ -888 +1254 @@

   /* Decide which heuristic to use.  In case we didn't match anything,
      use no_prediction heuristic, in case we did match, use either
      first match or Dempster-Shaffer theory depending on the flags.  */

-  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
+  if (best_predictor != END_PREDICTORS)
     first_match = true;

   if (!found)
-    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
+    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
   else
     {
-      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
-		       !first_match);
-      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
-		       first_match);
+      if (!first_match)
+	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
+			 !first_match ? REASON_NONE : REASON_IGNORED);
+      else
+	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
+			 first_match ? REASON_NONE : REASON_IGNORED);
     }

   if (first_match)
     combined_probability = best_probability;
-  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
+  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

   if (preds)
     {
       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	 {
	   enum br_predictor predictor = pred->ep_predictor;
	   int probability = pred->ep_probability;

-	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
-	    probability = REG_BR_PROB_BASE - probability;
	   dump_prediction (dump_file, predictor, probability, bb,
-			   !first_match || best_predictor == predictor);
+			   (!first_match || best_predictor == predictor)
+			   ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
	 }
     }
   clear_bb_predictions (bb);

-  if (!bb->count)
-    {
-      first->probability = combined_probability;
-      second->probability = REG_BR_PROB_BASE - combined_probability;
-    }
-}
+  if (!bb->count.initialized_p () && !dry_run)
+    {
+      first->probability
+	 = profile_probability::from_reg_br_prob_base (combined_probability);
+      second->probability = first->probability.invert ();
+    }
+}
+
+/* Check if T1 and T2 satisfy the IV_COMPARE condition.
+   Return the SSA_NAME if the condition satisfies, NULL otherwise.
+
+   T1 and T2 should be one of the following cases:
+     1. T1 is SSA_NAME, T2 is NULL
+     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
+     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
+
+static tree
+strips_small_constant (tree t1, tree t2)
+{
+  tree ret = NULL;
+  int value = 0;
+
+  if (!t1)
+    return NULL;
+  else if (TREE_CODE (t1) == SSA_NAME)
+    ret = t1;
+  else if (tree_fits_shwi_p (t1))
+    value = tree_to_shwi (t1);
+  else
+    return NULL;
+
+  if (!t2)
+    return ret;
+  else if (tree_fits_shwi_p (t2))
+    value = tree_to_shwi (t2);
+  else if (TREE_CODE (t2) == SSA_NAME)
+    {
+      if (ret)
+	return NULL;
+      else
+	ret = t2;
+    }
+
+  if (value <= 4 && value >= -4)
+    return ret;
+  else
+    return NULL;
+}
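To make the three accepted shapes concrete: exactly one SSA_NAME is allowed between the two operands, the other must be absent or a constant within [-4, 4], and that SSA_NAME is what gets returned. A runnable sketch of the acceptance rule over a boiled-down operand model (i and j stand for hypothetical SSA names in the comments):

    #include <cassert>

    /* Acceptance rule of strips_small_constant: t1 is a name or a
       constant; t2 may be absent.  */
    static bool
    accepts (bool t1_is_name, long t1_val,
             bool t2_present, bool t2_is_name, long t2_val)
    {
      if (t1_is_name && t2_present && t2_is_name)
        return false;                    /* two SSA names: reject */
      if (!t1_is_name && (!t2_present || !t2_is_name))
        return false;                    /* no SSA name at all: reject */
      long c = t1_is_name ? (t2_present ? t2_val : 0) : t1_val;
      return c <= 4 && c >= -4;          /* small-constant window */
    }

    int
    main ()
    {
      assert (accepts (true, 0, false, false, 0));  /* (i, NULL) -> i */
      assert (accepts (true, 0, true, false, 3));   /* (i, 3)    -> i */
      assert (accepts (false, -2, true, true, 0));  /* (-2, j)   -> j */
      assert (!accepts (true, 0, true, true, 0));   /* (i, j)    -> NULL */
      assert (!accepts (true, 0, true, false, 5));  /* (i, 5)    -> NULL */
    }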
+
+/* Return the SSA_NAME in T or T's operands.
+   Return NULL if SSA_NAME cannot be found.  */
+
+static tree
+get_base_value (tree t)
+{
+  if (TREE_CODE (t) == SSA_NAME)
+    return t;
+
+  if (!BINARY_CLASS_P (t))
+    return NULL;
+
+  switch (TREE_OPERAND_LENGTH (t))
+    {
+    case 1:
+      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
+    case 2:
+      return strips_small_constant (TREE_OPERAND (t, 0),
+				    TREE_OPERAND (t, 1));
+    default:
+      return NULL;
+    }
+}
+
+/* Check the compare STMT in LOOP.  If it compares an induction
+   variable to a loop invariant, return true, and save
+   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
+   Otherwise return false and set LOOP_INVARIANT to NULL.  */
+
+static bool
+is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
+				     tree *loop_invariant,
+				     enum tree_code *compare_code,
+				     tree *loop_step,
+				     tree *loop_iv_base)
+{
+  tree op0, op1, bound, base;
+  affine_iv iv0, iv1;
+  enum tree_code code;
+  tree step;
+
+  code = gimple_cond_code (stmt);
+  *loop_invariant = NULL;
+
+  switch (code)
+    {
+    case GT_EXPR:
+    case GE_EXPR:
+    case NE_EXPR:
+    case LT_EXPR:
+    case LE_EXPR:
+    case EQ_EXPR:
+      break;
+
+    default:
+      return false;
+    }
+
+  op0 = gimple_cond_lhs (stmt);
+  op1 = gimple_cond_rhs (stmt);
+
+  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
+      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
+    return false;
+  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
+    return false;
+  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
+    return false;
+  if (TREE_CODE (iv0.step) != INTEGER_CST
+      || TREE_CODE (iv1.step) != INTEGER_CST)
+    return false;
+  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
+      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
+    return false;
+
+  if (integer_zerop (iv0.step))
+    {
+      if (code != NE_EXPR && code != EQ_EXPR)
+	code = invert_tree_comparison (code, false);
+      bound = iv0.base;
+      base = iv1.base;
+      if (tree_fits_shwi_p (iv1.step))
+	step = iv1.step;
+      else
+	return false;
+    }
+  else
+    {
+      bound = iv1.base;
+      base = iv0.base;
+      if (tree_fits_shwi_p (iv0.step))
+	step = iv0.step;
+      else
+	return false;
+    }
+
+  if (TREE_CODE (bound) != INTEGER_CST)
+    bound = get_base_value (bound);
+  if (!bound)
+    return false;
+  if (TREE_CODE (base) != INTEGER_CST)
+    base = get_base_value (base);
+  if (!base)
+    return false;
+
+  *loop_invariant = bound;
+  *compare_code = code;
+  *loop_step = step;
+  *loop_iv_base = base;
+  return true;
+}
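For a concrete picture of what the helper extracts, consider a loop whose exit condition compares an induction variable against an invariant bound; under the rules above the outputs would be as annotated (this C fragment is an illustration only):

    /* Illustration: for the condition 'i < n' below,
       is_comparison_with_loop_invariant_p returns true with
         *loop_invariant = n        (the bound; step 0 inside the loop)
         *compare_code   = LT_EXPR
         *loop_step      = 1        (step of the IV i)
         *loop_iv_base   = 0        (initial value of i).  */
    void
    example (int n, int *a)
    {
      for (int i = 0; i < n; i++)
        a[i] = 0;
    }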
1452
1453 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1454
1455 static bool
1456 expr_coherent_p (tree t1, tree t2)
1457 {
1458 gimple *stmt;
1459 tree ssa_name_1 = NULL;
1460 tree ssa_name_2 = NULL;
1461
1462 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1463 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1464
1465 if (t1 == t2)
1466 return true;
1467
1468 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1469 return true;
1470 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1471 return false;
1472
1473 /* Check to see if t1 is expressed/defined with t2. */
1474 stmt = SSA_NAME_DEF_STMT (t1);
1475 gcc_assert (stmt != NULL);
1476 if (is_gimple_assign (stmt))
1477 {
1478 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1479 if (ssa_name_1 && ssa_name_1 == t2)
1480 return true;
1481 }
1482
1483 /* Check to see if t2 is expressed/defined with t1. */
1484 stmt = SSA_NAME_DEF_STMT (t2);
1485 gcc_assert (stmt != NULL);
1486 if (is_gimple_assign (stmt))
1487 {
1488 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1489 if (ssa_name_2 && ssa_name_2 == t1)
1490 return true;
1491 }
1492
1493 /* Check if t1 and t2 are defined from the same single operand. */
1494 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1495 return true;
1496 else
1497 return false;
1498 }
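
Editorial sketch of what "value coherent" means here, using made-up GIMPLE names:

    /* expr_coherent_p (t1, t2) is true when t1 == t2, when both are
       INTEGER_CSTs, or when one SSA name is defined by an assignment
       whose single SSA use operand is the other, e.g.:
         i_2 = i_1 + 1;    =>  expr_coherent_p (i_1, i_2) == true
       It is also true when both defining statements share the same
       single use operand.  */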
1499
1500 /* Return true if BB is predicted by one of the loop heuristics. */
1501
1502 static bool
1503 predicted_by_loop_heuristics_p (basic_block bb)
1504 {
1505 struct edge_prediction *i;
1506 edge_prediction **preds = bb_predictions->get (bb);
1507
1508 if (!preds)
1509 return false;
1510
1511 for (i = *preds; i; i = i->ep_next)
1512 if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
1513 || i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
1514 || i->ep_predictor == PRED_LOOP_ITERATIONS
1515 || i->ep_predictor == PRED_LOOP_EXIT
1516 || i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
1517 || i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
1518 return true;
1519 return false;
1520 }
1521
1522 /* Predict branch probability of BB when BB contains a branch that compares
1523 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1524 loop exit is compared using LOOP_BOUND_CODE, with a step of LOOP_BOUND_STEP.
1525
1526 E.g.
1527 for (int i = 0; i < bound; i++) {
1528 if (i < bound - 2)
1529 computation_1();
1530 else
1531 computation_2();
1532 }
1533
1534 In this loop, we will predict the branch inside the loop to be taken. */
1535
1536 static void
1537 predict_iv_comparison (struct loop *loop, basic_block bb,
1538 tree loop_bound_var,
1539 tree loop_iv_base_var,
1540 enum tree_code loop_bound_code,
1541 int loop_bound_step)
1542 {
1543 gimple *stmt;
1544 tree compare_var, compare_base;
1545 enum tree_code compare_code;
1546 tree compare_step_var;
1547 edge then_edge;
1548 edge_iterator ei;
1549
1550 if (predicted_by_loop_heuristics_p (bb))
1551 return;
1552
1553 stmt = last_stmt (bb);
1554 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1555 return;
1556 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1557 loop, &compare_var,
1558 &compare_code,
1559 &compare_step_var,
1560 &compare_base))
1561 return;
1562
1563 /* Find the taken edge. */
1564 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1565 if (then_edge->flags & EDGE_TRUE_VALUE)
1566 break;
1567
1568 /* When comparing an IV to a loop invariant, NE is more likely to be
1569 taken while EQ is more likely to be not-taken. */
1570 if (compare_code == NE_EXPR)
1571 {
1572 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1573 return;
1574 }
1575 else if (compare_code == EQ_EXPR)
1576 {
1577 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1578 return;
1579 }
1580
1581 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1582 return;
1583
1584 /* If loop bound, base and compare bound are all constants, we can
1585 calculate the probability directly. */
1586 if (tree_fits_shwi_p (loop_bound_var)
1587 && tree_fits_shwi_p (compare_var)
1588 && tree_fits_shwi_p (compare_base))
1589 {
1590 int probability;
1591 bool overflow, overall_overflow = false;
1592 widest_int compare_count, tem;
1593
1594 /* (loop_bound - base) / compare_step */
1595 tem = wi::sub (wi::to_widest (loop_bound_var),
1596 wi::to_widest (compare_base), SIGNED, &overflow);
1597 overall_overflow |= overflow;
1598 widest_int loop_count = wi::div_trunc (tem,
1599 wi::to_widest (compare_step_var),
1600 SIGNED, &overflow);
1601 overall_overflow |= overflow;
1602
1603 if (!wi::neg_p (wi::to_widest (compare_step_var))
1604 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1605 {
1606 /* (loop_bound - compare_bound) / compare_step */
1607 tem = wi::sub (wi::to_widest (loop_bound_var),
1608 wi::to_widest (compare_var), SIGNED, &overflow);
1609 overall_overflow |= overflow;
1610 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1611 SIGNED, &overflow);
1612 overall_overflow |= overflow;
1613 }
1614 else
1615 {
1616 /* (compare_bound - base) / compare_step */
1617 tem = wi::sub (wi::to_widest (compare_var),
1618 wi::to_widest (compare_base), SIGNED, &overflow);
1619 overall_overflow |= overflow;
1620 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1621 SIGNED, &overflow);
1622 overall_overflow |= overflow;
1623 }
1624 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1625 ++compare_count;
1626 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1627 ++loop_count;
1628 if (wi::neg_p (compare_count))
1629 compare_count = 0;
1630 if (wi::neg_p (loop_count))
1631 loop_count = 0;
1632 if (loop_count == 0)
1633 probability = 0;
1634 else if (wi::cmps (compare_count, loop_count) == 1)
1635 probability = REG_BR_PROB_BASE;
1636 else
1637 {
1638 tem = compare_count * REG_BR_PROB_BASE;
1639 tem = wi::udiv_trunc (tem, loop_count);
1640 probability = tem.to_uhwi ();
1641 }
1642
1643 /* FIXME: The branch prediction seems broken. It has only 20% hitrate. */
1644 if (!overall_overflow)
1645 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1646
1647 return;
1648 }
1649
1650 if (expr_coherent_p (loop_bound_var, compare_var))
1651 {
1652 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1653 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1654 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1655 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1656 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1657 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1658 else if (loop_bound_code == NE_EXPR)
1659 {
1660 /* If the loop backedge condition is "(i != bound)", we do
1661 the comparison based on the step of IV:
1662 * step < 0 : backedge condition is like (i > bound)
1663 * step > 0 : backedge condition is like (i < bound) */
1664 gcc_assert (loop_bound_step != 0);
1665 if (loop_bound_step > 0
1666 && (compare_code == LT_EXPR
1667 || compare_code == LE_EXPR))
1668 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1669 else if (loop_bound_step < 0
1670 && (compare_code == GT_EXPR
1671 || compare_code == GE_EXPR))
1672 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1673 else
1674 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1675 }
1676 else
1677 /* The branch is predicted not-taken if loop_bound_code is
1678 the opposite of compare_code. */
1679 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1680 }
1681 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1682 {
1683 /* For cases like:
1684 for (i = s; i < h; i++)
1685 if (i > s + 2) ....
1686 The branch should be predicted taken. */
1687 if (loop_bound_step > 0
1688 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1689 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1690 else if (loop_bound_step < 0
1691 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1692 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1693 else
1694 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1695 }
1696 }
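
To make the all-constant case above concrete, a worked example with assumed numbers (editorial; REG_BR_PROB_BASE is 10000):

    /* For: for (i = 0; i < 16; i++) if (i < 4) f (); else g ();
       loop_count    = (16 - 0) / 1 = 16
       compare_count = ( 4 - 0) / 1 =  4
       probability   = 4 * REG_BR_PROB_BASE / 16 = 2500
       so the THEN edge of `i < 4' is recorded by PRED_LOOP_IV_COMPARE
       as taken 25% of the time.  */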
1697
1698 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1699 exits result from short-circuit conditions that will generate an
1700 if_tmp. E.g.:
1701
1702 if (foo() || global > 10)
1703 break;
1704
1705 This will be translated into:
1706
1707 BB3:
1708 loop header...
1709 BB4:
1710 if foo() goto BB6 else goto BB5
1711 BB5:
1712 if global > 10 goto BB6 else goto BB7
1713 BB6:
1714 goto BB7
1715 BB7:
1716 iftmp = (PHI 0(BB5), 1(BB6))
1717 if iftmp == 1 goto BB8 else goto BB3
1718 BB8:
1719 outside of the loop...
1720
1721 The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1722 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1723 exits. This function takes BB7->BB8 as input, finds the extra loop
1724 exits, and predicts them using PRED_LOOP_EXTRA_EXIT. */
1725
1726 static void
1727 predict_extra_loop_exits (edge exit_edge)
1728 {
1729 unsigned i;
1730 bool check_value_one;
1731 gimple *lhs_def_stmt;
1732 gphi *phi_stmt;
1733 tree cmp_rhs, cmp_lhs;
1734 gimple *last;
1735 gcond *cmp_stmt;
1736
1737 last = last_stmt (exit_edge->src);
1738 if (!last)
1739 return;
1740 cmp_stmt = dyn_cast <gcond *> (last);
1741 if (!cmp_stmt)
1742 return;
1743
1744 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1745 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1746 if (!TREE_CONSTANT (cmp_rhs)
1747 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1748 return;
1749 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1750 return;
1751
1752 /* If check_value_one is true, only the phi_args with value '1' will lead
1753 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1754 loop exit. */
1755 check_value_one = (((integer_onep (cmp_rhs))
1756 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1757 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1758
1759 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1760 if (!lhs_def_stmt)
1761 return;
1762
1763 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1764 if (!phi_stmt)
1765 return;
1766
1767 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1768 {
1769 edge e1;
1770 edge_iterator ei;
1771 tree val = gimple_phi_arg_def (phi_stmt, i);
1772 edge e = gimple_phi_arg_edge (phi_stmt, i);
1773
1774 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1775 continue;
1776 if ((check_value_one ^ integer_onep (val)) == 1)
1777 continue;
1778 if (EDGE_COUNT (e->src->succs) != 1)
1779 {
1780 predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1781 continue;
1782 }
1783
1784 FOR_EACH_EDGE (e1, ei, e->src->preds)
1785 predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1786 }
1787 }
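
Worked evaluation of check_value_one for the CFG in the comment above (editorial):

    /* The compare is `if (iftmp == 1)' and the exit edge BB7->BB8 is
       its TRUE edge, so:
         integer_onep (cmp_rhs)                 = 1
         gimple_cond_code (cmp_stmt) == EQ_EXPR = 1
         exit edge has EDGE_TRUE_VALUE          = 1
         check_value_one = (1 ^ 1) ^ 1          = 1
       Only the PHI argument 1, arriving from BB6, therefore marks the
       extra exits BB4->BB6 and BB5->BB6 for PRED_LOOP_EXTRA_EXIT.  */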
1788
931 1789
932 /* Predict edge probabilities by exploiting loop structure. */ 1790 /* Predict edge probabilities by exploiting loop structure. */
933 1791
934 static void 1792 static void
935 predict_loops (void) 1793 predict_loops (void)
936 { 1794 {
937 loop_iterator li;
938 struct loop *loop; 1795 struct loop *loop;
1796 basic_block bb;
1797 hash_set <struct loop *> with_recursion(10);
1798
1799 FOR_EACH_BB_FN (bb, cfun)
1800 {
1801 gimple_stmt_iterator gsi;
1802 tree decl;
1803
1804 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1805 if (is_gimple_call (gsi_stmt (gsi))
1806 && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
1807 && recursive_call_p (current_function_decl, decl))
1808 {
1809 loop = bb->loop_father;
1810 while (loop && !with_recursion.add (loop))
1811 loop = loop_outer (loop);
1812 }
1813 }
939 1814
940 /* Try to predict out blocks in a loop that are not part of a 1815 /* Try to predict out blocks in a loop that are not part of a
941 natural loop. */ 1816 natural loop. */
942 FOR_EACH_LOOP (li, loop, 0) 1817 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
943 { 1818 {
944 basic_block bb, *bbs; 1819 basic_block bb, *bbs;
945 unsigned j, n_exits; 1820 unsigned j, n_exits = 0;
946 VEC (edge, heap) *exits; 1821 vec<edge> exits;
947 struct tree_niter_desc niter_desc; 1822 struct tree_niter_desc niter_desc;
948 edge ex; 1823 edge ex;
1824 struct nb_iter_bound *nb_iter;
1825 enum tree_code loop_bound_code = ERROR_MARK;
1826 tree loop_bound_step = NULL;
1827 tree loop_bound_var = NULL;
1828 tree loop_iv_base = NULL;
1829 gcond *stmt = NULL;
1830 bool recursion = with_recursion.contains (loop);
949 1831
950 exits = get_loop_exit_edges (loop); 1832 exits = get_loop_exit_edges (loop);
951 n_exits = VEC_length (edge, exits); 1833 FOR_EACH_VEC_ELT (exits, j, ex)
952 1834 if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL))
953 FOR_EACH_VEC_ELT (edge, exits, j, ex) 1835 n_exits ++;
1836 if (!n_exits)
1837 {
1838 exits.release ();
1839 continue;
1840 }
1841
1842 if (dump_file && (dump_flags & TDF_DETAILS))
1843 fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
1844 loop->num, recursion ? " (with recursion)":"", n_exits);
1845 if (dump_file && (dump_flags & TDF_DETAILS)
1846 && max_loop_iterations_int (loop) >= 0)
1847 {
1848 fprintf (dump_file,
1849 "Loop %d iterates at most %i times.\n", loop->num,
1850 (int)max_loop_iterations_int (loop));
1851 }
1852 if (dump_file && (dump_flags & TDF_DETAILS)
1853 && likely_max_loop_iterations_int (loop) >= 0)
1854 {
1855 fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1856 loop->num, (int)likely_max_loop_iterations_int (loop));
1857 }
1858
1859 FOR_EACH_VEC_ELT (exits, j, ex)
954 { 1860 {
955 tree niter = NULL; 1861 tree niter = NULL;
956 HOST_WIDE_INT nitercst; 1862 HOST_WIDE_INT nitercst;
957 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS); 1863 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
958 int probability; 1864 int probability;
959 enum br_predictor predictor; 1865 enum br_predictor predictor;
960 1866 widest_int nit;
961 if (number_of_iterations_exit (loop, ex, &niter_desc, false)) 1867
1868 if (unlikely_executed_edge_p (ex)
1869 || (ex->flags & EDGE_ABNORMAL_CALL))
1870 continue;
1871 /* Loop heuristics do not expect the exit conditional to be inside
1872 an inner loop. We predict from innermost to outermost loop. */
1873 if (predicted_by_loop_heuristics_p (ex->src))
1874 {
1875 if (dump_file && (dump_flags & TDF_DETAILS))
1876 fprintf (dump_file, "Skipping exit %i->%i because "
1877 "it is already predicted.\n",
1878 ex->src->index, ex->dest->index);
1879 continue;
1880 }
1881 predict_extra_loop_exits (ex);
1882
1883 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
962 niter = niter_desc.niter; 1884 niter = niter_desc.niter;
963 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST) 1885 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
964 niter = loop_niter_by_eval (loop, ex); 1886 niter = loop_niter_by_eval (loop, ex);
1887 if (dump_file && (dump_flags & TDF_DETAILS)
1888 && TREE_CODE (niter) == INTEGER_CST)
1889 {
1890 fprintf (dump_file, "Exit %i->%i %d iterates ",
1891 ex->src->index, ex->dest->index,
1892 loop->num);
1893 print_generic_expr (dump_file, niter, TDF_SLIM);
1894 fprintf (dump_file, " times.\n");
1895 }
965 1896
966 if (TREE_CODE (niter) == INTEGER_CST) 1897 if (TREE_CODE (niter) == INTEGER_CST)
967 { 1898 {
968 if (host_integerp (niter, 1) 1899 if (tree_fits_uhwi_p (niter)
969 && compare_tree_int (niter, max-1) == -1) 1900 && max
970 nitercst = tree_low_cst (niter, 1) + 1; 1901 && compare_tree_int (niter, max - 1) == -1)
1902 nitercst = tree_to_uhwi (niter) + 1;
971 else 1903 else
972 nitercst = max; 1904 nitercst = max;
973 predictor = PRED_LOOP_ITERATIONS; 1905 predictor = PRED_LOOP_ITERATIONS;
974 } 1906 }
975 /* If we have just one exit and we can derive some information about 1907 /* If we have just one exit and we can derive some information about
976 the number of iterations of the loop from the statements inside 1908 the number of iterations of the loop from the statements inside
977 the loop, use it to predict this exit. */ 1909 the loop, use it to predict this exit. */
978 else if (n_exits == 1) 1910 else if (n_exits == 1
1911 && estimated_stmt_executions (loop, &nit))
979 { 1912 {
980 nitercst = estimated_loop_iterations_int (loop, false); 1913 if (wi::gtu_p (nit, max))
981 if (nitercst < 0)
982 continue;
983 if (nitercst > max)
984 nitercst = max; 1914 nitercst = max;
985 1915 else
1916 nitercst = nit.to_shwi ();
986 predictor = PRED_LOOP_ITERATIONS_GUESSED; 1917 predictor = PRED_LOOP_ITERATIONS_GUESSED;
987 } 1918 }
1919 /* If we have a likely upper bound, trust it for very small iteration
1920 counts. Such loops would otherwise get mispredicted by standard
1921 LOOP_EXIT heuristics. */
1922 else if (n_exits == 1
1923 && likely_max_stmt_executions (loop, &nit)
1924 && wi::ltu_p (nit,
1925 RDIV (REG_BR_PROB_BASE,
1926 REG_BR_PROB_BASE
1927 - predictor_info
1928 [recursion
1929 ? PRED_LOOP_EXIT_WITH_RECURSION
1930 : PRED_LOOP_EXIT].hitrate)))
1931 {
1932 nitercst = nit.to_shwi ();
1933 predictor = PRED_LOOP_ITERATIONS_MAX;
1934 }
988 else 1935 else
1936 {
1937 if (dump_file && (dump_flags & TDF_DETAILS))
1938 fprintf (dump_file, "Nothing known about exit %i->%i.\n",
1939 ex->src->index, ex->dest->index);
1940 continue;
1941 }
1942
1943 if (dump_file && (dump_flags & TDF_DETAILS))
1944 fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
1945 (int)nitercst, predictor_info[predictor].name);
1946 /* If the prediction for number of iterations is zero, do not
1947 predict the exit edges. */
1948 if (nitercst == 0)
989 continue; 1949 continue;
990 1950
991 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst); 1951 probability = RDIV (REG_BR_PROB_BASE, nitercst);
992 predict_edge (ex, predictor, probability); 1952 predict_edge (ex, predictor, probability);
993 } 1953 }
994 VEC_free (edge, heap, exits); 1954 exits.release ();
1955
1956 /* Find information about loop bound variables. */
1957 for (nb_iter = loop->bounds; nb_iter;
1958 nb_iter = nb_iter->next)
1959 if (nb_iter->stmt
1960 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1961 {
1962 stmt = as_a <gcond *> (nb_iter->stmt);
1963 break;
1964 }
1965 if (!stmt && last_stmt (loop->header)
1966 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1967 stmt = as_a <gcond *> (last_stmt (loop->header));
1968 if (stmt)
1969 is_comparison_with_loop_invariant_p (stmt, loop,
1970 &loop_bound_var,
1971 &loop_bound_code,
1972 &loop_bound_step,
1973 &loop_iv_base);
995 1974
996 bbs = get_loop_body (loop); 1975 bbs = get_loop_body (loop);
997 1976
998 for (j = 0; j < loop->num_nodes; j++) 1977 for (j = 0; j < loop->num_nodes; j++)
999 { 1978 {
1000 int header_found = 0;
1001 edge e; 1979 edge e;
1002 edge_iterator ei; 1980 edge_iterator ei;
1003 1981
1004 bb = bbs[j]; 1982 bb = bbs[j];
1005 1983
1006 /* Bypass loop heuristics on continue statement. These 1984 /* Bypass loop heuristics on continue statement. These
1007 statements construct loops via "non-loop" constructs 1985 statements construct loops via "non-loop" constructs
1008 in the source language and are better handled 1986
1009 separately. */ 1987 separately. */
1010 if (predicted_by_p (bb, PRED_CONTINUE)) 1988 if (predicted_by_p (bb, PRED_CONTINUE))
1011 continue;
1012
1013 /* Loop branch heuristics - predict an edge back to a
1014 loop's head as taken. */
1015 if (bb == loop->latch)
1016 { 1989 {
1017 e = find_edge (loop->latch, loop->header); 1990 if (dump_file && (dump_flags & TDF_DETAILS))
1018 if (e) 1991 fprintf (dump_file, "BB %i predicted by continue.\n",
1019 { 1992 bb->index);
1020 header_found = 1; 1993 continue;
1021 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1022 }
1023 } 1994 }
1024 1995
1025 /* Loop exit heuristics - predict an edge exiting the loop if the 1996 /* If we already used more reliable loop exit predictors, do not
1026 conditional has no loop header successors as not taken. */ 1997 bother with PRED_LOOP_EXIT. */
1027 if (!header_found 1998 if (!predicted_by_loop_heuristics_p (bb))
1028 /* If we already used more reliable loop exit predictors, do not
1029 bother with PRED_LOOP_EXIT. */
1030 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1031 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1032 { 1999 {
1033 /* For a loop with many exits we don't want to predict all exits 2000
1034 with a fairly large probability, because if all exits are 2001
1035 considered in a row, the loop would be predicted to iterate 2002
1036 almost never. The code to divide probability by number of 2003
1043 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction 2010 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1044 as this was causing regression in perl benchmark containing such 2011 as this was causing regression in perl benchmark containing such
1045 a wide loop. */ 2012 a wide loop. */
1046 2013
1047 int probability = ((REG_BR_PROB_BASE 2014 int probability = ((REG_BR_PROB_BASE
1048 - predictor_info [(int) PRED_LOOP_EXIT].hitrate) 2015 - predictor_info
2016 [recursion
2017 ? PRED_LOOP_EXIT_WITH_RECURSION
2018 : PRED_LOOP_EXIT].hitrate)
1049 / n_exits); 2019 / n_exits);
1050 if (probability < HITRATE (2)) 2020 if (probability < HITRATE (2))
1051 probability = HITRATE (2); 2021 probability = HITRATE (2);
1052 FOR_EACH_EDGE (e, ei, bb->succs) 2022 FOR_EACH_EDGE (e, ei, bb->succs)
1053 if (e->dest->index < NUM_FIXED_BLOCKS 2023 if (e->dest->index < NUM_FIXED_BLOCKS
1054 || !flow_bb_inside_loop_p (loop, e->dest)) 2024 || !flow_bb_inside_loop_p (loop, e->dest))
1055 predict_edge (e, PRED_LOOP_EXIT, probability); 2025 {
2026 if (dump_file && (dump_flags & TDF_DETAILS))
2027 fprintf (dump_file,
2028 "Predicting exit %i->%i with prob %i.\n",
2029 e->src->index, e->dest->index, probability);
2030 predict_edge (e,
2031 recursion ? PRED_LOOP_EXIT_WITH_RECURSION
2032 : PRED_LOOP_EXIT, probability);
2033 }
2034 }
2035 if (loop_bound_var)
2036 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
2037 loop_bound_code,
2038 tree_to_shwi (loop_bound_step));
2039 }
2040
2041 /* In the following code
2042 for (loop1)
2043 if (cond)
2044 for (loop2)
2045 body;
2046 guess that cond is unlikely. */
2047 if (loop_outer (loop)->num)
2048 {
2049 basic_block bb = NULL;
2050 edge preheader_edge = loop_preheader_edge (loop);
2051
2052 if (single_pred_p (preheader_edge->src)
2053 && single_succ_p (preheader_edge->src))
2054 preheader_edge = single_pred_edge (preheader_edge->src);
2055
2056 gimple *stmt = last_stmt (preheader_edge->src);
2057 /* Pattern match fortran loop preheader:
2058 _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
2059 _17 = (logical(kind=4)) _16;
2060 if (_17 != 0)
2061 goto <bb 11>;
2062 else
2063 goto <bb 13>;
2064
2065 Loop guard branch prediction says nothing about duplicated loop
2066 headers produced by the Fortran frontend, and in this case we want
2067 to predict paths leading to this preheader. */
2068
2069 if (stmt
2070 && gimple_code (stmt) == GIMPLE_COND
2071 && gimple_cond_code (stmt) == NE_EXPR
2072 && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
2073 && integer_zerop (gimple_cond_rhs (stmt)))
2074 {
2075 gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
2076 if (gimple_code (call_stmt) == GIMPLE_ASSIGN
2077 && gimple_expr_code (call_stmt) == NOP_EXPR
2078 && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME)
2079 call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt));
2080 if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT)
2081 && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST
2082 && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2))
2083 && tree_to_uhwi (gimple_call_arg (call_stmt, 2))
2084 == PRED_FORTRAN_LOOP_PREHEADER)
2085 bb = preheader_edge->src;
2086 }
2087 if (!bb)
2088 {
2089 if (!dominated_by_p (CDI_DOMINATORS,
2090 loop_outer (loop)->latch, loop->header))
2091 predict_paths_leading_to_edge (loop_preheader_edge (loop),
2092 recursion
2093 ? PRED_LOOP_GUARD_WITH_RECURSION
2094 : PRED_LOOP_GUARD,
2095 NOT_TAKEN,
2096 loop_outer (loop));
2097 }
2098 else
2099 {
2100 if (!dominated_by_p (CDI_DOMINATORS,
2101 loop_outer (loop)->latch, bb))
2102 predict_paths_leading_to (bb,
2103 recursion
2104 ? PRED_LOOP_GUARD_WITH_RECURSION
2105 : PRED_LOOP_GUARD,
2106 NOT_TAKEN,
2107 loop_outer (loop));
1056 } 2108 }
1057 } 2109 }
1058 2110
1059 /* Free basic blocks from get_loop_body. */ 2111 /* Free basic blocks from get_loop_body. */
1060 free (bbs); 2112 free (bbs);
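
For intuition about the RDIV computation in predict_loops above, a small worked example with an assumed iteration estimate (editorial; REG_BR_PROB_BASE is 10000):

    /* With nitercst == 10 the exit edge gets
         probability = RDIV (REG_BR_PROB_BASE, nitercst)
                     = RDIV (10000, 10) = 1000,
       i.e. the loop is predicted to leave through this exit on roughly
       10% of the executions of the exit condition.  */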
1064 /* Attempt to predict probabilities of BB outgoing edges using local 2116 /* Attempt to predict probabilities of BB outgoing edges using local
1065 properties. */ 2117 properties. */
1066 static void 2118 static void
1067 bb_estimate_probability_locally (basic_block bb) 2119 bb_estimate_probability_locally (basic_block bb)
1068 { 2120 {
1069 rtx last_insn = BB_END (bb); 2121 rtx_insn *last_insn = BB_END (bb);
1070 rtx cond; 2122 rtx cond;
1071 2123
1072 if (! can_predict_insn_p (last_insn)) 2124 if (! can_predict_insn_p (last_insn))
1073 return; 2125 return;
1074 cond = get_condition (last_insn, NULL, false, false); 2126 cond = get_condition (last_insn, NULL, false, false);
1166 { 2218 {
1167 bb_estimate_probability_locally (bb); 2219 bb_estimate_probability_locally (bb);
1168 combine_predictions_for_insn (BB_END (bb), bb); 2220 combine_predictions_for_insn (BB_END (bb), bb);
1169 } 2221 }
1170 2222
1171 static tree expr_expected_value (tree, bitmap); 2223 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1172 2224
1173 /* Helper function for expr_expected_value. */ 2225 /* Helper function for expr_expected_value. */
1174 2226
1175 static tree 2227 static tree
1176 expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1, bitmap visited) 2228 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1177 { 2229 tree op1, bitmap visited, enum br_predictor *predictor)
1178 gimple def; 2230 {
2231 gimple *def;
2232
2233 if (predictor)
2234 *predictor = PRED_UNCONDITIONAL;
1179 2235
1180 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) 2236 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1181 { 2237 {
1182 if (TREE_CONSTANT (op0)) 2238 if (TREE_CONSTANT (op0))
1183 return op0; 2239 return op0;
2240
2241 if (code == IMAGPART_EXPR)
2242 {
2243 if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
2244 {
2245 def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
2246 if (is_gimple_call (def)
2247 && gimple_call_internal_p (def)
2248 && (gimple_call_internal_fn (def)
2249 == IFN_ATOMIC_COMPARE_EXCHANGE))
2250 {
2251 /* Assume that any given atomic operation has low contention,
2252 and thus the compare-and-swap operation succeeds. */
2253 if (predictor)
2254 *predictor = PRED_COMPARE_AND_SWAP;
2255 return build_one_cst (TREE_TYPE (op0));
2256 }
2257 }
2258 }
1184 2259
1185 if (code != SSA_NAME) 2260 if (code != SSA_NAME)
1186 return NULL_TREE; 2261 return NULL_TREE;
1187 2262
1188 def = SSA_NAME_DEF_STMT (op0); 2263 def = SSA_NAME_DEF_STMT (op0);
1199 tree val = NULL, new_val; 2274 tree val = NULL, new_val;
1200 2275
1201 for (i = 0; i < n; i++) 2276 for (i = 0; i < n; i++)
1202 { 2277 {
1203 tree arg = PHI_ARG_DEF (def, i); 2278 tree arg = PHI_ARG_DEF (def, i);
2279 enum br_predictor predictor2;
1204 2280
1205 /* If this PHI has itself as an argument, we cannot 2281 /* If this PHI has itself as an argument, we cannot
1206 determine the expected value of this argument. However, 2282
1207 if we can find an expected constant value for the other 2283 if we can find an expected constant value for the other
1208 PHI args then we can still be sure that this is 2284 PHI args then we can still be sure that this is
1209 likely a constant. So be optimistic and just 2285 likely a constant. So be optimistic and just
1210 continue with the next argument. */ 2286 continue with the next argument. */
1211 if (arg == PHI_RESULT (def)) 2287 if (arg == PHI_RESULT (def))
1212 continue; 2288 continue;
1213 2289
1214 new_val = expr_expected_value (arg, visited); 2290 new_val = expr_expected_value (arg, visited, &predictor2);
2291
2292 /* It is difficult to combine value predictors. Simply assume
2293 that the later predictor is weaker and take its prediction. */
2294 if (predictor && *predictor < predictor2)
2295 *predictor = predictor2;
1215 if (!new_val) 2296 if (!new_val)
1216 return NULL; 2297 return NULL;
1217 if (!val) 2298 if (!val)
1218 val = new_val; 2299 val = new_val;
1219 else if (!operand_equal_p (val, new_val, false)) 2300 else if (!operand_equal_p (val, new_val, false))
1228 2309
1229 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)), 2310 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1230 gimple_assign_rhs1 (def), 2311 gimple_assign_rhs1 (def),
1231 gimple_assign_rhs_code (def), 2312 gimple_assign_rhs_code (def),
1232 gimple_assign_rhs2 (def), 2313 gimple_assign_rhs2 (def),
1233 visited); 2314 visited, predictor);
1234 } 2315 }
1235 2316
1236 if (is_gimple_call (def)) 2317 if (is_gimple_call (def))
1237 { 2318 {
1238 tree decl = gimple_call_fndecl (def); 2319 tree decl = gimple_call_fndecl (def);
1239 if (!decl) 2320 if (!decl)
1240 return NULL;
1241 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
1242 && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
1243 { 2321 {
1244 tree val; 2322 if (gimple_call_internal_p (def)
1245 2323 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1246 if (gimple_call_num_args (def) != 2) 2324 {
1247 return NULL; 2325 gcc_assert (gimple_call_num_args (def) == 3);
1248 val = gimple_call_arg (def, 0); 2326 tree val = gimple_call_arg (def, 0);
1249 if (TREE_CONSTANT (val)) 2327 if (TREE_CONSTANT (val))
1250 return val; 2328 return val;
1251 return gimple_call_arg (def, 1); 2329 if (predictor)
2330 {
2331 tree val2 = gimple_call_arg (def, 2);
2332 gcc_assert (TREE_CODE (val2) == INTEGER_CST
2333 && tree_fits_uhwi_p (val2)
2334 && tree_to_uhwi (val2) < END_PREDICTORS);
2335 *predictor = (enum br_predictor) tree_to_uhwi (val2);
2336 }
2337 return gimple_call_arg (def, 1);
2338 }
2339 return NULL;
1252 } 2340 }
2341 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2342 switch (DECL_FUNCTION_CODE (decl))
2343 {
2344 case BUILT_IN_EXPECT:
2345 {
2346 tree val;
2347 if (gimple_call_num_args (def) != 2)
2348 return NULL;
2349 val = gimple_call_arg (def, 0);
2350 if (TREE_CONSTANT (val))
2351 return val;
2352 if (predictor)
2353 *predictor = PRED_BUILTIN_EXPECT;
2354 return gimple_call_arg (def, 1);
2355 }
2356
2357 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2358 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2359 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2360 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2361 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2362 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2363 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2364 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2365 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2366 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2367 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2368 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2369 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2370 /* Assume that any given atomic operation has low contention,
2371 and thus the compare-and-swap operation succeeds. */
2372 if (predictor)
2373 *predictor = PRED_COMPARE_AND_SWAP;
2374 return boolean_true_node;
2375 default:
2376 break;
2377 }
1253 } 2378 }
1254 2379
1255 return NULL; 2380 return NULL;
1256 } 2381 }
1257 2382
1258 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) 2383 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1259 { 2384 {
1260 tree res; 2385 tree res;
1261 op0 = expr_expected_value (op0, visited); 2386 enum br_predictor predictor2;
2387 op0 = expr_expected_value (op0, visited, predictor);
1262 if (!op0) 2388 if (!op0)
1263 return NULL; 2389 return NULL;
1264 op1 = expr_expected_value (op1, visited); 2390 op1 = expr_expected_value (op1, visited, &predictor2);
2391 if (predictor && *predictor < predictor2)
2392 *predictor = predictor2;
1265 if (!op1) 2393 if (!op1)
1266 return NULL; 2394 return NULL;
1267 res = fold_build2 (code, type, op0, op1); 2395 res = fold_build2 (code, type, op0, op1);
1268 if (TREE_CONSTANT (res)) 2396 if (TREE_CONSTANT (res))
1269 return res; 2397 return res;
1270 return NULL; 2398 return NULL;
1271 } 2399 }
1272 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS) 2400 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1273 { 2401 {
1274 tree res; 2402 tree res;
1275 op0 = expr_expected_value (op0, visited); 2403 op0 = expr_expected_value (op0, visited, predictor);
1276 if (!op0) 2404 if (!op0)
1277 return NULL; 2405 return NULL;
1278 res = fold_build1 (code, type, op0); 2406 res = fold_build1 (code, type, op0);
1279 if (TREE_CONSTANT (res)) 2407 if (TREE_CONSTANT (res))
1280 return res; 2408 return res;
1290 We may want to implement a more involved value guess (such as value range 2418
1291 propagation based prediction), but such tricks shall go into a new 2419
1292 implementation. */ 2420
1293 2421
1294 static tree 2422 static tree
1295 expr_expected_value (tree expr, bitmap visited) 2423 expr_expected_value (tree expr, bitmap visited,
2424 enum br_predictor *predictor)
1296 { 2425 {
1297 enum tree_code code; 2426 enum tree_code code;
1298 tree op0, op1; 2427 tree op0, op1;
1299 2428
1300 if (TREE_CONSTANT (expr)) 2429 if (TREE_CONSTANT (expr))
1301 return expr; 2430 {
2431 if (predictor)
2432 *predictor = PRED_UNCONDITIONAL;
2433 return expr;
2434 }
1302 2435
1303 extract_ops_from_tree (expr, &code, &op0, &op1); 2436 extract_ops_from_tree (expr, &code, &op0, &op1);
1304 return expr_expected_value_1 (TREE_TYPE (expr), 2437 return expr_expected_value_1 (TREE_TYPE (expr),
1305 op0, code, op1, visited); 2438 op0, code, op1, visited, predictor);
1306 }
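
Hedged usage sketch of the expected-value machinery; the caller code is hypothetical:

    /* if (__builtin_expect (ptr == NULL, 0))    <- expected value 0
         handle_rare_error ();
       expr_expected_value () folds the guard to the constant 0 and
       sets *predictor to PRED_BUILTIN_EXPECT, so the error path is
       predicted not taken.  */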
1307
1308
1309 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1310 we no longer need. */
1311 static unsigned int
1312 strip_predict_hints (void)
1313 {
1314 basic_block bb;
1315 gimple ass_stmt;
1316 tree var;
1317
1318 FOR_EACH_BB (bb)
1319 {
1320 gimple_stmt_iterator bi;
1321 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1322 {
1323 gimple stmt = gsi_stmt (bi);
1324
1325 if (gimple_code (stmt) == GIMPLE_PREDICT)
1326 {
1327 gsi_remove (&bi, true);
1328 continue;
1329 }
1330 else if (gimple_code (stmt) == GIMPLE_CALL)
1331 {
1332 tree fndecl = gimple_call_fndecl (stmt);
1333
1334 if (fndecl
1335 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1336 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1337 && gimple_call_num_args (stmt) == 2)
1338 {
1339 var = gimple_call_lhs (stmt);
1340 if (var)
1341 {
1342 ass_stmt
1343 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1344 gsi_replace (&bi, ass_stmt, true);
1345 }
1346 else
1347 {
1348 gsi_remove (&bi, true);
1349 continue;
1350 }
1351 }
1352 }
1353 gsi_next (&bi);
1354 }
1355 }
1356 return 0;
1357 } 2439 }
1358 2440
1359 /* Predict using opcode of the last statement in basic block. */ 2441 /* Predict using opcode of the last statement in basic block. */
1360 static void 2442 static void
1361 tree_predict_by_opcode (basic_block bb) 2443 tree_predict_by_opcode (basic_block bb)
1362 { 2444 {
1363 gimple stmt = last_stmt (bb); 2445 gimple *stmt = last_stmt (bb);
1364 edge then_edge; 2446 edge then_edge;
1365 tree op0, op1; 2447 tree op0, op1;
1366 tree type; 2448 tree type;
1367 tree val; 2449 tree val;
1368 enum tree_code cmp; 2450 enum tree_code cmp;
1369 bitmap visited;
1370 edge_iterator ei; 2451 edge_iterator ei;
2452 enum br_predictor predictor;
1371 2453
1372 if (!stmt || gimple_code (stmt) != GIMPLE_COND) 2454 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1373 return; 2455 return;
1374 FOR_EACH_EDGE (then_edge, ei, bb->succs) 2456 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1375 if (then_edge->flags & EDGE_TRUE_VALUE) 2457 if (then_edge->flags & EDGE_TRUE_VALUE)
1376 break; 2458 break;
1377 op0 = gimple_cond_lhs (stmt); 2459 op0 = gimple_cond_lhs (stmt);
1378 op1 = gimple_cond_rhs (stmt); 2460 op1 = gimple_cond_rhs (stmt);
1379 cmp = gimple_cond_code (stmt); 2461 cmp = gimple_cond_code (stmt);
1380 type = TREE_TYPE (op0); 2462 type = TREE_TYPE (op0);
1381 visited = BITMAP_ALLOC (NULL); 2463 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (),
1382 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited); 2464 &predictor);
1383 BITMAP_FREE (visited); 2465 if (val && TREE_CODE (val) == INTEGER_CST)
1384 if (val) 2466 {
1385 { 2467 if (predictor == PRED_BUILTIN_EXPECT)
1386 if (integer_zerop (val)) 2468 {
1387 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN); 2469 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2470
2471 gcc_assert (percent >= 0 && percent <= 100);
2472 if (integer_zerop (val))
2473 percent = 100 - percent;
2474 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2475 }
1388 else 2476 else
1389 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN); 2477 predict_edge_def (then_edge, predictor,
1390 return; 2478 integer_zerop (val) ? NOT_TAKEN : TAKEN);
1391 } 2479 }
1392 /* Try "pointer heuristic." 2480 /* Try "pointer heuristic."
1393 A comparison ptr == 0 is predicted as false. 2481 A comparison ptr == 0 is predicted as false.
1394 Similarly, a comparison ptr1 == ptr2 is predicted as false. */ 2482 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1395 if (POINTER_TYPE_P (type)) 2483 if (POINTER_TYPE_P (type))
1471 default: 2559 default:
1472 break; 2560 break;
1473 } 2561 }
1474 } 2562 }
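
To illustrate the BUILTIN_EXPECT_PROBABILITY handling added to tree_predict_by_opcode, a sketch assuming the parameter keeps its default value of 90:

    /* With --param builtin-expect-probability=90:
         if (__builtin_expect (cond, 1))  -> TRUE edge gets HITRATE (90)
         if (__builtin_expect (cond, 0))  -> TRUE edge gets HITRATE (10)
       An expected value of zero inverts the percentage to 100 - 90
       before predict_edge is called.  */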
1475 2563
2564 /* Returns TRUE if the STMT is exit(0) like statement. */
2565
2566 static bool
2567 is_exit_with_zero_arg (const gimple *stmt)
2568 {
2569 /* This is not exit, _exit or _Exit. */
2570 if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT)
2571 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT)
2572 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2))
2573 return false;
2574
2575 /* Argument is an integer zero. */
2576 return integer_zerop (gimple_call_arg (stmt, 0));
2577 }
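
Editorial table of calls and the result of is_exit_with_zero_arg:

    exit (0);     /* true  - BUILT_IN_EXIT with integer zero argument */
    _exit (0);    /* true  - BUILT_IN__EXIT                           */
    exit (1);     /* false - the argument is not integer_zerop        */
    abort ();     /* false - not one of the three exit builtins       */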
2578
1476 /* Try to guess whether the value of return means error code. */ 2579 /* Try to guess whether the value of return means error code. */
1477 2580
1478 static enum br_predictor 2581 static enum br_predictor
1479 return_prediction (tree val, enum prediction *prediction) 2582 return_prediction (tree val, enum prediction *prediction)
1480 { 2583 {
1505 Zero/one often represent booleans so exclude them from the 2608 Zero/one often represent booleans so exclude them from the
1506 heuristics. */ 2609 heuristics. */
1507 if (TREE_CONSTANT (val) 2610 if (TREE_CONSTANT (val)
1508 && (!integer_zerop (val) && !integer_onep (val))) 2611 && (!integer_zerop (val) && !integer_onep (val)))
1509 { 2612 {
1510 *prediction = TAKEN; 2613 *prediction = NOT_TAKEN;
1511 return PRED_CONST_RETURN; 2614 return PRED_CONST_RETURN;
1512 } 2615 }
1513 } 2616 }
1514 return PRED_NO_PREDICTION; 2617 return PRED_NO_PREDICTION;
1515 } 2618 }
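
Hedged examples of how return_prediction classifies typical return values; the NOT_TAKEN for constants reflects the change above:

    return -1;    /* PRED_NEGATIVE_RETURN - likely an error code      */
    return NULL;  /* PRED_NULL_RETURN - null pointers rarely returned */
    return 42;    /* PRED_CONST_RETURN, now NOT_TAKEN                 */
    return 0;     /* no prediction; zero/one often represent booleans */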
1517 /* Find the basic block with return expression and look up for possible 2620 /* Find the basic block with return expression and look up for possible
1518 return value trying to apply RETURN_PREDICTION heuristics. */ 2621 return value trying to apply RETURN_PREDICTION heuristics. */
1519 static void 2622 static void
1520 apply_return_prediction (void) 2623 apply_return_prediction (void)
1521 { 2624 {
1522 gimple return_stmt = NULL; 2625 greturn *return_stmt = NULL;
1523 tree return_val; 2626 tree return_val;
1524 edge e; 2627 edge e;
1525 gimple phi; 2628 gphi *phi;
1526 int phi_num_args, i; 2629 int phi_num_args, i;
1527 enum br_predictor pred; 2630 enum br_predictor pred;
1528 enum prediction direction; 2631 enum prediction direction;
1529 edge_iterator ei; 2632 edge_iterator ei;
1530 2633
1531 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) 2634 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1532 { 2635 {
1533 return_stmt = last_stmt (e->src); 2636 gimple *last = last_stmt (e->src);
1534 if (return_stmt 2637 if (last
1535 && gimple_code (return_stmt) == GIMPLE_RETURN) 2638 && gimple_code (last) == GIMPLE_RETURN)
1536 break; 2639 {
2640 return_stmt = as_a <greturn *> (last);
2641 break;
2642 }
1537 } 2643 }
1538 if (!e) 2644 if (!e)
1539 return; 2645 return;
1540 return_val = gimple_return_retval (return_stmt); 2646 return_val = gimple_return_retval (return_stmt);
1541 if (!return_val) 2647 if (!return_val)
1542 return; 2648 return;
1543 if (TREE_CODE (return_val) != SSA_NAME 2649 if (TREE_CODE (return_val) != SSA_NAME
1544 || !SSA_NAME_DEF_STMT (return_val) 2650 || !SSA_NAME_DEF_STMT (return_val)
1545 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI) 2651 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
1546 return; 2652 return;
1547 phi = SSA_NAME_DEF_STMT (return_val); 2653 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
1548 phi_num_args = gimple_phi_num_args (phi); 2654 phi_num_args = gimple_phi_num_args (phi);
1549 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction); 2655 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1550 2656
1551 /* Avoid the degenerate case where all return values from the function 2657
1552 belong to the same category (i.e. they are all positive constants) 2658
1574 basic_block bb; 2680 basic_block bb;
1575 bool has_return_edges = false; 2681 bool has_return_edges = false;
1576 edge e; 2682 edge e;
1577 edge_iterator ei; 2683 edge_iterator ei;
1578 2684
1579 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) 2685 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1580 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH))) 2686 if (!unlikely_executed_edge_p (e) && !(e->flags & EDGE_ABNORMAL_CALL))
1581 { 2687 {
1582 has_return_edges = true; 2688 has_return_edges = true;
1583 break; 2689 break;
1584 } 2690 }
1585 2691
1586 apply_return_prediction (); 2692 apply_return_prediction ();
1587 2693
1588 FOR_EACH_BB (bb) 2694 FOR_EACH_BB_FN (bb, cfun)
1589 { 2695 {
1590 gimple_stmt_iterator gsi; 2696 gimple_stmt_iterator gsi;
1591 2697
1592 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 2698 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1593 { 2699 {
1594 gimple stmt = gsi_stmt (gsi); 2700 gimple *stmt = gsi_stmt (gsi);
1595 tree decl; 2701 tree decl;
1596 2702
1597 if (is_gimple_call (stmt)) 2703 if (is_gimple_call (stmt))
1598 { 2704 {
1599 if ((gimple_call_flags (stmt) & ECF_NORETURN) 2705 if (gimple_call_noreturn_p (stmt)
1600 && has_return_edges) 2706 && has_return_edges
2707 && !is_exit_with_zero_arg (stmt))
1601 predict_paths_leading_to (bb, PRED_NORETURN, 2708 predict_paths_leading_to (bb, PRED_NORETURN,
1602 NOT_TAKEN); 2709 NOT_TAKEN);
1603 decl = gimple_call_fndecl (stmt); 2710 decl = gimple_call_fndecl (stmt);
1604 if (decl 2711 if (decl
1605 && lookup_attribute ("cold", 2712 && lookup_attribute ("cold",
1606 DECL_ATTRIBUTES (decl))) 2713 DECL_ATTRIBUTES (decl)))
1607 predict_paths_leading_to (bb, PRED_COLD_FUNCTION, 2714 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2715 NOT_TAKEN);
2716 if (decl && recursive_call_p (current_function_decl, decl))
2717 predict_paths_leading_to (bb, PRED_RECURSIVE_CALL,
1608 NOT_TAKEN); 2718 NOT_TAKEN);
1609 } 2719 }
1610 else if (gimple_code (stmt) == GIMPLE_PREDICT) 2720 else if (gimple_code (stmt) == GIMPLE_PREDICT)
1611 { 2721 {
1612 predict_paths_leading_to (bb, gimple_predict_predictor (stmt), 2722 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
1616 } 2726 }
1617 } 2727 }
1618 } 2728 }
1619 } 2729 }
1620 2730
1621 #ifdef ENABLE_CHECKING 2731 /* Callback for hash_map::traverse, asserts that the pointer map is
1622
1623 /* Callback for pointer_map_traverse, asserts that the pointer map is
1624 empty. */ 2732 empty. */
1625 2733
1626 static bool 2734 bool
1627 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value, 2735 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
1628 void *data ATTRIBUTE_UNUSED) 2736 void *)
1629 { 2737 {
1630 gcc_assert (!*value); 2738 gcc_assert (!value);
1631 return false; 2739 return false;
1632 } 2740 }
1633 #endif 2741
1634 2742 /* Predict branch probabilities and estimate profile for basic block BB.
1635 /* Predict branch probabilities and estimate profile for basic block BB. */ 2743 When LOCAL_ONLY is set do not use any global properties of CFG. */
1636 2744
1637 static void 2745 static void
1638 tree_estimate_probability_bb (basic_block bb) 2746 tree_estimate_probability_bb (basic_block bb, bool local_only)
1639 { 2747 {
1640 edge e; 2748 edge e;
1641 edge_iterator ei; 2749 edge_iterator ei;
1642 gimple last;
1643 2750
1644 FOR_EACH_EDGE (e, ei, bb->succs) 2751 FOR_EACH_EDGE (e, ei, bb->succs)
1645 { 2752 {
1646 /* Predict early returns to be probable, as we've already taken
1647 care for error returns and other cases are often used for
1648 fast paths through function.
1649
1650 Since we've already removed the return statements, we are
1651 looking for CFG like:
1652
1653 if (conditional)
1654 {
1655 ..
1656 goto return_block
1657 }
1658 some other blocks
1659 return_block:
1660 return_stmt. */
1661 if (e->dest != bb->next_bb
1662 && e->dest != EXIT_BLOCK_PTR
1663 && single_succ_p (e->dest)
1664 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
1665 && (last = last_stmt (e->dest)) != NULL
1666 && gimple_code (last) == GIMPLE_RETURN)
1667 {
1668 edge e1;
1669 edge_iterator ei1;
1670
1671 if (single_succ_p (bb))
1672 {
1673 FOR_EACH_EDGE (e1, ei1, bb->preds)
1674 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1675 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1676 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
1677 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1678 }
1679 else
1680 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
1681 && !predicted_by_p (e->src, PRED_CONST_RETURN)
1682 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
1683 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1684 }
1685
1686 /* Look for a block we are guarding (i.e. we dominate it, 2753
1687 but it doesn't postdominate us). */ 2754 but it doesn't postdominate us). */
1688 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb 2755 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2756 && !local_only
1689 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src) 2757 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1690 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest)) 2758 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1691 { 2759 {
1692 gimple_stmt_iterator bi; 2760 gimple_stmt_iterator bi;
1693 2761
1696 to signal exceptional situations such as printing error 2764 to signal exceptional situations such as printing error
1697 messages. */ 2765 messages. */
1698 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi); 2766 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
1699 gsi_next (&bi)) 2767 gsi_next (&bi))
1700 { 2768 {
1701 gimple stmt = gsi_stmt (bi); 2769 gimple *stmt = gsi_stmt (bi);
1702 if (is_gimple_call (stmt) 2770 if (is_gimple_call (stmt)
2771 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))
1703 /* Constant and pure calls are hardly used to signal 2772
1704 something exceptional. */ 2773 something exceptional. */
1705 && gimple_has_side_effects (stmt)) 2774 && gimple_has_side_effects (stmt))
1706 { 2775 {
1707 predict_edge_def (e, PRED_CALL, NOT_TAKEN); 2776 if (gimple_call_fndecl (stmt))
2777 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2778 else if (virtual_method_call_p (gimple_call_fn (stmt)))
2779 predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN);
2780 else
2781 predict_edge_def (e, PRED_INDIR_CALL, TAKEN);
1708 break; 2782 break;
1709 } 2783 }
1710 } 2784 }
1711 } 2785 }
1712 } 2786 }
1713 tree_predict_by_opcode (bb); 2787 tree_predict_by_opcode (bb);
1714 } 2788 }
1715 2789
1716 /* Predict branch probabilities and estimate profile of the tree CFG. 2790 /* Predict branch probabilities and estimate profile of the tree CFG.
1717 This function can be called from the loop optimizers to recompute 2791 This function can be called from the loop optimizers to recompute
1718 the profile information. */ 2792 the profile information.
2793 If DRY_RUN is set, do not modify CFG and only produce dump files. */
1719 2794
1720 void 2795 void
1721 tree_estimate_probability (void) 2796 tree_estimate_probability (bool dry_run)
1722 { 2797 {
1723 basic_block bb; 2798 basic_block bb;
1724 2799
1725 add_noreturn_fake_exit_edges (); 2800 add_noreturn_fake_exit_edges ();
1726 connect_infinite_loops_to_exit (); 2801 connect_infinite_loops_to_exit ();
1727 /* We use loop_niter_by_eval, which requires that the loops have 2802 /* We use loop_niter_by_eval, which requires that the loops have
1728 preheaders. */ 2803 preheaders. */
1729 create_preheaders (CP_SIMPLE_PREHEADERS); 2804 create_preheaders (CP_SIMPLE_PREHEADERS);
1730 calculate_dominance_info (CDI_POST_DOMINATORS); 2805 calculate_dominance_info (CDI_POST_DOMINATORS);
1731 2806
1732 bb_predictions = pointer_map_create (); 2807 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
1733 tree_bb_level_predictions (); 2808 tree_bb_level_predictions ();
1734 record_loop_exits (); 2809 record_loop_exits ();
1735 2810
1736 if (number_of_loops () > 1) 2811 if (number_of_loops (cfun) > 1)
1737 predict_loops (); 2812 predict_loops ();
1738 2813
1739 FOR_EACH_BB (bb) 2814 FOR_EACH_BB_FN (bb, cfun)
1740 tree_estimate_probability_bb (bb); 2815 tree_estimate_probability_bb (bb, false);
1741 2816
1742 FOR_EACH_BB (bb) 2817 FOR_EACH_BB_FN (bb, cfun)
1743 combine_predictions_for_bb (bb); 2818 combine_predictions_for_bb (bb, dry_run);
1744 2819
1745 #ifdef ENABLE_CHECKING 2820 if (flag_checking)
1746 pointer_map_traverse (bb_predictions, assert_is_empty, NULL); 2821 bb_predictions->traverse<void *, assert_is_empty> (NULL);
1747 #endif 2822
1748 pointer_map_destroy (bb_predictions); 2823 delete bb_predictions;
1749 bb_predictions = NULL; 2824 bb_predictions = NULL;
1750 2825
1751 estimate_bb_frequencies (); 2826 if (!dry_run)
2827 estimate_bb_frequencies (false);
1752 free_dominance_info (CDI_POST_DOMINATORS); 2828 free_dominance_info (CDI_POST_DOMINATORS);
1753 remove_fake_exit_edges (); 2829 remove_fake_exit_edges ();
1754 } 2830 }
1755 2831
1756 /* Predict branch probabilities and estimate profile of the tree CFG. 2832 /* Set edge->probability for each successor edge of BB. */
1757 This is the driver function for PASS_PROFILE. */ 2833 void
1758 2834 tree_guess_outgoing_edge_probabilities (basic_block bb)
1759 static unsigned int 2835 {
1760 tree_estimate_probability_driver (void) 2836 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
1761 { 2837 tree_estimate_probability_bb (bb, true);
1762 unsigned nb_loops; 2838 combine_predictions_for_bb (bb, false);
1763 2839 if (flag_checking)
1764 loop_optimizer_init (0); 2840 bb_predictions->traverse<void *, assert_is_empty> (NULL);
1765 if (dump_file && (dump_flags & TDF_DETAILS)) 2841 delete bb_predictions;
1766 flow_loops_dump (dump_file, NULL, 0); 2842 bb_predictions = NULL;
1767
1768 mark_irreducible_loops ();
1769
1770 nb_loops = number_of_loops ();
1771 if (nb_loops > 1)
1772 scev_initialize ();
1773
1774 tree_estimate_probability ();
1775
1776 if (nb_loops > 1)
1777 scev_finalize ();
1778
1779 loop_optimizer_finalize ();
1780 if (dump_file && (dump_flags & TDF_DETAILS))
1781 gimple_dump_cfg (dump_file, dump_flags);
1782 if (profile_status == PROFILE_ABSENT)
1783 profile_status = PROFILE_GUESSED;
1784 return 0;
1785 } 2843 }
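
A hedged usage sketch for the new entry point; the caller and block name are hypothetical:

    /* A pass that has just created NEW_BB together with its outgoing
       edges can request purely local guesses without rerunning the
       whole profile estimation:
         tree_guess_outgoing_edge_probabilities (new_bb);
       Because local_only is true, the dominator-based guard heuristic
       is skipped and only per-block heuristics such as
       tree_predict_by_opcode apply.  */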
1786 2844
1787 /* Predict edges to successors of CUR whose sources are not postdominated by 2845 /* Predict edges to successors of CUR whose sources are not postdominated by
1788 BB by PRED and recurse to all postdominators. */ 2846 BB by PRED and recurse to all postdominators. */
1789 2847
1790 static void 2848 static void
1791 predict_paths_for_bb (basic_block cur, basic_block bb, 2849 predict_paths_for_bb (basic_block cur, basic_block bb,
1792 enum br_predictor pred, 2850 enum br_predictor pred,
1793 enum prediction taken) 2851 enum prediction taken,
2852 bitmap visited, struct loop *in_loop = NULL)
1794 { 2853 {
1795 edge e; 2854 edge e;
1796 edge_iterator ei; 2855 edge_iterator ei;
1797 basic_block son; 2856 basic_block son;
2857
2858 /* If we exited the loop or CUR is unconditional in the loop, there is
2859 nothing to do. */
2860 if (in_loop
2861 && (!flow_bb_inside_loop_p (in_loop, cur)
2862 || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur)))
2863 return;
1798 2864
1799 /* We are looking for all edges forming edge cut induced by 2865 /* We are looking for all edges forming edge cut induced by
1800 set of all blocks postdominated by BB. */ 2866 set of all blocks postdominated by BB. */
1801 FOR_EACH_EDGE (e, ei, cur->preds) 2867 FOR_EACH_EDGE (e, ei, cur->preds)
1802 if (e->src->index >= NUM_FIXED_BLOCKS 2868 if (e->src->index >= NUM_FIXED_BLOCKS
1805 edge e2; 2871 edge e2;
1806 edge_iterator ei2; 2872 edge_iterator ei2;
1807 bool found = false; 2873 bool found = false;
1808 2874
1809 /* Ignore fake edges and eh, we predict them as not taken anyway. */ 2875 /* Ignore fake edges and eh, we predict them as not taken anyway. */
1810 if (e->flags & (EDGE_EH | EDGE_FAKE)) 2876 if (unlikely_executed_edge_p (e))
1811 continue; 2877 continue;
1812 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb)); 2878 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
1813 2879
1814 /* See if there is how many edge from e->src that is not abnormal 2880 /* See if there is an edge from e->src that is not abnormal
1815 and does not lead to BB. */ 2881 and does not lead to BB and does not exit the loop. */
1816 FOR_EACH_EDGE (e2, ei2, e->src->succs) 2882 FOR_EACH_EDGE (e2, ei2, e->src->succs)
1817 if (e2 != e 2883 if (e2 != e
1818 && !(e2->flags & (EDGE_EH | EDGE_FAKE)) 2884 && !unlikely_executed_edge_p (e2)
1819 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)) 2885 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
2886 && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
1820 { 2887 {
1821 found = true; 2888 found = true;
1822 break; 2889 break;
1823 } 2890 }
1824 2891
1825 /* If there is non-abnormal path leaving e->src, predict edge 2892 /* If there is non-abnormal path leaving e->src, predict edge
1826 using predictor. Otherwise we need to look for paths 2893 using predictor. Otherwise we need to look for paths
1827 leading to e->src. */ 2894 leading to e->src.
2895
2896 The second may lead to an infinite loop in case we are predicting
2897 regions that are only reachable by abnormal edges. We simply
2898 prevent visiting a given BB twice. */
1828 if (found) 2899 if (found)
1829 predict_edge_def (e, pred, taken); 2900 {
1830 else 2901 if (!edge_predicted_by_p (e, pred, taken))
1831 predict_paths_for_bb (e->src, e->src, pred, taken); 2902 predict_edge_def (e, pred, taken);
2903 }
2904 else if (bitmap_set_bit (visited, e->src->index))
2905 predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop);
1832 } 2906 }
1833 for (son = first_dom_son (CDI_POST_DOMINATORS, cur); 2907 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
1834 son; 2908 son;
1835 son = next_dom_son (CDI_POST_DOMINATORS, son)) 2909 son = next_dom_son (CDI_POST_DOMINATORS, son))
1836 predict_paths_for_bb (son, bb, pred, taken); 2910 predict_paths_for_bb (son, bb, pred, taken, visited, in_loop);
1837 } 2911 }
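
Editorial sketch of the edge cut computed by predict_paths_for_bb:

    /*      A ----> B ----> BB
             \
              +---> C           (C not postdominated by BB)

       Edge B->BB is not predicted directly: B has no other way out,
       so the walk recurses to B's predecessors. Edge A->B is
       predicted with PRED because A has the escape edge A->C.  */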
1838 2912
1839 /* Sets branch probabilities according to PREDiction and 2913 /* Sets branch probabilities according to PREDiction and
1840 FLAGS. */ 2914 FLAGS. */
1841 2915
1842 static void 2916 static void
1843 predict_paths_leading_to (basic_block bb, enum br_predictor pred, 2917 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
1844 enum prediction taken) 2918 enum prediction taken, struct loop *in_loop)
1845 { 2919 {
1846 predict_paths_for_bb (bb, bb, pred, taken); 2920 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
1847 } 2921 }
1848 2922
1849 /* Like predict_paths_leading_to but take edge instead of basic block. */ 2923 /* Like predict_paths_leading_to but take edge instead of basic block. */
1850 2924
1851 static void 2925 static void
1852 predict_paths_leading_to_edge (edge e, enum br_predictor pred, 2926 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
1853 enum prediction taken) 2927 enum prediction taken, struct loop *in_loop)
1854 { 2928 {
1855 bool has_nonloop_edge = false; 2929 bool has_nonloop_edge = false;
1856 edge_iterator ei; 2930 edge_iterator ei;
1857 edge e2; 2931 edge e2;
1858 2932
1859 basic_block bb = e->src; 2933 basic_block bb = e->src;
1860 FOR_EACH_EDGE (e2, ei, bb->succs) 2934 FOR_EACH_EDGE (e2, ei, bb->succs)
1861 if (e2->dest != e->src && e2->dest != e->dest 2935 if (e2->dest != e->src && e2->dest != e->dest
1862 && !(e->flags & (EDGE_EH | EDGE_FAKE)) 2936 && !unlikely_executed_edge_p (e)
1863 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest)) 2937 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
1864 { 2938 {
1865 has_nonloop_edge = true; 2939 has_nonloop_edge = true;
1866 break; 2940 break;
1867 } 2941 }
1868 if (!has_nonloop_edge) 2942 if (!has_nonloop_edge)
1869 predict_paths_for_bb (bb, bb, pred, taken); 2943 {
2944 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
2945 }
1870 else 2946 else
1871 predict_edge_def (e, pred, taken); 2947 predict_edge_def (e, pred, taken);
1872 } 2948 }
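
A minimal stand-alone sketch of the visited guard used by predict_paths_for_bb above: a plain boolean array stands in for the auto_bitmap, and the guard is what keeps the predecessor walk from looping forever on a region reachable only through a cycle (all names below are illustrative, not part of predict.c).

#include <stdbool.h>
#include <stdio.h>

#define N_BLOCKS 3

static bool visited[N_BLOCKS];

/* pred[i][j] nonzero means block j is a predecessor of block i;
   blocks 1 and 2 form a cycle with no other entry.  */
static const int pred[N_BLOCKS][N_BLOCKS] = {
  {0, 0, 0},	/* block 0: no predecessors */
  {0, 0, 1},	/* block 1: predecessor is block 2 */
  {0, 1, 0},	/* block 2: predecessor is block 1 */
};

static void
walk_preds (int bb)
{
  if (visited[bb])
    return;			/* the guard: never visit a block twice */
  visited[bb] = true;
  printf ("visiting block %d\n", bb);
  for (int p = 0; p < N_BLOCKS; p++)
    if (pred[bb][p])
      walk_preds (p);
}

int
main (void)
{
  walk_preds (1);		/* terminates despite the 1 <-> 2 cycle */
  return 0;
}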
1873 2949
1874 /* This is used to carry information about basic blocks. It is 2950 /* This is used to carry information about basic blocks. It is
1875 attached to the AUX field of the standard CFG block. */ 2951 attached to the AUX field of the standard CFG block. */
1876 2952
1877 typedef struct block_info_def 2953 struct block_info
1878 { 2954 {
1879 /* Estimated frequency of execution of basic_block. */ 2955 /* Estimated frequency of execution of basic_block. */
1880 sreal frequency; 2956 sreal frequency;
1881 2957
1882 /* To keep queue of basic blocks to process. */ 2958 /* To keep queue of basic blocks to process. */
1883 basic_block next; 2959 basic_block next;
1884 2960
1885 /* Number of predecessors we need to visit first. */ 2961 /* Number of predecessors we need to visit first. */
1886 int npredecessors; 2962 int npredecessors;
1887 } *block_info; 2963 };
1888 2964
1889 /* Similar information for edges. */ 2965 /* Similar information for edges. */
1890 typedef struct edge_info_def 2966 struct edge_prob_info
1891 { 2967 {
1892 /* In case the edge is a loopback edge, the probability that the edge will be 2968 /* In case the edge is a loopback edge, the probability that the edge will be
1893 reached provided that the header is. The estimated number of iterations of 2969 reached provided that the header is. The estimated number of iterations of
1894 the loop can then be computed as 1 / (1 - back_edge_prob). */ 2970 the loop can then be computed as 1 / (1 - back_edge_prob). */
1895 sreal back_edge_prob; 2971 sreal back_edge_prob;
1896 /* True if the edge is a loopback edge in the natural loop. */ 2972 /* True if the edge is a loopback edge in the natural loop. */
1897 unsigned int back_edge:1; 2973 unsigned int back_edge:1;
1898 } *edge_info; 2974 };
1899 2975
1900 #define BLOCK_INFO(B) ((block_info) (B)->aux) 2976 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
1901 #define EDGE_INFO(E) ((edge_info) (E)->aux) 2977 #undef EDGE_INFO
2978 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
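
The 1 / (1 - back_edge_prob) relation in the comment above is the mean of a geometric distribution; a quick stand-alone check with plain doubles in place of sreal (illustration only):

#include <stdio.h>

/* A loop whose back edge is taken with probability P executes its header
   1 + p + p^2 + ... = 1 / (1 - p) times on average.  */
static double
expected_iterations (double back_edge_prob)
{
  return 1.0 / (1.0 - back_edge_prob);
}

int
main (void)
{
  printf ("%g\n", expected_iterations (0.90));	/* 10 */
  printf ("%g\n", expected_iterations (0.99));	/* 100 */
  return 0;
}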
1902 2979
1903 /* Helper function for estimate_bb_frequencies. 2980 /* Helper function for estimate_bb_frequencies.
1904 Propagate the frequencies in blocks marked in 2981 Propagate the frequencies in blocks marked in
1905 TOVISIT, starting in HEAD. */ 2982 TOVISIT, starting in HEAD. */
1906 2983
1919 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi) 2996 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1920 { 2997 {
1921 edge_iterator ei; 2998 edge_iterator ei;
1922 int count = 0; 2999 int count = 0;
1923 3000
1924 bb = BASIC_BLOCK (i); 3001 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1925 3002
1926 FOR_EACH_EDGE (e, ei, bb->preds) 3003 FOR_EACH_EDGE (e, ei, bb->preds)
1927 { 3004 {
1928 bool visit = bitmap_bit_p (tovisit, e->src->index); 3005 bool visit = bitmap_bit_p (tovisit, e->src->index);
1929 3006
1934 "Irreducible region hit, ignoring edge to %i->%i\n", 3011 "Irreducible region hit, ignoring edge to %i->%i\n",
1935 e->src->index, bb->index); 3012 e->src->index, bb->index);
1936 } 3013 }
1937 BLOCK_INFO (bb)->npredecessors = count; 3014 BLOCK_INFO (bb)->npredecessors = count;
1938 /* When the function never returns, we will never process the exit block. */ 3015 /* When the function never returns, we will never process the exit block. */
1939 if (!count && bb == EXIT_BLOCK_PTR) 3016 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1940 bb->count = bb->frequency = 0; 3017 {
1941 } 3018 bb->count = profile_count::zero ();
1942 3019 bb->frequency = 0;
1943 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one)); 3020 }
3021 }
3022
3023 BLOCK_INFO (head)->frequency = 1;
1944 last = head; 3024 last = head;
1945 for (bb = head; bb; bb = nextbb) 3025 for (bb = head; bb; bb = nextbb)
1946 { 3026 {
1947 edge_iterator ei; 3027 edge_iterator ei;
1948 sreal cyclic_probability, frequency; 3028 sreal cyclic_probability = 0;
1949 3029 sreal frequency = 0;
1950 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1951 memcpy (&frequency, &real_zero, sizeof (real_zero));
1952 3030
1953 nextbb = BLOCK_INFO (bb)->next; 3031 nextbb = BLOCK_INFO (bb)->next;
1954 BLOCK_INFO (bb)->next = NULL; 3032 BLOCK_INFO (bb)->next = NULL;
1955 3033
1956 /* Compute frequency of basic block. */ 3034 /* Compute frequency of basic block. */
1957 if (bb != head) 3035 if (bb != head)
1958 { 3036 {
1959 #ifdef ENABLE_CHECKING 3037 if (flag_checking)
1960 FOR_EACH_EDGE (e, ei, bb->preds) 3038 FOR_EACH_EDGE (e, ei, bb->preds)
1961 gcc_assert (!bitmap_bit_p (tovisit, e->src->index) 3039 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1962 || (e->flags & EDGE_DFS_BACK)); 3040 || (e->flags & EDGE_DFS_BACK));
1963 #endif
1964 3041
1965 FOR_EACH_EDGE (e, ei, bb->preds) 3042 FOR_EACH_EDGE (e, ei, bb->preds)
1966 if (EDGE_INFO (e)->back_edge) 3043 if (EDGE_INFO (e)->back_edge)
1967 { 3044 {
1968 sreal_add (&cyclic_probability, &cyclic_probability, 3045 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
1969 &EDGE_INFO (e)->back_edge_prob);
1970 } 3046 }
1971 else if (!(e->flags & EDGE_DFS_BACK)) 3047 else if (!(e->flags & EDGE_DFS_BACK))
1972 { 3048 {
1973 sreal tmp;
1974
1975 /* frequency += (e->probability 3049 /* frequency += (e->probability
1976 * BLOCK_INFO (e->src)->frequency / 3050 * BLOCK_INFO (e->src)->frequency /
1977 REG_BR_PROB_BASE); */ 3051 REG_BR_PROB_BASE); */
1978 3052
1979 sreal_init (&tmp, e->probability, 0); 3053 sreal tmp = e->probability.to_reg_br_prob_base ();
1980 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency); 3054 tmp *= BLOCK_INFO (e->src)->frequency;
1981 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base); 3055 tmp *= real_inv_br_prob_base;
1982 sreal_add (&frequency, &frequency, &tmp); 3056 frequency += tmp;
1983 } 3057 }
1984 3058
1985 if (sreal_compare (&cyclic_probability, &real_zero) == 0) 3059 if (cyclic_probability == 0)
1986 { 3060 {
1987 memcpy (&BLOCK_INFO (bb)->frequency, &frequency, 3061 BLOCK_INFO (bb)->frequency = frequency;
1988 sizeof (frequency));
1989 } 3062 }
1990 else 3063 else
1991 { 3064 {
1992 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0) 3065 if (cyclic_probability > real_almost_one)
1993 { 3066 cyclic_probability = real_almost_one;
1994 memcpy (&cyclic_probability, &real_almost_one,
1995 sizeof (real_almost_one));
1996 }
1997 3067
1998 /* BLOCK_INFO (bb)->frequency = frequency 3068 /* BLOCK_INFO (bb)->frequency = frequency
1999 / (1 - cyclic_probability) */ 3069 / (1 - cyclic_probability) */
2000 3070
2001 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability); 3071 cyclic_probability = sreal (1) - cyclic_probability;
2002 sreal_div (&BLOCK_INFO (bb)->frequency, 3072 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2003 &frequency, &cyclic_probability);
2004 } 3073 }
2005 } 3074 }
2006 3075
2007 bitmap_clear_bit (tovisit, bb->index); 3076 bitmap_clear_bit (tovisit, bb->index);
2008 3077
2009 e = find_edge (bb, head); 3078 e = find_edge (bb, head);
2010 if (e) 3079 if (e)
2011 { 3080 {
2012 sreal tmp;
2013
2014 /* EDGE_INFO (e)->back_edge_prob 3081 /* EDGE_INFO (e)->back_edge_prob
2015 = ((e->probability * BLOCK_INFO (bb)->frequency) 3082 = ((e->probability * BLOCK_INFO (bb)->frequency)
2016 / REG_BR_PROB_BASE); */ 3083 / REG_BR_PROB_BASE); */
2017 3084
2018 sreal_init (&tmp, e->probability, 0); 3085 sreal tmp = e->probability.to_reg_br_prob_base ();
2019 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency); 3086 tmp *= BLOCK_INFO (bb)->frequency;
2020 sreal_mul (&EDGE_INFO (e)->back_edge_prob, 3087 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2021 &tmp, &real_inv_br_prob_base);
2022 } 3088 }
2023 3089
2024 /* Propagate to successor blocks. */ 3090 /* Propagate to successor blocks. */
2025 FOR_EACH_EDGE (e, ei, bb->succs) 3091 FOR_EACH_EDGE (e, ei, bb->succs)
2026 if (!(e->flags & EDGE_DFS_BACK) 3092 if (!(e->flags & EDGE_DFS_BACK)
2038 } 3104 }
2039 } 3105 }
2040 } 3106 }
2041 } 3107 }
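
A worked instance of the acyclic part of the propagation above: with plain doubles in place of sreal, each block's frequency is the probability-weighted sum of its predecessors' frequencies, so the join of a two-way branch runs exactly as often as the head (illustration only):

#include <stdio.h>

int
main (void)
{
  double head = 1.0;		   /* region head, as seeded for HEAD above */
  double bb_then = 0.70 * head;	   /* branch taken with probability 0.70 */
  double bb_else = 0.30 * head;	   /* fallthrough with probability 0.30 */
  double join = bb_then + bb_else;
  printf ("%.2f %.2f %.2f\n", bb_then, bb_else, join);	/* 0.70 0.30 1.00 */
  return 0;
}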
2042 3108
2043 /* Estimate probabilities of loopback edges in loops at the same nest level. */ 3109 /* Estimate frequencies in loops at the same nest level. */
2044 3110
2045 static void 3111 static void
2046 estimate_loops_at_level (struct loop *first_loop) 3112 estimate_loops_at_level (struct loop *first_loop)
2047 { 3113 {
2048 struct loop *loop; 3114 struct loop *loop;
2050 for (loop = first_loop; loop; loop = loop->next) 3116 for (loop = first_loop; loop; loop = loop->next)
2051 { 3117 {
2052 edge e; 3118 edge e;
2053 basic_block *bbs; 3119 basic_block *bbs;
2054 unsigned i; 3120 unsigned i;
2055 bitmap tovisit = BITMAP_ALLOC (NULL); 3121 auto_bitmap tovisit;
2056 3122
2057 estimate_loops_at_level (loop->inner); 3123 estimate_loops_at_level (loop->inner);
2058 3124
2059 /* Find current loop back edge and mark it. */ 3125 /* Find current loop back edge and mark it. */
2060 e = loop_latch_edge (loop); 3126 e = loop_latch_edge (loop);
2063 bbs = get_loop_body (loop); 3129 bbs = get_loop_body (loop);
2064 for (i = 0; i < loop->num_nodes; i++) 3130 for (i = 0; i < loop->num_nodes; i++)
2065 bitmap_set_bit (tovisit, bbs[i]->index); 3131 bitmap_set_bit (tovisit, bbs[i]->index);
2066 free (bbs); 3132 free (bbs);
2067 propagate_freq (loop->header, tovisit); 3133 propagate_freq (loop->header, tovisit);
2068 BITMAP_FREE (tovisit);
2069 } 3134 }
2070 } 3135 }
2071 3136
2072 /* Propagates frequencies through structure of loops. */ 3137 /* Propagates frequencies through structure of loops. */
2073 3138
2074 static void 3139 static void
2075 estimate_loops (void) 3140 estimate_loops (void)
2076 { 3141 {
2077 bitmap tovisit = BITMAP_ALLOC (NULL); 3142 auto_bitmap tovisit;
2078 basic_block bb; 3143 basic_block bb;
2079 3144
2080 /* Start by estimating the frequencies in the loops. */ 3145 /* Start by estimating the frequencies in the loops. */
2081 if (number_of_loops () > 1) 3146 if (number_of_loops (cfun) > 1)
2082 estimate_loops_at_level (current_loops->tree_root->inner); 3147 estimate_loops_at_level (current_loops->tree_root->inner);
2083 3148
2084 /* Now propagate the frequencies through all the blocks. */ 3149 /* Now propagate the frequencies through all the blocks. */
2085 FOR_ALL_BB (bb) 3150 FOR_ALL_BB_FN (bb, cfun)
2086 { 3151 {
2087 bitmap_set_bit (tovisit, bb->index); 3152 bitmap_set_bit (tovisit, bb->index);
2088 } 3153 }
2089 propagate_freq (ENTRY_BLOCK_PTR, tovisit); 3154 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2090 BITMAP_FREE (tovisit); 3155 }
3156
3157 /* Drop the profile for NODE to guessed, and update its frequency based on
3158 whether it is expected to be hot given the CALL_COUNT. */
3159
3160 static void
3161 drop_profile (struct cgraph_node *node, profile_count call_count)
3162 {
3163 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3164 /* In the case where this was called by another function with a
3165 dropped profile, call_count will be 0. Since there are no
3166 non-zero call counts to this function, we don't know for sure
3167 whether it is hot, and therefore it will be marked normal below. */
3168 bool hot = maybe_hot_count_p (NULL, call_count);
3169
3170 if (dump_file)
3171 fprintf (dump_file,
3172 "Dropping 0 profile for %s. %s based on calls.\n",
3173 node->dump_name (),
3174 hot ? "Function is hot" : "Function is normal");
3175 /* We only expect to miss profiles for functions that are reached
3176 via non-zero call edges in cases where the function may have
3177 been linked from another module or library (COMDATs and extern
3178 templates). See the comments below for handle_missing_profiles.
3179 Also, only warn in cases where the missing counts exceed the
3180 number of training runs. In certain cases with an execv followed
3181 by a no-return call the profile for the no-return call is not
3182 dumped and there can be a mismatch. */
3183 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
3184 && call_count > profile_info->runs)
3185 {
3186 if (flag_profile_correction)
3187 {
3188 if (dump_file)
3189 fprintf (dump_file,
3190 "Missing counts for called function %s\n",
3191 node->dump_name ());
3192 }
3193 else
3194 warning (0, "Missing counts for called function %s",
3195 node->dump_name ());
3196 }
3197
3198 basic_block bb;
3199 FOR_ALL_BB_FN (bb, fn)
3200 {
3201 bb->count = profile_count::uninitialized ();
3202 }
3203
3204 struct cgraph_edge *e;
3205 for (e = node->callees; e; e = e->next_callee)
3206 {
3207 e->frequency = compute_call_stmt_bb_frequency (e->caller->decl,
3208 gimple_bb (e->call_stmt));
3209 }
3210
3211 profile_status_for_fn (fn)
3212 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
3213 node->frequency
3214 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
3215 }
3216
3217 /* In the case of COMDAT routines, multiple object files will contain the same
3218 function and the linker will select one for the binary. In that case
3219 all the other copies from the profile-instrumented binary will be missing
3220 profile counts. Look for cases where this happened, due to non-zero
3221 call counts going to 0-count functions, and drop the profile to guessed
3222 so that we can use the estimated probabilities and avoid optimizing only
3223 for size.
3224
3225 The other case where the profile may be missing is when the routine
3226 is not going to be emitted to the object file, e.g. for "extern template"
3227 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3228 all other cases of non-zero calls to 0-count functions. */
3229
3230 void
3231 handle_missing_profiles (void)
3232 {
3233 struct cgraph_node *node;
3234 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
3235 auto_vec<struct cgraph_node *, 64> worklist;
3236
3237 /* See if a 0-count function has non-0-count callers. In this case we
3238 lost some profile. Drop its function profile to PROFILE_GUESSED. */
3239 FOR_EACH_DEFINED_FUNCTION (node)
3240 {
3241 struct cgraph_edge *e;
3242 profile_count call_count = profile_count::zero ();
3243 gcov_type max_tp_first_run = 0;
3244 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3245
3246 if (!(node->count == profile_count::zero ()))
3247 continue;
3248 for (e = node->callers; e; e = e->next_caller)
3249 if (e->count.initialized_p () && e->count > 0)
3250 {
3251 call_count = call_count + e->count;
3252
3253 if (e->caller->tp_first_run > max_tp_first_run)
3254 max_tp_first_run = e->caller->tp_first_run;
3255 }
3256
3257 /* If the time profile is missing, assign the maximum that comes from
3258 the caller functions. */
3259 if (!node->tp_first_run && max_tp_first_run)
3260 node->tp_first_run = max_tp_first_run + 1;
3261
3262 if (call_count > 0
3263 && fn && fn->cfg
3264 && (call_count.apply_scale (unlikely_count_fraction, 1)
3265 >= profile_info->runs))
3266 {
3267 drop_profile (node, call_count);
3268 worklist.safe_push (node);
3269 }
3270 }
3271
3272 /* Propagate the profile dropping to other 0-count COMDATs that are
3273 potentially called by COMDATs we already dropped the profile on. */
3274 while (worklist.length () > 0)
3275 {
3276 struct cgraph_edge *e;
3277
3278 node = worklist.pop ();
3279 for (e = node->callees; e; e = e->next_callee)
3280 {
3281 struct cgraph_node *callee = e->callee;
3282 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3283
3284 if (callee->count > 0)
3285 continue;
3286 if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl))
3287 && fn && fn->cfg
3288 && profile_status_for_fn (fn) == PROFILE_READ)
3289 {
3290 drop_profile (node, profile_count::zero ());
3291 worklist.safe_push (callee);
3292 }
3293 }
3294 }
2091 } 3295 }
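
The guard in the first loop above only drops a profile when the stray calls are significant relative to the training runs. A sketch of that test with plain integers (the default value of 20 for UNLIKELY_BB_COUNT_FRACTION is an assumption here, as is the helper name):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the call_count * fraction >= runs test used above.  */
static bool
worth_dropping (long long call_count, int fraction, long long runs)
{
  return call_count * fraction >= runs;
}

int
main (void)
{
  /* With 100 training runs: 5 stray calls trigger the drop, 4 do not.  */
  printf ("%d\n", (int) worth_dropping (5, 20, 100));	/* 1 */
  printf ("%d\n", (int) worth_dropping (4, 20, 100));	/* 0 */
  return 0;
}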
2092 3296
2093 /* Convert counts measured by profile driven feedback to frequencies. 3297 /* Convert counts measured by profile driven feedback to frequencies.
2094 Return nonzero iff there was any nonzero execution count. */ 3298 Return nonzero iff there was any nonzero execution count. */
2095 3299
2096 int 3300 bool
2097 counts_to_freqs (void) 3301 counts_to_freqs (void)
2098 { 3302 {
2099 gcov_type count_max, true_count_max = 0; 3303 gcov_type count_max;
3304 profile_count true_count_max = profile_count::zero ();
2100 basic_block bb; 3305 basic_block bb;
2101 3306
2102 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) 3307 /* Don't overwrite the estimated frequencies when the profile for
2103 true_count_max = MAX (bb->count, true_count_max); 3308 the function is missing. We may drop this function to PROFILE_GUESSED
2104 3309 later in drop_profile (). */
2105 count_max = MAX (true_count_max, 1); 3310 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()
2106 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) 3311 || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
2107 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max; 3312 return false;
2108 3313
2109 return true_count_max; 3314 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3315 if (bb->count > true_count_max)
3316 true_count_max = bb->count;
3317
3318 /* If we have no counts to base frequencies on, keep those that are
3319 already there. */
3320 if (!(true_count_max > 0))
3321 return false;
3322
3323 count_max = true_count_max.to_gcov_type ();
3324
3325 FOR_ALL_BB_FN (bb, cfun)
3326 if (bb->count.initialized_p ())
3327 bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max);
3328
3329 return true;
2110 } 3330 }
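
The scaling in counts_to_freqs maps the hottest block to BB_FREQ_MAX and rounds to nearest. The same arithmetic, stand-alone (the BB_FREQ_MAX value and RDIV definition below are assumed to match GCC's):

#include <stdint.h>
#include <stdio.h>

#define BB_FREQ_MAX 10000
#define RDIV(X, Y) (((X) + (Y) / 2) / (Y))	/* round to nearest */

int
main (void)
{
  int64_t count_max = 300;	/* hottest block in the function */
  int64_t count = 100;		/* block executed a third as often */
  int64_t freq = RDIV (count * BB_FREQ_MAX, count_max);
  printf ("%lld\n", (long long) freq);	/* 3333 */
  return 0;
}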
2111 3331
2112 /* Return true if function is likely to be expensive, so there is no point in 3332 /* Return true if function is likely to be expensive, so there is no point in
2113 optimizing performance of prologue, epilogue or doing inlining at the expense 3333 optimizing performance of prologue, epilogue or doing inlining at the expense
2114 of code size growth. THRESHOLD is the limit of number of instructions 3334 of code size growth. THRESHOLD is the limit of number of instructions
2126 gcc_assert (threshold <= BB_FREQ_MAX); 3346 gcc_assert (threshold <= BB_FREQ_MAX);
2127 3347
2128 /* Frequencies are out of range. This either means that the function contains 3348 /* Frequencies are out of range. This either means that the function contains
2129 an internal loop executing more than BB_FREQ_MAX times or that profile 3349 an internal loop executing more than BB_FREQ_MAX times or that profile
2130 feedback is available and the function has not been executed at all. */ 3350 feedback is available and the function has not been executed at all. */
2131 if (ENTRY_BLOCK_PTR->frequency == 0) 3351 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2132 return true; 3352 return true;
2133 3353
2134 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ 3354 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2135 limit = ENTRY_BLOCK_PTR->frequency * threshold; 3355 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2136 FOR_EACH_BB (bb) 3356 FOR_EACH_BB_FN (bb, cfun)
2137 { 3357 {
2138 rtx insn; 3358 rtx_insn *insn;
2139 3359
2140 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); 3360 FOR_BB_INSNS (bb, insn)
2141 insn = NEXT_INSN (insn))
2142 if (active_insn_p (insn)) 3361 if (active_insn_p (insn))
2143 { 3362 {
2144 sum += bb->frequency; 3363 sum += bb->frequency;
2145 if (sum > limit) 3364 if (sum > limit)
2146 return true; 3365 return true;
2148 } 3367 }
2149 3368
2150 return false; 3369 return false;
2151 } 3370 }
2152 3371
2153 /* Estimate basic blocks frequency by given branch probabilities. */ 3372 /* All basic blocks that are reachable only from unlikely basic blocks are
3373 unlikely. */
2154 3374
2155 void 3375 void
2156 estimate_bb_frequencies (void) 3376 propagate_unlikely_bbs_forward (void)
3377 {
3378 auto_vec<basic_block, 64> worklist;
3379 basic_block bb;
3380 edge_iterator ei;
3381 edge e;
3382
3383 if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()))
3384 {
3385 ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1;
3386 worklist.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3387
3388 while (worklist.length () > 0)
3389 {
3390 bb = worklist.pop ();
3391 FOR_EACH_EDGE (e, ei, bb->succs)
3392 if (!(e->count () == profile_count::zero ())
3393 && !(e->dest->count == profile_count::zero ())
3394 && !e->dest->aux)
3395 {
3396 e->dest->aux = (void *)(size_t) 1;
3397 worklist.safe_push (e->dest);
3398 }
3399 }
3400 }
3401
3402 FOR_ALL_BB_FN (bb, cfun)
3403 {
3404 if (!bb->aux)
3405 {
3406 if (!(bb->count == profile_count::zero ())
3407 && (dump_file && (dump_flags & TDF_DETAILS)))
3408 fprintf (dump_file,
3409 "Basic block %i is marked unlikely by forward prop\n",
3410 bb->index);
3411 bb->count = profile_count::zero ();
3412 bb->frequency = 0;
3413 }
3414 else
3415 bb->aux = NULL;
3416 }
3417 }
3418
3419 /* Determine basic blocks/edges that are known to be unlikely executed and set
3420 their counters to zero.
3421 This is done by first identifying obviously unlikely BBs/edges and then
3422 propagating in both directions. */
3423
3424 static void
3425 determine_unlikely_bbs ()
3426 {
3427 basic_block bb;
3428 auto_vec<basic_block, 64> worklist;
3429 edge_iterator ei;
3430 edge e;
3431
3432 FOR_EACH_BB_FN (bb, cfun)
3433 {
3434 if (!(bb->count == profile_count::zero ())
3435 && unlikely_executed_bb_p (bb))
3436 {
3437 if (dump_file && (dump_flags & TDF_DETAILS))
3438 fprintf (dump_file, "Basic block %i is locally unlikely\n",
3439 bb->index);
3440 bb->count = profile_count::zero ();
3441 }
3442
3443 if (bb->count == profile_count::zero ())
3444 bb->frequency = 0;
3445
3446 FOR_EACH_EDGE (e, ei, bb->succs)
3447 if (!(e->probability == profile_probability::never ())
3448 && unlikely_executed_edge_p (e))
3449 {
3450 if (dump_file && (dump_flags & TDF_DETAILS))
3451 fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
3452 bb->index, e->dest->index);
3453 e->probability = profile_probability::never ();
3454 }
3455
3456 gcc_checking_assert (!bb->aux);
3457 }
3458
3459 auto_vec<int, 64> nsuccs;
3460 nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun));
3461 FOR_ALL_BB_FN (bb, cfun)
3462 if (!(bb->count == profile_count::zero ())
3463 && bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
3464 {
3465 nsuccs[bb->index] = 0;
3466 FOR_EACH_EDGE (e, ei, bb->succs)
3467 if (!(e->probability == profile_probability::never ())
3468 && !(e->dest->count == profile_count::zero ()))
3469 nsuccs[bb->index]++;
3470 if (!nsuccs[bb->index])
3471 worklist.safe_push (bb);
3472 }
3473 while (worklist.length () > 0)
3474 {
3475 bb = worklist.pop ();
3476 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
3477 {
3478 bool found = false;
3479 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
3480 !gsi_end_p (gsi); gsi_next (&gsi))
3481 if (stmt_can_terminate_bb_p (gsi_stmt (gsi))
3482 /* stmt_can_terminate_bb_p special-cases noreturns because it
3483 assumes that fake edges are created. We want noreturn alone
3484 not to imply that the BB is unlikely. */
3485 || (is_gimple_call (gsi_stmt (gsi))
3486 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_NORETURN)))
3487 {
3488 found = true;
3489 break;
3490 }
3491 if (found)
3492 continue;
3493 }
3494 if (!(bb->count == profile_count::zero ())
3495 && (dump_file && (dump_flags & TDF_DETAILS)))
3496 fprintf (dump_file,
3497 "Basic block %i is marked unlikely by backward prop\n",
3498 bb->index);
3499 bb->count = profile_count::zero ();
3500 bb->frequency = 0;
3501 FOR_EACH_EDGE (e, ei, bb->preds)
3502 if (!(e->probability == profile_probability::never ()))
3503 {
3504 e->probability = profile_probability::never ();
3505 if (!(e->src->count == profile_count::zero ()))
3506 {
3507 nsuccs[e->src->index]--;
3508 if (!nsuccs[e->src->index])
3509 worklist.safe_push (e->src);
3510 }
3511 }
3512 }
3513 }
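
The backward pass above is a classic counter-based worklist: a block whose count of likely successors drops to zero becomes unlikely and in turn pushes its predecessors. A compact stand-alone model with plain arrays (illustration only; the real code additionally exempts blocks that can terminate):

#include <stdio.h>

#define N 4

/* succ[i][j] nonzero: block j is a successor of block i.  Block 3 is
   locally unlikely; blocks 1 and 2 reach nothing else, so the backward
   pass marks them unlikely too.  Block 0 also has an extra, likely
   successor (not shown), so it keeps a live successor and survives.  */
static const int succ[N][N] = {
  {0, 1, 0, 0},
  {0, 0, 1, 0},
  {0, 0, 0, 1},
  {0, 0, 0, 0},
};

int
main (void)
{
  int unlikely[N] = {0, 0, 0, 1};
  int nsuccs[N] = {0}, worklist[N], wlen = 0;

  for (int b = 0; b < N; b++)
    if (!unlikely[b])
      {
	for (int s = 0; s < N; s++)
	  if (succ[b][s] && !unlikely[s])
	    nsuccs[b]++;
	nsuccs[b] += (b == 0);	/* block 0's extra, likely successor */
	if (!nsuccs[b])
	  worklist[wlen++] = b;
      }
  while (wlen > 0)
    {
      int b = worklist[--wlen];
      unlikely[b] = 1;
      for (int p = 0; p < N; p++)   /* drop B from its predecessors */
	if (succ[p][b] && !unlikely[p] && !--nsuccs[p])
	  worklist[wlen++] = p;
    }
  for (int b = 0; b < N; b++)	    /* prints: likely for 0, unlikely for 1-3 */
    printf ("block %d: %s\n", b, unlikely[b] ? "unlikely" : "likely");
  return 0;
}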
3514
3515 /* Estimate and propagate basic block frequencies using the given branch
3516 probabilities. If FORCE is true, the frequencies are used to estimate
3517 the counts even when there are already non-zero profile counts. */
3518
3519 void
3520 estimate_bb_frequencies (bool force)
2157 { 3521 {
2158 basic_block bb; 3522 basic_block bb;
2159 sreal freq_max; 3523 sreal freq_max;
2160 3524
2161 if (profile_status != PROFILE_READ || !counts_to_freqs ()) 3525 determine_unlikely_bbs ();
3526
3527 if (force || profile_status_for_fn (cfun) != PROFILE_READ
3528 || !counts_to_freqs ())
2162 { 3529 {
2163 static int real_values_initialized = 0; 3530 static int real_values_initialized = 0;
2164 3531
2165 if (!real_values_initialized) 3532 if (!real_values_initialized)
2166 { 3533 {
2167 real_values_initialized = 1; 3534 real_values_initialized = 1;
2168 sreal_init (&real_zero, 0, 0); 3535 real_br_prob_base = REG_BR_PROB_BASE;
2169 sreal_init (&real_one, 1, 0); 3536 real_bb_freq_max = BB_FREQ_MAX;
2170 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0); 3537 real_one_half = sreal (1, -1);
2171 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0); 3538 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2172 sreal_init (&real_one_half, 1, -1); 3539 real_almost_one = sreal (1) - real_inv_br_prob_base;
2173 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2174 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2175 } 3540 }
2176 3541
2177 mark_dfs_back_edges (); 3542 mark_dfs_back_edges ();
2178 3543
2179 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE; 3544 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
3545 profile_probability::always ();
2180 3546
2181 /* Set up block info for each basic block. */ 3547 /* Set up block info for each basic block. */
2182 alloc_aux_for_blocks (sizeof (struct block_info_def)); 3548 alloc_aux_for_blocks (sizeof (block_info));
2183 alloc_aux_for_edges (sizeof (struct edge_info_def)); 3549 alloc_aux_for_edges (sizeof (edge_prob_info));
2184 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) 3550 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2185 { 3551 {
2186 edge e; 3552 edge e;
2187 edge_iterator ei; 3553 edge_iterator ei;
2188 3554
2189 FOR_EACH_EDGE (e, ei, bb->succs) 3555 FOR_EACH_EDGE (e, ei, bb->succs)
2190 { 3556 {
2191 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0); 3557 EDGE_INFO (e)->back_edge_prob
2192 sreal_mul (&EDGE_INFO (e)->back_edge_prob, 3558 = e->probability.to_reg_br_prob_base ();
2193 &EDGE_INFO (e)->back_edge_prob, 3559 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2194 &real_inv_br_prob_base);
2195 } 3560 }
2196 } 3561 }
2197 3562
2198 /* First compute probabilities locally for each loop from innermost 3563 /* First compute frequencies locally for each loop from innermost
2199 to outermost to examine probabilities for back edges. */ 3564 to outermost to examine frequencies for back edges. */
2200 estimate_loops (); 3565 estimate_loops ();
2201 3566
2202 memcpy (&freq_max, &real_zero, sizeof (real_zero)); 3567 freq_max = 0;
2203 FOR_EACH_BB (bb) 3568 FOR_EACH_BB_FN (bb, cfun)
2204 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0) 3569 if (freq_max < BLOCK_INFO (bb)->frequency)
2205 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max)); 3570 freq_max = BLOCK_INFO (bb)->frequency;
2206 3571
2207 sreal_div (&freq_max, &real_bb_freq_max, &freq_max); 3572 freq_max = real_bb_freq_max / freq_max;
2208 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) 3573 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2209 { 3574 {
2210 sreal tmp; 3575 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2211 3576 bb->frequency = tmp.to_int ();
2212 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2213 sreal_add (&tmp, &tmp, &real_one_half);
2214 bb->frequency = sreal_to_int (&tmp);
2215 } 3577 }
2216 3578
2217 free_aux_for_blocks (); 3579 free_aux_for_blocks ();
2218 free_aux_for_edges (); 3580 free_aux_for_edges ();
2219 } 3581 }
2223 /* Decide whether function is hot, cold or unlikely executed. */ 3585 /* Decide whether function is hot, cold or unlikely executed. */
2224 void 3586 void
2225 compute_function_frequency (void) 3587 compute_function_frequency (void)
2226 { 3588 {
2227 basic_block bb; 3589 basic_block bb;
2228 struct cgraph_node *node = cgraph_node (current_function_decl); 3590 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3591
2229 if (DECL_STATIC_CONSTRUCTOR (current_function_decl) 3592 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2230 || MAIN_NAME_P (DECL_NAME (current_function_decl))) 3593 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2231 node->only_called_at_startup = true; 3594 node->only_called_at_startup = true;
2232 if (DECL_STATIC_DESTRUCTOR (current_function_decl)) 3595 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2233 node->only_called_at_exit = true; 3596 node->only_called_at_exit = true;
2234 3597
2235 if (!profile_info || !flag_branch_probabilities) 3598 if (profile_status_for_fn (cfun) != PROFILE_READ)
2236 { 3599 {
2237 int flags = flags_from_decl_or_type (current_function_decl); 3600 int flags = flags_from_decl_or_type (current_function_decl);
2238 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) 3601 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()
2239 != NULL) 3602 || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2240 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; 3603 != NULL)
3604 {
3605 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3606 warn_function_cold (current_function_decl);
3607 }
2241 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl)) 3608 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2242 != NULL) 3609 != NULL)
2243 node->frequency = NODE_FREQUENCY_HOT; 3610 node->frequency = NODE_FREQUENCY_HOT;
2244 else if (flags & ECF_NORETURN) 3611 else if (flags & ECF_NORETURN)
2245 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; 3612 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2248 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl) 3615 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2249 || DECL_STATIC_DESTRUCTOR (current_function_decl)) 3616 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2250 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; 3617 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2251 return; 3618 return;
2252 } 3619 }
2253 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; 3620
2254 FOR_EACH_BB (bb) 3621 /* Only the first time try to drop the function into unlikely executed.
2255 { 3622 After inlining, roundoff errors may confuse us.
2256 if (maybe_hot_bb_p (bb)) 3623 The ipa-profile pass will drop functions only called from unlikely
3624 functions to unlikely, and that is most of what we care about. */
3625 if (!cfun->after_inlining)
3626 {
3627 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3628 warn_function_cold (current_function_decl);
3629 }
3630 FOR_EACH_BB_FN (bb, cfun)
3631 {
3632 if (maybe_hot_bb_p (cfun, bb))
2257 { 3633 {
2258 node->frequency = NODE_FREQUENCY_HOT; 3634 node->frequency = NODE_FREQUENCY_HOT;
2259 return; 3635 return;
2260 } 3636 }
2261 if (!probably_never_executed_bb_p (bb)) 3637 if (!probably_never_executed_bb_p (cfun, bb))
2262 node->frequency = NODE_FREQUENCY_NORMAL; 3638 node->frequency = NODE_FREQUENCY_NORMAL;
2263 } 3639 }
2264 }
2265
2266 static bool
2267 gate_estimate_probability (void)
2268 {
2269 return flag_guess_branch_prob;
2270 } 3640 }
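
For reference, the source-level attributes this classification consults, as a user would write them (illustrative declarations):

__attribute__ ((cold)) void error_exit (const char *msg);     /* -> NODE_FREQUENCY_UNLIKELY_EXECUTED */
__attribute__ ((hot)) int inner_kernel (const int *v, int n); /* -> NODE_FREQUENCY_HOT */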
2271 3641
2272 /* Build PREDICT_EXPR. */ 3642 /* Build PREDICT_EXPR. */
2273 tree 3643 tree
2274 build_predict_expr (enum br_predictor predictor, enum prediction taken) 3644 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2275 { 3645 {
2276 tree t = build1 (PREDICT_EXPR, void_type_node, 3646 tree t = build1 (PREDICT_EXPR, void_type_node,
2277 build_int_cst (NULL, predictor)); 3647 build_int_cst (integer_type_node, predictor));
2278 SET_PREDICT_EXPR_OUTCOME (t, taken); 3648 SET_PREDICT_EXPR_OUTCOME (t, taken);
2279 return t; 3649 return t;
2280 } 3650 }
2281 3651
2282 const char * 3652 const char *
2283 predictor_name (enum br_predictor predictor) 3653 predictor_name (enum br_predictor predictor)
2284 { 3654 {
2285 return predictor_info[predictor].name; 3655 return predictor_info[predictor].name;
2286 } 3656 }
2287 3657
2288 struct gimple_opt_pass pass_profile = 3658 /* Predict branch probabilities and estimate profile of the tree CFG. */
2289 { 3659
2290 { 3660 namespace {
2291 GIMPLE_PASS, 3661
2292 "profile", /* name */ 3662 const pass_data pass_data_profile =
2293 gate_estimate_probability, /* gate */ 3663 {
2294 tree_estimate_probability_driver, /* execute */ 3664 GIMPLE_PASS, /* type */
2295 NULL, /* sub */ 3665 "profile_estimate", /* name */
2296 NULL, /* next */ 3666 OPTGROUP_NONE, /* optinfo_flags */
2297 0, /* static_pass_number */ 3667 TV_BRANCH_PROB, /* tv_id */
2298 TV_BRANCH_PROB, /* tv_id */ 3668 PROP_cfg, /* properties_required */
2299 PROP_cfg, /* properties_required */ 3669 0, /* properties_provided */
2300 0, /* properties_provided */ 3670 0, /* properties_destroyed */
2301 0, /* properties_destroyed */ 3671 0, /* todo_flags_start */
2302 0, /* todo_flags_start */ 3672 0, /* todo_flags_finish */
2303 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */
2304 }
2305 }; 3673 };
2306 3674
2307 struct gimple_opt_pass pass_strip_predict_hints = 3675 class pass_profile : public gimple_opt_pass
2308 { 3676 {
2309 { 3677 public:
2310 GIMPLE_PASS, 3678 pass_profile (gcc::context *ctxt)
2311 "*strip_predict_hints", /* name */ 3679 : gimple_opt_pass (pass_data_profile, ctxt)
2312 NULL, /* gate */ 3680 {}
2313 strip_predict_hints, /* execute */ 3681
2314 NULL, /* sub */ 3682 /* opt_pass methods: */
2315 NULL, /* next */ 3683 virtual bool gate (function *) { return flag_guess_branch_prob; }
2316 0, /* static_pass_number */ 3684 virtual unsigned int execute (function *);
2317 TV_BRANCH_PROB, /* tv_id */ 3685
2318 PROP_cfg, /* properties_required */ 3686 }; // class pass_profile
2319 0, /* properties_provided */ 3687
2320 0, /* properties_destroyed */ 3688 unsigned int
2321 0, /* todo_flags_start */ 3689 pass_profile::execute (function *fun)
2322 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */ 3690 {
2323 } 3691 unsigned nb_loops;
3692
3693 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3694 return 0;
3695
3696 loop_optimizer_init (LOOPS_NORMAL);
3697 if (dump_file && (dump_flags & TDF_DETAILS))
3698 flow_loops_dump (dump_file, NULL, 0);
3699
3700 mark_irreducible_loops ();
3701
3702 nb_loops = number_of_loops (fun);
3703 if (nb_loops > 1)
3704 scev_initialize ();
3705
3706 tree_estimate_probability (false);
3707
3708 if (nb_loops > 1)
3709 scev_finalize ();
3710
3711 loop_optimizer_finalize ();
3712 if (dump_file && (dump_flags & TDF_DETAILS))
3713 gimple_dump_cfg (dump_file, dump_flags);
3714 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3715 profile_status_for_fn (fun) = PROFILE_GUESSED;
3716 if (dump_file && (dump_flags & TDF_DETAILS))
3717 {
3718 struct loop *loop;
3719 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
3720 if (loop->header->frequency)
3721 fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
3722 loop->num,
3723 (int)expected_loop_iterations_unbounded (loop));
3724 }
3725 return 0;
3726 }
3727
3728 } // anon namespace
3729
3730 gimple_opt_pass *
3731 make_pass_profile (gcc::context *ctxt)
3732 {
3733 return new pass_profile (ctxt);
3734 }
3735
3736 namespace {
3737
3738 const pass_data pass_data_strip_predict_hints =
3739 {
3740 GIMPLE_PASS, /* type */
3741 "*strip_predict_hints", /* name */
3742 OPTGROUP_NONE, /* optinfo_flags */
3743 TV_BRANCH_PROB, /* tv_id */
3744 PROP_cfg, /* properties_required */
3745 0, /* properties_provided */
3746 0, /* properties_destroyed */
3747 0, /* todo_flags_start */
3748 0, /* todo_flags_finish */
2324 }; 3749 };
3750
3751 class pass_strip_predict_hints : public gimple_opt_pass
3752 {
3753 public:
3754 pass_strip_predict_hints (gcc::context *ctxt)
3755 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3756 {}
3757
3758 /* opt_pass methods: */
3759 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3760 virtual unsigned int execute (function *);
3761
3762 }; // class pass_strip_predict_hints
3763
3764 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3765 we no longer need. */
3766 unsigned int
3767 pass_strip_predict_hints::execute (function *fun)
3768 {
3769 basic_block bb;
3770 gimple *ass_stmt;
3771 tree var;
3772 bool changed = false;
3773
3774 FOR_EACH_BB_FN (bb, fun)
3775 {
3776 gimple_stmt_iterator bi;
3777 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3778 {
3779 gimple *stmt = gsi_stmt (bi);
3780
3781 if (gimple_code (stmt) == GIMPLE_PREDICT)
3782 {
3783 gsi_remove (&bi, true);
3784 changed = true;
3785 continue;
3786 }
3787 else if (is_gimple_call (stmt))
3788 {
3789 tree fndecl = gimple_call_fndecl (stmt);
3790
3791 if ((fndecl
3792 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3793 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3794 && gimple_call_num_args (stmt) == 2)
3795 || (gimple_call_internal_p (stmt)
3796 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3797 {
3798 var = gimple_call_lhs (stmt);
3799 changed = true;
3800 if (var)
3801 {
3802 ass_stmt
3803 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3804 gsi_replace (&bi, ass_stmt, true);
3805 }
3806 else
3807 {
3808 gsi_remove (&bi, true);
3809 continue;
3810 }
3811 }
3812 }
3813 gsi_next (&bi);
3814 }
3815 }
3816 return changed ? TODO_cleanup_cfg : 0;
3817 }
3818
3819 } // anon namespace
3820
3821 gimple_opt_pass *
3822 make_pass_strip_predict_hints (gcc::context *ctxt)
3823 {
3824 return new pass_strip_predict_hints (ctxt);
3825 }
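
For reference, the kind of source-level hint this pass strips: __builtin_expect survives only long enough for the predictors to consume it, after which just the value of its first argument remains (illustrative user code, not part of GCC):

extern void cold_path (void);

void
f (long x)
{
  /* Expected false: predictors record the hint, then the pass above
     reduces the call to the plain value of `x == 0'.  */
  if (__builtin_expect (x == 0, 0))
    cold_path ();
}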
2325 3826
2326 /* Rebuild function frequencies. Passes are in general expected to 3827 /* Rebuild function frequencies. Passes are in general expected to
2327 maintain the profile by hand; however, in some cases this is not possible: 3828 maintain the profile by hand; however, in some cases this is not possible:
2328 for example, when inlining several functions with loops, frequencies might 3829 for example, when inlining several functions with loops, frequencies might
2329 run out of scale and thus need to be recomputed. */ 3830 run out of scale and thus need to be recomputed. */
2330 3831
2331 void 3832 void
2332 rebuild_frequencies (void) 3833 rebuild_frequencies (void)
2333 { 3834 {
2334 timevar_push (TV_REBUILD_FREQUENCIES); 3835 timevar_push (TV_REBUILD_FREQUENCIES);
2335 if (profile_status == PROFILE_GUESSED) 3836
3837 /* When the max bb count in the function is small, there is a higher
3838 chance that there were truncation errors in the integer scaling
3839 of counts by inlining and other optimizations. This could lead
3840 to incorrect classification of code as being cold when it isn't.
3841 In that case, force the estimation of bb counts/frequencies from the
3842 branch probabilities, rather than computing frequencies from counts,
3843 which may also lead to frequencies incorrectly reduced to 0. There
3844 is less precision in the probabilities, so we only do this for small
3845 max counts. */
3846 profile_count count_max = profile_count::zero ();
3847 basic_block bb;
3848 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3849 if (bb->count > count_max)
3850 count_max = bb->count;
3851
3852 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3853 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3854 && count_max < REG_BR_PROB_BASE / 10))
2336 { 3855 {
2337 loop_optimizer_init (0); 3856 loop_optimizer_init (0);
2338 add_noreturn_fake_exit_edges (); 3857 add_noreturn_fake_exit_edges ();
2339 mark_irreducible_loops (); 3858 mark_irreducible_loops ();
2340 connect_infinite_loops_to_exit (); 3859 connect_infinite_loops_to_exit ();
2341 estimate_bb_frequencies (); 3860 estimate_bb_frequencies (true);
2342 remove_fake_exit_edges (); 3861 remove_fake_exit_edges ();
2343 loop_optimizer_finalize (); 3862 loop_optimizer_finalize ();
2344 } 3863 }
2345 else if (profile_status == PROFILE_READ) 3864 else if (profile_status_for_fn (cfun) == PROFILE_READ)
2346 counts_to_freqs (); 3865 counts_to_freqs ();
2347 else 3866 else
2348 gcc_unreachable (); 3867 gcc_unreachable ();
2349 timevar_pop (TV_REBUILD_FREQUENCIES); 3868 timevar_pop (TV_REBUILD_FREQUENCIES);
2350 } 3869 }
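
The truncation problem motivating the forced re-estimation above is easy to reproduce: repeated integer scaling of a small count by a probability quickly rounds it down to zero (REG_BR_PROB_BASE assumed to be 10000, as in GCC; the numbers are illustrative):

#include <stdio.h>

int
main (void)
{
  long long count = 7;
  count = count * 3333 / 10000;	  /* scale by ~1/3: 7 -> 2 */
  count = count * 3333 / 10000;	  /* scale again: 2 -> 0 */
  printf ("%lld\n", count);	  /* 0 -- the block now looks unexecuted */
  return 0;
}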
3870
3871 /* Perform a dry run of the branch prediction pass and report a comparison of
3872 the predicted and real profiles in the dump file. */
3873
3874 void
3875 report_predictor_hitrates (void)
3876 {
3877 unsigned nb_loops;
3878
3879 loop_optimizer_init (LOOPS_NORMAL);
3880 if (dump_file && (dump_flags & TDF_DETAILS))
3881 flow_loops_dump (dump_file, NULL, 0);
3882
3883 mark_irreducible_loops ();
3884
3885 nb_loops = number_of_loops (cfun);
3886 if (nb_loops > 1)
3887 scev_initialize ();
3888
3889 tree_estimate_probability (true);
3890
3891 if (nb_loops > 1)
3892 scev_finalize ();
3893
3894 loop_optimizer_finalize ();
3895 }
3896
3897 /* Force edge E to be cold.
3898 If IMPOSSIBLE is true, force the edge to have count and probability 0;
3899 otherwise keep a low probability to represent a possible error in a guess.
3900 This is used e.g. in case we predict a loop to likely iterate a given
3901 number of times but are not 100% sure.
3902
3903 This function locally updates the profile without attempting to keep global
3904 consistency, which cannot be reached in full generality without a full
3905 profile rebuild from probabilities alone. Doing so is not necessarily a
3906 good idea because frequencies and counts may be more realistic than
3907 probabilities.
3908
3909 In some cases (such as the elimination of early exits during full loop
3910 unrolling) the caller can ensure that the profile becomes consistent afterwards. */
3911
3912 void
3913 force_edge_cold (edge e, bool impossible)
3914 {
3915 profile_count count_sum = profile_count::zero ();
3916 profile_probability prob_sum = profile_probability::never ();
3917 edge_iterator ei;
3918 edge e2;
3919 bool uninitialized_exit = false;
3920
3921 profile_probability goal = (impossible ? profile_probability::never ()
3922 : profile_probability::very_unlikely ());
3923
3924 /* If the edge is already improbable or cold, just return. */
3925 if (e->probability <= goal
3926 && (!impossible || e->count () == profile_count::zero ()))
3927 return;
3928 FOR_EACH_EDGE (e2, ei, e->src->succs)
3929 if (e2 != e)
3930 {
3931 if (e2->count ().initialized_p ())
3932 count_sum += e2->count ();
3933 else
3934 uninitialized_exit = true;
3935 if (e2->probability.initialized_p ())
3936 prob_sum += e2->probability;
3937 }
3938
3939 /* If there are other edges out of e->src, redistribute probability
3940 there. */
3941 if (prob_sum > profile_probability::never ())
3942 {
3943 if (!(e->probability < goal))
3944 e->probability = goal;
3945
3946 profile_probability prob_comp = prob_sum / e->probability.invert ();
3947
3948 if (dump_file && (dump_flags & TDF_DETAILS))
3949 fprintf (dump_file, "Making edge %i->%i %s by redistributing "
3950 "probability to other edges.\n",
3951 e->src->index, e->dest->index,
3952 impossible ? "impossible" : "cold");
3953 FOR_EACH_EDGE (e2, ei, e->src->succs)
3954 if (e2 != e)
3955 {
3956 e2->probability /= prob_comp;
3957 }
3958 if (current_ir_type () != IR_GIMPLE
3959 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
3960 update_br_prob_note (e->src);
3961 }
3962 /* If all edges out of e->src are unlikely, the basic block itself
3963 is unlikely. */
3964 else
3965 {
3966 if (prob_sum == profile_probability::never ())
3967 e->probability = profile_probability::always ();
3968 else
3969 {
3970 if (impossible)
3971 e->probability = profile_probability::never ();
3972 /* If BB has some edges out that are not impossible, we cannot
3973 assume that BB itself is. */
3974 impossible = false;
3975 }
3976 if (current_ir_type () != IR_GIMPLE
3977 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
3978 update_br_prob_note (e->src);
3979 if (e->src->count == profile_count::zero ())
3980 return;
3981 if (count_sum == profile_count::zero () && !uninitialized_exit
3982 && impossible)
3983 {
3984 bool found = false;
3985 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
3986 ;
3987 else if (current_ir_type () == IR_GIMPLE)
3988 for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
3989 !gsi_end_p (gsi); gsi_next (&gsi))
3990 {
3991 if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
3992 {
3993 found = true;
3994 break;
3995 }
3996 }
3997 /* FIXME: Implement RTL path. */
3998 else
3999 found = true;
4000 if (!found)
4001 {
4002 if (dump_file && (dump_flags & TDF_DETAILS))
4003 fprintf (dump_file,
4004 "Making bb %i impossible and dropping count to 0.\n",
4005 e->src->index);
4006 e->src->count = profile_count::zero ();
4007 FOR_EACH_EDGE (e2, ei, e->src->preds)
4008 force_edge_cold (e2, impossible);
4009 return;
4010 }
4011 }
4012
4013 /* If we did not adjust anything, the source basic block has no likely edges
4014 leaving in the other direction. In that case force that bb cold, too.
4015 This in general is a difficult task to do, but handle the special case when
4016 BB has only one predecessor. This is the common case when we are updating
4017 the profile after loop transforms. */
4018 if (!(prob_sum > profile_probability::never ())
4019 && count_sum == profile_count::zero ()
4020 && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1))
4021 {
4022 int old_frequency = e->src->frequency;
4023 if (dump_file && (dump_flags & TDF_DETAILS))
4024 fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
4025 impossible ? "impossible" : "cold");
4026 e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
4027 if (impossible)
4028 e->src->count = profile_count::zero ();
4029 else
4030 e->src->count = e->count ().apply_scale (e->src->frequency,
4031 old_frequency);
4032 force_edge_cold (single_pred_edge (e->src), impossible);
4033 }
4034 else if (dump_file && (dump_flags & TDF_DETAILS)
4035 && maybe_hot_bb_p (cfun, e->src))
4036 fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
4037 impossible ? "impossible" : "cold");
4038 }
4039 }
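
A worked instance of the redistribution branch above, with plain doubles standing in for profile_probability (illustration only): edge A is forced impossible and the remaining mass is rescaled so the out-probabilities still sum to 1.

#include <stdio.h>

int
main (void)
{
  double a = 0.40, b = 0.35, c = 0.25;	/* out-edges of e->src */
  double goal = 0.0;			/* IMPOSSIBLE: probability never */
  double prob_sum = b + c;
  a = goal;
  double prob_comp = prob_sum / (1.0 - a);  /* prob_sum / e->probability.invert () */
  b /= prob_comp;
  c /= prob_comp;
  printf ("%.4f %.4f %.4f\n", a, b, c);	/* 0.0000 0.5833 0.4167 */
  return 0;
}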
4040
4041 #if CHECKING_P
4042
4043 namespace selftest {
4044
4045 /* Test that the hit-rate values of the predictors defined in predict.def
4046 are within the range (50, 100]. */
4047
4048 struct branch_predictor
4049 {
4050 const char *name;
4051 unsigned probability;
4052 };
4053
4054 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE },
4055
4056 static void
4057 test_prediction_value_range ()
4058 {
4059 branch_predictor predictors[] = {
4060 #include "predict.def"
4061 {NULL, -1U}
4062 };
4063
4064 for (unsigned i = 0; predictors[i].name != NULL; i++)
4065 {
4066 unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE;
4067 ASSERT_TRUE (p > 50 && p <= 100);
4068 }
4069 }
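
Worked once by hand, assuming REG_BR_PROB_BASE is 10000 as in GCC: a predictor declared in predict.def with hit rate 9000 yields p = 100 * 9000 / 10000 = 90, which lies inside (50, 100] and passes the assertion.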
4070
4071 #undef DEF_PREDICTOR
4072
4073 /* Run all of the selftests within this file. */
4074
4075 void
4076 predict_c_tests ()
4077 {
4078 test_prediction_value_range ();
4079 }
4080
4081 } // namespace selftest
4082 #endif /* CHECKING_P. */