Mercurial > hg > CbC > CbC_gcc
comparison gcc/predict.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Branch prediction routines for the GNU compiler. | 1 /* Branch prediction routines for the GNU compiler. |
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010 | 2 Copyright (C) 2000-2017 Free Software Foundation, Inc. |
3 Free Software Foundation, Inc. | |
4 | 3 |
5 This file is part of GCC. | 4 This file is part of GCC. |
6 | 5 |
7 GCC is free software; you can redistribute it and/or modify it under | 6 GCC is free software; you can redistribute it and/or modify it under |
8 the terms of the GNU General Public License as published by the Free | 7 the terms of the GNU General Public License as published by the Free |
29 | 28 |
30 | 29 |
31 #include "config.h" | 30 #include "config.h" |
32 #include "system.h" | 31 #include "system.h" |
33 #include "coretypes.h" | 32 #include "coretypes.h" |
34 #include "tm.h" | 33 #include "backend.h" |
34 #include "rtl.h" | |
35 #include "tree.h" | 35 #include "tree.h" |
36 #include "rtl.h" | 36 #include "gimple.h" |
37 #include "tm_p.h" | 37 #include "cfghooks.h" |
38 #include "hard-reg-set.h" | 38 #include "tree-pass.h" |
39 #include "basic-block.h" | 39 #include "ssa.h" |
40 #include "insn-config.h" | 40 #include "memmodel.h" |
41 #include "regs.h" | 41 #include "emit-rtl.h" |
42 #include "flags.h" | 42 #include "cgraph.h" |
43 #include "output.h" | 43 #include "coverage.h" |
44 #include "function.h" | |
45 #include "except.h" | |
46 #include "diagnostic-core.h" | 44 #include "diagnostic-core.h" |
47 #include "recog.h" | 45 #include "gimple-predict.h" |
48 #include "expr.h" | 46 #include "fold-const.h" |
49 #include "predict.h" | 47 #include "calls.h" |
50 #include "coverage.h" | 48 #include "cfganal.h" |
49 #include "profile.h" | |
51 #include "sreal.h" | 50 #include "sreal.h" |
52 #include "params.h" | 51 #include "params.h" |
53 #include "target.h" | |
54 #include "cfgloop.h" | 52 #include "cfgloop.h" |
55 #include "tree-flow.h" | 53 #include "gimple-iterator.h" |
56 #include "ggc.h" | 54 #include "tree-cfg.h" |
57 #include "tree-dump.h" | 55 #include "tree-ssa-loop-niter.h" |
58 #include "tree-pass.h" | 56 #include "tree-ssa-loop.h" |
59 #include "timevar.h" | |
60 #include "tree-scalar-evolution.h" | 57 #include "tree-scalar-evolution.h" |
61 #include "cfgloop.h" | 58 #include "ipa-utils.h" |
62 #include "pointer-set.h" | 59 #include "gimple-pretty-print.h" |
60 #include "selftest.h" | |
61 #include "cfgrtl.h" | |
62 #include "stringpool.h" | |
63 #include "attribs.h" | |
64 | |
65 /* Enum with reasons why a predictor is ignored. */ | |
66 | |
67 enum predictor_reason | |
68 { | |
69 REASON_NONE, | |
70 REASON_IGNORED, | |
71 REASON_SINGLE_EDGE_DUPLICATE, | |
72 REASON_EDGE_PAIR_DUPLICATE | |
73 }; | |
74 | |
75 /* String messages for the aforementioned enum. */ | |
76 | |
77 static const char *reason_messages[] = {"", " (ignored)", | |
78 " (single edge duplicate)", " (edge pair duplicate)"}; | |
63 | 79 |
64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, | 80 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, |
65 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */ | 81 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */ |
66 static sreal real_zero, real_one, real_almost_one, real_br_prob_base, | 82 static sreal real_almost_one, real_br_prob_base, |
67 real_inv_br_prob_base, real_one_half, real_bb_freq_max; | 83 real_inv_br_prob_base, real_one_half, real_bb_freq_max; |
68 | 84 |
69 /* Random guesstimation given names. | 85 static void combine_predictions_for_insn (rtx_insn *, basic_block); |
70 PROV_VERY_UNLIKELY should be small enough so basic block predicted | 86 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, |
71 by it gets bellow HOT_BB_FREQUENCY_FRANCTION. */ | 87 enum predictor_reason, edge); |
72 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1) | 88 static void predict_paths_leading_to (basic_block, enum br_predictor, |
73 #define PROB_EVEN (REG_BR_PROB_BASE / 2) | 89 enum prediction, |
74 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY) | 90 struct loop *in_loop = NULL); |
75 #define PROB_ALWAYS (REG_BR_PROB_BASE) | 91 static void predict_paths_leading_to_edge (edge, enum br_predictor, |
76 | 92 enum prediction, |
77 static void combine_predictions_for_insn (rtx, basic_block); | 93 struct loop *in_loop = NULL); |
78 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int); | 94 static bool can_predict_insn_p (const rtx_insn *); |
79 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction); | |
80 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction); | |
81 static bool can_predict_insn_p (const_rtx); | |
82 | 95 |
83 /* Information we hold about each branch predictor. | 96 /* Information we hold about each branch predictor. |
84 Filled using information from predict.def. */ | 97 Filled using information from predict.def. */ |
85 | 98 |
86 struct predictor_info | 99 struct predictor_info |
109 #undef DEF_PREDICTOR | 122 #undef DEF_PREDICTOR |
110 | 123 |
111 /* Return TRUE if frequency FREQ is considered to be hot. */ | 124 /* Return TRUE if frequency FREQ is considered to be hot. */ |
112 | 125 |
113 static inline bool | 126 static inline bool |
114 maybe_hot_frequency_p (int freq) | 127 maybe_hot_frequency_p (struct function *fun, int freq) |
115 { | 128 { |
116 struct cgraph_node *node = cgraph_node (current_function_decl); | 129 struct cgraph_node *node = cgraph_node::get (fun->decl); |
117 if (!profile_info || !flag_branch_probabilities) | 130 if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ) |
118 { | 131 { |
119 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) | 132 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) |
120 return false; | 133 return false; |
121 if (node->frequency == NODE_FREQUENCY_HOT) | 134 if (node->frequency == NODE_FREQUENCY_HOT) |
122 return true; | 135 return true; |
123 } | 136 } |
124 if (profile_status == PROFILE_ABSENT) | 137 if (profile_status_for_fn (fun) == PROFILE_ABSENT) |
125 return true; | 138 return true; |
126 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE | 139 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE |
127 && freq <= (ENTRY_BLOCK_PTR->frequency * 2 / 3)) | 140 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3)) |
128 return false; | 141 return false; |
129 if (freq < ENTRY_BLOCK_PTR->frequency / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)) | 142 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0) |
143 return false; | |
144 if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) | |
145 < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency) | |
130 return false; | 146 return false; |
131 return true; | 147 return true; |
132 } | 148 } |
133 | 149 |
150 static gcov_type min_count = -1; | |
151 | |
152 /* Determine the threshold for hot BB counts. */ | |
153 | |
154 gcov_type | |
155 get_hot_bb_threshold () | |
156 { | |
157 gcov_working_set_t *ws; | |
158 if (min_count == -1) | |
159 { | |
160 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE)); | |
161 gcc_assert (ws); | |
162 min_count = ws->min_counter; | |
163 } | |
164 return min_count; | |
165 } | |
166 | |
167 /* Set the threshold for hot BB counts. */ | |
168 | |
169 void | |
170 set_hot_bb_threshold (gcov_type min) | |
171 { | |
172 min_count = min; | |
173 } | |
174 | |
134 /* Return TRUE if frequency FREQ is considered to be hot. */ | 175 /* Return TRUE if frequency FREQ is considered to be hot. */ |
135 | 176 |
136 static inline bool | 177 bool |
137 maybe_hot_count_p (gcov_type count) | 178 maybe_hot_count_p (struct function *, profile_count count) |
138 { | 179 { |
139 if (profile_status != PROFILE_READ) | 180 if (!count.initialized_p ()) |
140 return true; | 181 return true; |
141 /* Code executed at most once is not hot. */ | 182 /* Code executed at most once is not hot. */ |
142 if (profile_info->runs >= count) | 183 if (count <= MAX (profile_info ? profile_info->runs : 1, 1)) |
143 return false; | 184 return false; |
144 return (count | 185 return (count.to_gcov_type () >= get_hot_bb_threshold ()); |
145 > profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)); | |
146 } | 186 } |
147 | 187 |
148 /* Return true in case BB can be CPU intensive and should be optimized | 188 /* Return true in case BB can be CPU intensive and should be optimized |
149 for maximal performance. */ | 189 for maximal performance. */ |
150 | 190 |
151 bool | 191 bool |
152 maybe_hot_bb_p (const_basic_block bb) | 192 maybe_hot_bb_p (struct function *fun, const_basic_block bb) |
153 { | 193 { |
154 if (profile_status == PROFILE_READ) | 194 gcc_checking_assert (fun); |
155 return maybe_hot_count_p (bb->count); | 195 if (!maybe_hot_count_p (fun, bb->count)) |
156 return maybe_hot_frequency_p (bb->frequency); | |
157 } | |
158 | |
159 /* Return true if the call can be hot. */ | |
160 | |
161 bool | |
162 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge) | |
163 { | |
164 if (profile_info && flag_branch_probabilities | |
165 && (edge->count | |
166 <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION))) | |
167 return false; | 196 return false; |
168 if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED | 197 return maybe_hot_frequency_p (fun, bb->frequency); |
169 || edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) | |
170 return false; | |
171 if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED | |
172 && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE) | |
173 return false; | |
174 if (optimize_size) | |
175 return false; | |
176 if (edge->caller->frequency == NODE_FREQUENCY_HOT) | |
177 return true; | |
178 if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE | |
179 && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2) | |
180 return false; | |
181 if (flag_guess_branch_prob | |
182 && edge->frequency <= (CGRAPH_FREQ_BASE | |
183 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))) | |
184 return false; | |
185 return true; | |
186 } | 198 } |
187 | 199 |
188 /* Return true in case BB can be CPU intensive and should be optimized | 200 /* Return true in case BB can be CPU intensive and should be optimized |
189 for maximal performance. */ | 201 for maximal performance. */ |
190 | 202 |
191 bool | 203 bool |
192 maybe_hot_edge_p (edge e) | 204 maybe_hot_edge_p (edge e) |
193 { | 205 { |
194 if (profile_status == PROFILE_READ) | 206 if (!maybe_hot_count_p (cfun, e->count ())) |
195 return maybe_hot_count_p (e->count); | 207 return false; |
196 return maybe_hot_frequency_p (EDGE_FREQUENCY (e)); | 208 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e)); |
197 } | 209 } |
198 | 210 |
199 /* Return true in case BB is probably never executed. */ | 211 /* Return true if profile COUNT and FREQUENCY, or function FUN static |
200 bool | 212 node frequency reflects never being executed. */ |
201 probably_never_executed_bb_p (const_basic_block bb) | 213 |
202 { | 214 static bool |
203 if (profile_info && flag_branch_probabilities) | 215 probably_never_executed (struct function *fun, |
204 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0; | 216 profile_count count, int) |
205 if ((!profile_info || !flag_branch_probabilities) | 217 { |
206 && cgraph_node (current_function_decl)->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) | 218 gcc_checking_assert (fun); |
219 if (count == profile_count::zero ()) | |
220 return true; | |
221 if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ) | |
222 { | |
223 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION); | |
224 if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs) | |
225 return false; | |
226 return true; | |
227 } | |
228 if ((!profile_info || profile_status_for_fn (fun) != PROFILE_READ) | |
229 && (cgraph_node::get (fun->decl)->frequency | |
230 == NODE_FREQUENCY_UNLIKELY_EXECUTED)) | |
207 return true; | 231 return true; |
208 return false; | 232 return false; |
209 } | 233 } |
210 | 234 |
235 | |
236 /* Return true in case BB is probably never executed. */ | |
237 | |
238 bool | |
239 probably_never_executed_bb_p (struct function *fun, const_basic_block bb) | |
240 { | |
241 return probably_never_executed (fun, bb->count, bb->frequency); | |
242 } | |
243 | |
244 | |
245 /* Return true if E is unlikely executed for obvious reasons. */ | |
246 | |
247 static bool | |
248 unlikely_executed_edge_p (edge e) | |
249 { | |
250 return (e->count () == profile_count::zero () | |
251 || e->probability == profile_probability::never ()) | |
252 || (e->flags & (EDGE_EH | EDGE_FAKE)); | |
253 } | |
254 | |
255 /* Return true in case edge E is probably never executed. */ | |
256 | |
257 bool | |
258 probably_never_executed_edge_p (struct function *fun, edge e) | |
259 { | |
260 if (unlikely_executed_edge_p (e)) | |
261 return true; | |
262 return probably_never_executed (fun, e->count (), EDGE_FREQUENCY (e)); | |
263 } | |
264 | |
211 /* Return true when current function should always be optimized for size. */ | 265 /* Return true when current function should always be optimized for size. */ |
212 | 266 |
213 bool | 267 bool |
214 optimize_function_for_size_p (struct function *fun) | 268 optimize_function_for_size_p (struct function *fun) |
215 { | 269 { |
216 return (optimize_size | 270 if (!fun || !fun->decl) |
217 || (fun && fun->decl | 271 return optimize_size; |
218 && (cgraph_node (fun->decl)->frequency | 272 cgraph_node *n = cgraph_node::get (fun->decl); |
219 == NODE_FREQUENCY_UNLIKELY_EXECUTED))); | 273 return n && n->optimize_for_size_p (); |
220 } | 274 } |
221 | 275 |
222 /* Return true when current function should always be optimized for speed. */ | 276 /* Return true when current function should always be optimized for speed. */ |
223 | 277 |
224 bool | 278 bool |
225 optimize_function_for_speed_p (struct function *fun) | 279 optimize_function_for_speed_p (struct function *fun) |
226 { | 280 { |
227 return !optimize_function_for_size_p (fun); | 281 return !optimize_function_for_size_p (fun); |
228 } | 282 } |
229 | 283 |
284 /* Return the optimization type that should be used for the function FUN. */ | |
285 | |
286 optimization_type | |
287 function_optimization_type (struct function *fun) | |
288 { | |
289 return (optimize_function_for_speed_p (fun) | |
290 ? OPTIMIZE_FOR_SPEED | |
291 : OPTIMIZE_FOR_SIZE); | |
292 } | |
293 | |
230 /* Return TRUE when BB should be optimized for size. */ | 294 /* Return TRUE when BB should be optimized for size. */ |
231 | 295 |
232 bool | 296 bool |
233 optimize_bb_for_size_p (const_basic_block bb) | 297 optimize_bb_for_size_p (const_basic_block bb) |
234 { | 298 { |
235 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb); | 299 return (optimize_function_for_size_p (cfun) |
300 || (bb && !maybe_hot_bb_p (cfun, bb))); | |
236 } | 301 } |
237 | 302 |
238 /* Return TRUE when BB should be optimized for speed. */ | 303 /* Return TRUE when BB should be optimized for speed. */ |
239 | 304 |
240 bool | 305 bool |
241 optimize_bb_for_speed_p (const_basic_block bb) | 306 optimize_bb_for_speed_p (const_basic_block bb) |
242 { | 307 { |
243 return !optimize_bb_for_size_p (bb); | 308 return !optimize_bb_for_size_p (bb); |
309 } | |
310 | |
311 /* Return the optimization type that should be used for block BB. */ | |
312 | |
313 optimization_type | |
314 bb_optimization_type (const_basic_block bb) | |
315 { | |
316 return (optimize_bb_for_speed_p (bb) | |
317 ? OPTIMIZE_FOR_SPEED | |
318 : OPTIMIZE_FOR_SIZE); | |
244 } | 319 } |
245 | 320 |
246 /* Return TRUE when BB should be optimized for size. */ | 321 /* Return TRUE when BB should be optimized for size. */ |
247 | 322 |
248 bool | 323 bool |
331 predictor. */ | 406 predictor. */ |
332 | 407 |
333 bool | 408 bool |
334 predictable_edge_p (edge e) | 409 predictable_edge_p (edge e) |
335 { | 410 { |
336 if (profile_status == PROFILE_ABSENT) | 411 if (!e->probability.initialized_p ()) |
337 return false; | 412 return false; |
338 if ((e->probability | 413 if ((e->probability.to_reg_br_prob_base () |
339 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100) | 414 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100) |
340 || (REG_BR_PROB_BASE - e->probability | 415 || (REG_BR_PROB_BASE - e->probability.to_reg_br_prob_base () |
341 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)) | 416 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)) |
342 return true; | 417 return true; |
343 return false; | 418 return false; |
344 } | 419 } |
345 | 420 |
347 /* Set RTL expansion for BB profile. */ | 422 /* Set RTL expansion for BB profile. */ |
348 | 423 |
349 void | 424 void |
350 rtl_profile_for_bb (basic_block bb) | 425 rtl_profile_for_bb (basic_block bb) |
351 { | 426 { |
352 crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb); | 427 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb); |
353 } | 428 } |
354 | 429 |
355 /* Set RTL expansion for edge profile. */ | 430 /* Set RTL expansion for edge profile. */ |
356 | 431 |
357 void | 432 void |
381 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor) | 456 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor) |
382 return true; | 457 return true; |
383 return false; | 458 return false; |
384 } | 459 } |
385 | 460 |
386 /* This map contains for a basic block the list of predictions for the | |
387 outgoing edges. */ | |
388 | |
389 static struct pointer_map_t *bb_predictions; | |
390 | |
391 /* Structure representing predictions in tree level. */ | 461 /* Structure representing predictions in tree level. */ |
392 | 462 |
393 struct edge_prediction { | 463 struct edge_prediction { |
394 struct edge_prediction *ep_next; | 464 struct edge_prediction *ep_next; |
395 edge ep_edge; | 465 edge ep_edge; |
396 enum br_predictor ep_predictor; | 466 enum br_predictor ep_predictor; |
397 int ep_probability; | 467 int ep_probability; |
398 }; | 468 }; |
399 | 469 |
470 /* This map contains for a basic block the list of predictions for the | |
471 outgoing edges. */ | |
472 | |
473 static hash_map<const_basic_block, edge_prediction *> *bb_predictions; | |
474 | |
400 /* Return true if the one of outgoing edges is already predicted by | 475 /* Return true if the one of outgoing edges is already predicted by |
401 PREDICTOR. */ | 476 PREDICTOR. */ |
402 | 477 |
403 bool | 478 bool |
404 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor) | 479 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor) |
405 { | 480 { |
406 struct edge_prediction *i; | 481 struct edge_prediction *i; |
407 void **preds = pointer_map_contains (bb_predictions, bb); | 482 edge_prediction **preds = bb_predictions->get (bb); |
408 | 483 |
409 if (!preds) | 484 if (!preds) |
410 return false; | 485 return false; |
411 | 486 |
412 for (i = (struct edge_prediction *) *preds; i; i = i->ep_next) | 487 for (i = *preds; i; i = i->ep_next) |
413 if (i->ep_predictor == predictor) | 488 if (i->ep_predictor == predictor) |
414 return true; | 489 return true; |
415 return false; | 490 return false; |
416 } | 491 } |
417 | 492 |
418 /* Return true when the probability of edge is reliable. | 493 /* Return true if the one of outgoing edges is already predicted by |
419 | 494 PREDICTOR for edge E predicted as TAKEN. */ |
420 The profile guessing code is good at predicting branch outcome (ie. | 495 |
421 taken/not taken), that is predicted right slightly over 75% of time. | 496 bool |
422 It is however notoriously poor on predicting the probability itself. | 497 edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken) |
423 In general the profile appear a lot flatter (with probabilities closer | 498 { |
424 to 50%) than the reality so it is bad idea to use it to drive optimization | 499 struct edge_prediction *i; |
425 such as those disabling dynamic branch prediction for well predictable | 500 basic_block bb = e->src; |
426 branches. | 501 edge_prediction **preds = bb_predictions->get (bb); |
427 | 502 if (!preds) |
428 There are two exceptions - edges leading to noreturn edges and edges | 503 return false; |
429 predicted by number of iterations heuristics are predicted well. This macro | 504 |
430 should be able to distinguish those, but at the moment it simply check for | 505 int probability = predictor_info[(int) predictor].hitrate; |
431 noreturn heuristic that is only one giving probability over 99% or bellow | 506 |
432 1%. In future we might want to propagate reliability information across the | 507 if (taken != TAKEN) |
433 CFG if we find this information useful on multiple places. */ | 508 probability = REG_BR_PROB_BASE - probability; |
434 static bool | 509 |
435 probability_reliable_p (int prob) | 510 for (i = *preds; i; i = i->ep_next) |
436 { | 511 if (i->ep_predictor == predictor |
437 return (profile_status == PROFILE_READ | 512 && i->ep_edge == e |
438 || (profile_status == PROFILE_GUESSED | 513 && i->ep_probability == probability) |
439 && (prob <= HITRATE (1) || prob >= HITRATE (99)))); | 514 return true; |
515 return false; | |
440 } | 516 } |
441 | 517 |
442 /* Same predicate as above, working on edges. */ | 518 /* Same predicate as above, working on edges. */ |
443 bool | 519 bool |
444 edge_probability_reliable_p (const_edge e) | 520 edge_probability_reliable_p (const_edge e) |
445 { | 521 { |
446 return probability_reliable_p (e->probability); | 522 return e->probability.probably_reliable_p (); |
447 } | 523 } |
448 | 524 |
449 /* Same predicate as edge_probability_reliable_p, working on notes. */ | 525 /* Same predicate as edge_probability_reliable_p, working on notes. */ |
450 bool | 526 bool |
451 br_prob_note_reliable_p (const_rtx note) | 527 br_prob_note_reliable_p (const_rtx note) |
452 { | 528 { |
453 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB); | 529 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB); |
454 return probability_reliable_p (INTVAL (XEXP (note, 0))); | 530 return profile_probability::from_reg_br_prob_note |
531 (XINT (note, 0)).probably_reliable_p (); | |
455 } | 532 } |
456 | 533 |
457 static void | 534 static void |
458 predict_insn (rtx insn, enum br_predictor predictor, int probability) | 535 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability) |
459 { | 536 { |
460 gcc_assert (any_condjump_p (insn)); | 537 gcc_assert (any_condjump_p (insn)); |
461 if (!flag_guess_branch_prob) | 538 if (!flag_guess_branch_prob) |
462 return; | 539 return; |
463 | 540 |
468 } | 545 } |
469 | 546 |
470 /* Predict insn by given predictor. */ | 547 /* Predict insn by given predictor. */ |
471 | 548 |
472 void | 549 void |
473 predict_insn_def (rtx insn, enum br_predictor predictor, | 550 predict_insn_def (rtx_insn *insn, enum br_predictor predictor, |
474 enum prediction taken) | 551 enum prediction taken) |
475 { | 552 { |
476 int probability = predictor_info[(int) predictor].hitrate; | 553 int probability = predictor_info[(int) predictor].hitrate; |
477 | 554 |
478 if (taken != TAKEN) | 555 if (taken != TAKEN) |
484 /* Predict edge E with given probability if possible. */ | 561 /* Predict edge E with given probability if possible. */ |
485 | 562 |
486 void | 563 void |
487 rtl_predict_edge (edge e, enum br_predictor predictor, int probability) | 564 rtl_predict_edge (edge e, enum br_predictor predictor, int probability) |
488 { | 565 { |
489 rtx last_insn; | 566 rtx_insn *last_insn; |
490 last_insn = BB_END (e->src); | 567 last_insn = BB_END (e->src); |
491 | 568 |
492 /* We can store the branch prediction information only about | 569 /* We can store the branch prediction information only about |
493 conditional jumps. */ | 570 conditional jumps. */ |
494 if (!any_condjump_p (last_insn)) | 571 if (!any_condjump_p (last_insn)) |
503 | 580 |
504 /* Predict edge E with the given PROBABILITY. */ | 581 /* Predict edge E with the given PROBABILITY. */ |
505 void | 582 void |
506 gimple_predict_edge (edge e, enum br_predictor predictor, int probability) | 583 gimple_predict_edge (edge e, enum br_predictor predictor, int probability) |
507 { | 584 { |
508 gcc_assert (profile_status != PROFILE_GUESSED); | 585 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) |
509 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1) | 586 && EDGE_COUNT (e->src->succs) > 1 |
510 && flag_guess_branch_prob && optimize) | 587 && flag_guess_branch_prob |
588 && optimize) | |
511 { | 589 { |
512 struct edge_prediction *i = XNEW (struct edge_prediction); | 590 struct edge_prediction *i = XNEW (struct edge_prediction); |
513 void **preds = pointer_map_insert (bb_predictions, e->src); | 591 edge_prediction *&preds = bb_predictions->get_or_insert (e->src); |
514 | 592 |
515 i->ep_next = (struct edge_prediction *) *preds; | 593 i->ep_next = preds; |
516 *preds = i; | 594 preds = i; |
517 i->ep_probability = probability; | 595 i->ep_probability = probability; |
518 i->ep_predictor = predictor; | 596 i->ep_predictor = predictor; |
519 i->ep_edge = e; | 597 i->ep_edge = e; |
520 } | 598 } |
521 } | 599 } |
522 | 600 |
523 /* Remove all predictions on given basic block that are attached | 601 /* Filter edge predictions PREDS by a function FILTER. DATA are passed |
524 to edge E. */ | 602 to the filter function. */ |
603 | |
525 void | 604 void |
526 remove_predictions_associated_with_edge (edge e) | 605 filter_predictions (edge_prediction **preds, |
527 { | 606 bool (*filter) (edge_prediction *, void *), void *data) |
528 void **preds; | 607 { |
529 | |
530 if (!bb_predictions) | 608 if (!bb_predictions) |
531 return; | 609 return; |
532 | 610 |
533 preds = pointer_map_contains (bb_predictions, e->src); | |
534 | |
535 if (preds) | 611 if (preds) |
536 { | 612 { |
537 struct edge_prediction **prediction = (struct edge_prediction **) preds; | 613 struct edge_prediction **prediction = preds; |
538 struct edge_prediction *next; | 614 struct edge_prediction *next; |
539 | 615 |
540 while (*prediction) | 616 while (*prediction) |
541 { | 617 { |
542 if ((*prediction)->ep_edge == e) | 618 if ((*filter) (*prediction, data)) |
619 prediction = &((*prediction)->ep_next); | |
620 else | |
543 { | 621 { |
544 next = (*prediction)->ep_next; | 622 next = (*prediction)->ep_next; |
545 free (*prediction); | 623 free (*prediction); |
546 *prediction = next; | 624 *prediction = next; |
547 } | 625 } |
548 else | 626 } |
549 prediction = &((*prediction)->ep_next); | 627 } |
550 } | 628 } |
551 } | 629 |
630 /* Filter function predicate that returns true for a edge predicate P | |
631 if its edge is equal to DATA. */ | |
632 | |
633 bool | |
634 equal_edge_p (edge_prediction *p, void *data) | |
635 { | |
636 return p->ep_edge == (edge)data; | |
637 } | |
638 | |
639 /* Remove all predictions on given basic block that are attached | |
640 to edge E. */ | |
641 void | |
642 remove_predictions_associated_with_edge (edge e) | |
643 { | |
644 if (!bb_predictions) | |
645 return; | |
646 | |
647 edge_prediction **preds = bb_predictions->get (e->src); | |
648 filter_predictions (preds, equal_edge_p, e); | |
552 } | 649 } |
553 | 650 |
554 /* Clears the list of predictions stored for BB. */ | 651 /* Clears the list of predictions stored for BB. */ |
555 | 652 |
556 static void | 653 static void |
557 clear_bb_predictions (basic_block bb) | 654 clear_bb_predictions (basic_block bb) |
558 { | 655 { |
559 void **preds = pointer_map_contains (bb_predictions, bb); | 656 edge_prediction **preds = bb_predictions->get (bb); |
560 struct edge_prediction *pred, *next; | 657 struct edge_prediction *pred, *next; |
561 | 658 |
562 if (!preds) | 659 if (!preds) |
563 return; | 660 return; |
564 | 661 |
565 for (pred = (struct edge_prediction *) *preds; pred; pred = next) | 662 for (pred = *preds; pred; pred = next) |
566 { | 663 { |
567 next = pred->ep_next; | 664 next = pred->ep_next; |
568 free (pred); | 665 free (pred); |
569 } | 666 } |
570 *preds = NULL; | 667 *preds = NULL; |
572 | 669 |
573 /* Return true when we can store prediction on insn INSN. | 670 /* Return true when we can store prediction on insn INSN. |
574 At the moment we represent predictions only on conditional | 671 At the moment we represent predictions only on conditional |
575 jumps, not at computed jump or other complicated cases. */ | 672 jumps, not at computed jump or other complicated cases. */ |
576 static bool | 673 static bool |
577 can_predict_insn_p (const_rtx insn) | 674 can_predict_insn_p (const rtx_insn *insn) |
578 { | 675 { |
579 return (JUMP_P (insn) | 676 return (JUMP_P (insn) |
580 && any_condjump_p (insn) | 677 && any_condjump_p (insn) |
581 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2); | 678 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2); |
582 } | 679 } |
603 { | 700 { |
604 rtx note; | 701 rtx note; |
605 | 702 |
606 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) | 703 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) |
607 if (REG_NOTE_KIND (note) == REG_BR_PROB) | 704 if (REG_NOTE_KIND (note) == REG_BR_PROB) |
608 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0))); | 705 XINT (note, 0) = profile_probability::from_reg_br_prob_note |
706 (XINT (note, 0)).invert ().to_reg_br_prob_note (); | |
609 else if (REG_NOTE_KIND (note) == REG_BR_PRED) | 707 else if (REG_NOTE_KIND (note) == REG_BR_PRED) |
610 XEXP (XEXP (note, 0), 1) | 708 XEXP (XEXP (note, 0), 1) |
611 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1))); | 709 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1))); |
612 } | 710 } |
613 | 711 |
614 /* Dump information about the branch prediction to the output file. */ | 712 /* Dump information about the branch prediction to the output file. */ |
615 | 713 |
616 static void | 714 static void |
617 dump_prediction (FILE *file, enum br_predictor predictor, int probability, | 715 dump_prediction (FILE *file, enum br_predictor predictor, int probability, |
618 basic_block bb, int used) | 716 basic_block bb, enum predictor_reason reason = REASON_NONE, |
619 { | 717 edge ep_edge = NULL) |
620 edge e; | 718 { |
719 edge e = ep_edge; | |
621 edge_iterator ei; | 720 edge_iterator ei; |
622 | 721 |
623 if (!file) | 722 if (!file) |
624 return; | 723 return; |
625 | 724 |
725 if (e == NULL) | |
726 FOR_EACH_EDGE (e, ei, bb->succs) | |
727 if (! (e->flags & EDGE_FALLTHRU)) | |
728 break; | |
729 | |
730 char edge_info_str[128]; | |
731 if (ep_edge) | |
732 sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index, | |
733 ep_edge->dest->index); | |
734 else | |
735 edge_info_str[0] = '\0'; | |
736 | |
737 fprintf (file, " %s heuristics%s%s: %.1f%%", | |
738 predictor_info[predictor].name, | |
739 edge_info_str, reason_messages[reason], | |
740 probability * 100.0 / REG_BR_PROB_BASE); | |
741 | |
742 if (bb->count.initialized_p ()) | |
743 { | |
744 fprintf (file, " exec "); | |
745 bb->count.dump (file); | |
746 if (e) | |
747 { | |
748 fprintf (file, " hit "); | |
749 e->count ().dump (file); | |
750 fprintf (file, " (%.1f%%)", e->count ().to_gcov_type() * 100.0 | |
751 / bb->count.to_gcov_type ()); | |
752 } | |
753 } | |
754 | |
755 fprintf (file, "\n"); | |
756 } | |
757 | |
758 /* Return true if STMT is known to be unlikely executed. */ | |
759 | |
760 static bool | |
761 unlikely_executed_stmt_p (gimple *stmt) | |
762 { | |
763 if (!is_gimple_call (stmt)) | |
764 return false; | |
765 /* NORETURN attribute alone is not strong enough: exit() may be quite | |
766 likely executed once during program run. */ | |
767 if (gimple_call_fntype (stmt) | |
768 && lookup_attribute ("cold", | |
769 TYPE_ATTRIBUTES (gimple_call_fntype (stmt))) | |
770 && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))) | |
771 return true; | |
772 tree decl = gimple_call_fndecl (stmt); | |
773 if (!decl) | |
774 return false; | |
775 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)) | |
776 && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))) | |
777 return true; | |
778 | |
779 cgraph_node *n = cgraph_node::get (decl); | |
780 if (!n) | |
781 return false; | |
782 | |
783 availability avail; | |
784 n = n->ultimate_alias_target (&avail); | |
785 if (avail < AVAIL_AVAILABLE) | |
786 return false; | |
787 if (!n->analyzed | |
788 || n->decl == current_function_decl) | |
789 return false; | |
790 return n->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED; | |
791 } | |
792 | |
793 /* Return true if BB is unlikely executed. */ | |
794 | |
795 static bool | |
796 unlikely_executed_bb_p (basic_block bb) | |
797 { | |
798 if (bb->count == profile_count::zero ()) | |
799 return true; | |
800 if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun) || bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
801 return false; | |
802 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | |
803 !gsi_end_p (gsi); gsi_next (&gsi)) | |
804 { | |
805 if (unlikely_executed_stmt_p (gsi_stmt (gsi))) | |
806 return true; | |
807 if (stmt_can_terminate_bb_p (gsi_stmt (gsi))) | |
808 return false; | |
809 } | |
810 return false; | |
811 } | |
812 | |
813 /* We can not predict the probabilities of outgoing edges of bb. Set them | |
814 evenly and hope for the best. If UNLIKELY_EDGES is not null, distribute | |
815 even probability for all edges not mentioned in the set. These edges | |
816 are given PROB_VERY_UNLIKELY probability. */ | |
817 | |
818 static void | |
819 set_even_probabilities (basic_block bb, | |
820 hash_set<edge> *unlikely_edges = NULL) | |
821 { | |
822 unsigned nedges = 0, unlikely_count = 0; | |
823 edge e = NULL; | |
824 edge_iterator ei; | |
825 profile_probability all = profile_probability::always (); | |
826 | |
626 FOR_EACH_EDGE (e, ei, bb->succs) | 827 FOR_EACH_EDGE (e, ei, bb->succs) |
627 if (! (e->flags & EDGE_FALLTHRU)) | 828 if (e->probability.initialized_p ()) |
628 break; | 829 all -= e->probability; |
629 | 830 else if (!unlikely_executed_edge_p (e)) |
630 fprintf (file, " %s heuristics%s: %.1f%%", | 831 { |
631 predictor_info[predictor].name, | 832 nedges ++; |
632 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE); | 833 if (unlikely_edges != NULL && unlikely_edges->contains (e)) |
633 | 834 { |
634 if (bb->count) | 835 all -= profile_probability::very_unlikely (); |
635 { | 836 unlikely_count++; |
636 fprintf (file, " exec "); | 837 } |
637 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count); | 838 } |
638 if (e) | 839 |
639 { | 840 /* Make the distribution even if all edges are unlikely. */ |
640 fprintf (file, " hit "); | 841 if (unlikely_count == nedges) |
641 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count); | 842 { |
642 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count); | 843 unlikely_edges = NULL; |
643 } | 844 unlikely_count = 0; |
644 } | 845 } |
645 | 846 |
646 fprintf (file, "\n"); | 847 unsigned c = nedges - unlikely_count; |
647 } | |
648 | |
649 /* We can not predict the probabilities of outgoing edges of bb. Set them | |
650 evenly and hope for the best. */ | |
651 static void | |
652 set_even_probabilities (basic_block bb) | |
653 { | |
654 int nedges = 0; | |
655 edge e; | |
656 edge_iterator ei; | |
657 | 848 |
658 FOR_EACH_EDGE (e, ei, bb->succs) | 849 FOR_EACH_EDGE (e, ei, bb->succs) |
659 if (!(e->flags & (EDGE_EH | EDGE_FAKE))) | 850 if (e->probability.initialized_p ()) |
660 nedges ++; | 851 ; |
661 FOR_EACH_EDGE (e, ei, bb->succs) | 852 else if (!unlikely_executed_edge_p (e)) |
662 if (!(e->flags & (EDGE_EH | EDGE_FAKE))) | 853 { |
663 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges; | 854 if (unlikely_edges != NULL && unlikely_edges->contains (e)) |
855 e->probability = profile_probability::very_unlikely (); | |
856 else | |
857 e->probability = all.apply_scale (1, c).guessed (); | |
858 } | |
664 else | 859 else |
665 e->probability = 0; | 860 e->probability = profile_probability::never (); |
861 } | |
862 | |
863 /* Add REG_BR_PROB note to JUMP with PROB. */ | |
864 | |
865 void | |
866 add_reg_br_prob_note (rtx_insn *jump, profile_probability prob) | |
867 { | |
868 gcc_checking_assert (JUMP_P (jump) && !find_reg_note (jump, REG_BR_PROB, 0)); | |
869 add_int_reg_note (jump, REG_BR_PROB, prob.to_reg_br_prob_note ()); | |
666 } | 870 } |
667 | 871 |
668 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB | 872 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB |
669 note if not already present. Remove now useless REG_BR_PRED notes. */ | 873 note if not already present. Remove now useless REG_BR_PRED notes. */ |
670 | 874 |
671 static void | 875 static void |
672 combine_predictions_for_insn (rtx insn, basic_block bb) | 876 combine_predictions_for_insn (rtx_insn *insn, basic_block bb) |
673 { | 877 { |
674 rtx prob_note; | 878 rtx prob_note; |
675 rtx *pnote; | 879 rtx *pnote; |
676 rtx note; | 880 rtx note; |
677 int best_probability = PROB_EVEN; | 881 int best_probability = PROB_EVEN; |
701 enum br_predictor predictor = ((enum br_predictor) | 905 enum br_predictor predictor = ((enum br_predictor) |
702 INTVAL (XEXP (XEXP (note, 0), 0))); | 906 INTVAL (XEXP (XEXP (note, 0), 0))); |
703 int probability = INTVAL (XEXP (XEXP (note, 0), 1)); | 907 int probability = INTVAL (XEXP (XEXP (note, 0), 1)); |
704 | 908 |
705 found = true; | 909 found = true; |
706 if (best_predictor > predictor) | 910 if (best_predictor > predictor |
911 && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH) | |
707 best_probability = probability, best_predictor = predictor; | 912 best_probability = probability, best_predictor = predictor; |
708 | 913 |
709 d = (combined_probability * probability | 914 d = (combined_probability * probability |
710 + (REG_BR_PROB_BASE - combined_probability) | 915 + (REG_BR_PROB_BASE - combined_probability) |
711 * (REG_BR_PROB_BASE - probability)); | 916 * (REG_BR_PROB_BASE - probability)); |
721 | 926 |
722 /* Decide which heuristic to use. In case we didn't match anything, | 927 /* Decide which heuristic to use. In case we didn't match anything, |
723 use no_prediction heuristic, in case we did match, use either | 928 use no_prediction heuristic, in case we did match, use either |
724 first match or Dempster-Shaffer theory depending on the flags. */ | 929 first match or Dempster-Shaffer theory depending on the flags. */ |
725 | 930 |
726 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH) | 931 if (best_predictor != END_PREDICTORS) |
727 first_match = true; | 932 first_match = true; |
728 | 933 |
729 if (!found) | 934 if (!found) |
730 dump_prediction (dump_file, PRED_NO_PREDICTION, | 935 dump_prediction (dump_file, PRED_NO_PREDICTION, |
731 combined_probability, bb, true); | 936 combined_probability, bb); |
732 else | 937 else |
733 { | 938 { |
734 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, | 939 if (!first_match) |
735 bb, !first_match); | 940 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, |
736 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, | 941 bb, !first_match ? REASON_NONE : REASON_IGNORED); |
737 bb, first_match); | 942 else |
943 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, | |
944 bb, first_match ? REASON_NONE : REASON_IGNORED); | |
738 } | 945 } |
739 | 946 |
740 if (first_match) | 947 if (first_match) |
741 combined_probability = best_probability; | 948 combined_probability = best_probability; |
742 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true); | 949 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb); |
743 | 950 |
744 while (*pnote) | 951 while (*pnote) |
745 { | 952 { |
746 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED) | 953 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED) |
747 { | 954 { |
748 enum br_predictor predictor = ((enum br_predictor) | 955 enum br_predictor predictor = ((enum br_predictor) |
749 INTVAL (XEXP (XEXP (*pnote, 0), 0))); | 956 INTVAL (XEXP (XEXP (*pnote, 0), 0))); |
750 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1)); | 957 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1)); |
751 | 958 |
752 dump_prediction (dump_file, predictor, probability, bb, | 959 dump_prediction (dump_file, predictor, probability, bb, |
753 !first_match || best_predictor == predictor); | 960 (!first_match || best_predictor == predictor) |
961 ? REASON_NONE : REASON_IGNORED); | |
754 *pnote = XEXP (*pnote, 1); | 962 *pnote = XEXP (*pnote, 1); |
755 } | 963 } |
756 else | 964 else |
757 pnote = &XEXP (*pnote, 1); | 965 pnote = &XEXP (*pnote, 1); |
758 } | 966 } |
759 | 967 |
760 if (!prob_note) | 968 if (!prob_note) |
761 { | 969 { |
762 add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability)); | 970 profile_probability p |
971 = profile_probability::from_reg_br_prob_base (combined_probability); | |
972 add_reg_br_prob_note (insn, p); | |
763 | 973 |
764 /* Save the prediction into CFG in case we are seeing non-degenerated | 974 /* Save the prediction into CFG in case we are seeing non-degenerated |
765 conditional jump. */ | 975 conditional jump. */ |
766 if (!single_succ_p (bb)) | 976 if (!single_succ_p (bb)) |
767 { | 977 { |
768 BRANCH_EDGE (bb)->probability = combined_probability; | 978 BRANCH_EDGE (bb)->probability = p; |
769 FALLTHRU_EDGE (bb)->probability | 979 FALLTHRU_EDGE (bb)->probability |
770 = REG_BR_PROB_BASE - combined_probability; | 980 = BRANCH_EDGE (bb)->probability.invert (); |
771 } | 981 } |
772 } | 982 } |
773 else if (!single_succ_p (bb)) | 983 else if (!single_succ_p (bb)) |
774 { | 984 { |
775 int prob = INTVAL (XEXP (prob_note, 0)); | 985 profile_probability prob = profile_probability::from_reg_br_prob_note |
986 (XINT (prob_note, 0)); | |
776 | 987 |
777 BRANCH_EDGE (bb)->probability = prob; | 988 BRANCH_EDGE (bb)->probability = prob; |
778 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob; | 989 FALLTHRU_EDGE (bb)->probability = prob.invert (); |
779 } | 990 } |
780 else | 991 else |
781 single_succ_edge (bb)->probability = REG_BR_PROB_BASE; | 992 single_succ_edge (bb)->probability = profile_probability::always (); |
993 } | |
994 | |
995 /* Edge prediction hash traits. */ | |
996 | |
997 struct predictor_hash: pointer_hash <edge_prediction> | |
998 { | |
999 | |
1000 static inline hashval_t hash (const edge_prediction *); | |
1001 static inline bool equal (const edge_prediction *, const edge_prediction *); | |
1002 }; | |
1003 | |
1004 /* Calculate hash value of an edge prediction P based on predictor and | |
1005 normalized probability. */ | |
1006 | |
1007 inline hashval_t | |
1008 predictor_hash::hash (const edge_prediction *p) | |
1009 { | |
1010 inchash::hash hstate; | |
1011 hstate.add_int (p->ep_predictor); | |
1012 | |
1013 int prob = p->ep_probability; | |
1014 if (prob > REG_BR_PROB_BASE / 2) | |
1015 prob = REG_BR_PROB_BASE - prob; | |
1016 | |
1017 hstate.add_int (prob); | |
1018 | |
1019 return hstate.end (); | |
1020 } | |
1021 | |
1022 /* Return true whether edge predictions P1 and P2 use the same predictor and | |
1023 have equal (or opposed probability). */ | |
1024 | |
1025 inline bool | |
1026 predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2) | |
1027 { | |
1028 return (p1->ep_predictor == p2->ep_predictor | |
1029 && (p1->ep_probability == p2->ep_probability | |
1030 || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability)); | |
1031 } | |
1032 | |
1033 struct predictor_hash_traits: predictor_hash, | |
1034 typed_noop_remove <edge_prediction *> {}; | |
1035 | |
1036 /* Return true if edge prediction P is not in DATA hash set. */ | |
1037 | |
1038 static bool | |
1039 not_removed_prediction_p (edge_prediction *p, void *data) | |
1040 { | |
1041 hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data; | |
1042 return !remove->contains (p); | |
1043 } | |
1044 | |
1045 /* Prune predictions for a basic block BB. Currently we do following | |
1046 clean-up steps: | |
1047 | |
1048 1) remove duplicate prediction that is guessed with the same probability | |
1049 (different than 1/2) to both edge | |
1050 2) remove duplicates for a prediction that belongs with the same probability | |
1051 to a single edge | |
1052 | |
1053 */ | |
1054 | |
1055 static void | |
1056 prune_predictions_for_bb (basic_block bb) | |
1057 { | |
1058 edge_prediction **preds = bb_predictions->get (bb); | |
1059 | |
1060 if (preds) | |
1061 { | |
1062 hash_table <predictor_hash_traits> s (13); | |
1063 hash_set <edge_prediction *> remove; | |
1064 | |
1065 /* Step 1: identify predictors that should be removed. */ | |
1066 for (edge_prediction *pred = *preds; pred; pred = pred->ep_next) | |
1067 { | |
1068 edge_prediction *existing = s.find (pred); | |
1069 if (existing) | |
1070 { | |
1071 if (pred->ep_edge == existing->ep_edge | |
1072 && pred->ep_probability == existing->ep_probability) | |
1073 { | |
1074 /* Remove a duplicate predictor. */ | |
1075 dump_prediction (dump_file, pred->ep_predictor, | |
1076 pred->ep_probability, bb, | |
1077 REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge); | |
1078 | |
1079 remove.add (pred); | |
1080 } | |
1081 else if (pred->ep_edge != existing->ep_edge | |
1082 && pred->ep_probability == existing->ep_probability | |
1083 && pred->ep_probability != REG_BR_PROB_BASE / 2) | |
1084 { | |
1085 /* Remove both predictors as they predict the same | |
1086 for both edges. */ | |
1087 dump_prediction (dump_file, existing->ep_predictor, | |
1088 pred->ep_probability, bb, | |
1089 REASON_EDGE_PAIR_DUPLICATE, | |
1090 existing->ep_edge); | |
1091 dump_prediction (dump_file, pred->ep_predictor, | |
1092 pred->ep_probability, bb, | |
1093 REASON_EDGE_PAIR_DUPLICATE, | |
1094 pred->ep_edge); | |
1095 | |
1096 remove.add (existing); | |
1097 remove.add (pred); | |
1098 } | |
1099 } | |
1100 | |
1101 edge_prediction **slot2 = s.find_slot (pred, INSERT); | |
1102 *slot2 = pred; | |
1103 } | |
1104 | |
1105 /* Step 2: Remove predictors. */ | |
1106 filter_predictions (preds, not_removed_prediction_p, &remove); | |
1107 } | |
782 } | 1108 } |
783 | 1109 |
784 /* Combine predictions into single probability and store them into CFG. | 1110 /* Combine predictions into single probability and store them into CFG. |
785 Remove now useless prediction entries. */ | 1111 Remove now useless prediction entries. |
1112 If DRY_RUN is set, only produce dumps and do not modify profile. */ | |
786 | 1113 |
787 static void | 1114 static void |
788 combine_predictions_for_bb (basic_block bb) | 1115 combine_predictions_for_bb (basic_block bb, bool dry_run) |
789 { | 1116 { |
790 int best_probability = PROB_EVEN; | 1117 int best_probability = PROB_EVEN; |
791 enum br_predictor best_predictor = END_PREDICTORS; | 1118 enum br_predictor best_predictor = END_PREDICTORS; |
792 int combined_probability = REG_BR_PROB_BASE / 2; | 1119 int combined_probability = REG_BR_PROB_BASE / 2; |
793 int d; | 1120 int d; |
795 bool found = false; | 1122 bool found = false; |
796 struct edge_prediction *pred; | 1123 struct edge_prediction *pred; |
797 int nedges = 0; | 1124 int nedges = 0; |
798 edge e, first = NULL, second = NULL; | 1125 edge e, first = NULL, second = NULL; |
799 edge_iterator ei; | 1126 edge_iterator ei; |
800 void **preds; | |
801 | 1127 |
802 FOR_EACH_EDGE (e, ei, bb->succs) | 1128 FOR_EACH_EDGE (e, ei, bb->succs) |
803 if (!(e->flags & (EDGE_EH | EDGE_FAKE))) | 1129 if (!unlikely_executed_edge_p (e)) |
804 { | 1130 { |
805 nedges ++; | 1131 nedges ++; |
806 if (first && !second) | 1132 if (first && !second) |
807 second = e; | 1133 second = e; |
808 if (!first) | 1134 if (!first) |
809 first = e; | 1135 first = e; |
810 } | 1136 } |
1137 else if (!e->probability.initialized_p ()) | |
1138 e->probability = profile_probability::never (); | |
811 | 1139 |
812 /* When there is no successor or only one choice, prediction is easy. | 1140 /* When there is no successor or only one choice, prediction is easy. |
813 | 1141 |
814 We are lazy for now and predict only basic blocks with two outgoing | 1142 When we have a basic block with more than 2 successors, the situation |
815 edges. It is possible to predict generic case too, but we have to | 1143 is more complicated as DS theory cannot be used literally. |
816 ignore first match heuristics and do more involved combining. Implement | 1144 More precisely, let's assume we predicted edge e1 with probability p1, |
817 this later. */ | 1145 thus: m1({b1}) = p1. As we're going to combine more than 2 edges, we |
1146 need to find probability of e.g. m1({b2}), which we don't know. | |
1147 The only approximation is to equally distribute 1-p1 to all edges | |
1148 different from b1. | |
1149 | |
1150 According to numbers we've got from SPEC2006 benchark, there's only | |
1151 one interesting reliable predictor (noreturn call), which can be | |
1152 handled with a bit easier approach. */ | |
818 if (nedges != 2) | 1153 if (nedges != 2) |
819 { | 1154 { |
820 if (!bb->count) | 1155 hash_set<edge> unlikely_edges (4); |
821 set_even_probabilities (bb); | 1156 |
1157 /* Identify all edges that have a probability close to very unlikely. | |
1158 Doing the approach for very unlikely doesn't worth for doing as | |
1159 there's no such probability in SPEC2006 benchmark. */ | |
1160 edge_prediction **preds = bb_predictions->get (bb); | |
1161 if (preds) | |
1162 for (pred = *preds; pred; pred = pred->ep_next) | |
1163 if (pred->ep_probability <= PROB_VERY_UNLIKELY) | |
1164 unlikely_edges.add (pred->ep_edge); | |
1165 | |
1166 if (!dry_run) | |
1167 set_even_probabilities (bb, &unlikely_edges); | |
822 clear_bb_predictions (bb); | 1168 clear_bb_predictions (bb); |
823 if (dump_file) | 1169 if (dump_file) |
824 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n", | 1170 { |
825 nedges, bb->index); | 1171 fprintf (dump_file, "Predictions for bb %i\n", bb->index); |
1172 if (unlikely_edges.elements () == 0) | |
1173 fprintf (dump_file, | |
1174 "%i edges in bb %i predicted to even probabilities\n", | |
1175 nedges, bb->index); | |
1176 else | |
1177 { | |
1178 fprintf (dump_file, | |
1179 "%i edges in bb %i predicted with some unlikely edges\n", | |
1180 nedges, bb->index); | |
1181 FOR_EACH_EDGE (e, ei, bb->succs) | |
1182 if (!unlikely_executed_edge_p (e)) | |
1183 dump_prediction (dump_file, PRED_COMBINED, | |
1184 e->probability.to_reg_br_prob_base (), bb, REASON_NONE, e); | |
1185 } | |
1186 } | |
826 return; | 1187 return; |
827 } | 1188 } |
828 | 1189 |
829 if (dump_file) | 1190 if (dump_file) |
830 fprintf (dump_file, "Predictions for bb %i\n", bb->index); | 1191 fprintf (dump_file, "Predictions for bb %i\n", bb->index); |
831 | 1192 |
832 preds = pointer_map_contains (bb_predictions, bb); | 1193 prune_predictions_for_bb (bb); |
1194 | |
1195 edge_prediction **preds = bb_predictions->get (bb); | |
1196 | |
833 if (preds) | 1197 if (preds) |
834 { | 1198 { |
835 /* We implement "first match" heuristics and use probability guessed | 1199 /* We implement "first match" heuristics and use probability guessed |
836 by predictor with smallest index. */ | 1200 by predictor with smallest index. */ |
837 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next) | 1201 for (pred = *preds; pred; pred = pred->ep_next) |
838 { | 1202 { |
839 enum br_predictor predictor = pred->ep_predictor; | 1203 enum br_predictor predictor = pred->ep_predictor; |
840 int probability = pred->ep_probability; | 1204 int probability = pred->ep_probability; |
841 | 1205 |
842 if (pred->ep_edge != first) | 1206 if (pred->ep_edge != first) |
843 probability = REG_BR_PROB_BASE - probability; | 1207 probability = REG_BR_PROB_BASE - probability; |
844 | 1208 |
845 found = true; | 1209 found = true; |
846 /* First match heuristics would be widly confused if we predicted | 1210 /* First match heuristics would be widly confused if we predicted |
847 both directions. */ | 1211 both directions. */ |
848 if (best_predictor > predictor) | 1212 if (best_predictor > predictor |
1213 && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH) | |
849 { | 1214 { |
850 struct edge_prediction *pred2; | 1215 struct edge_prediction *pred2; |
851 int prob = probability; | 1216 int prob = probability; |
852 | 1217 |
853 for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next) | 1218 for (pred2 = (struct edge_prediction *) *preds; |
1219 pred2; pred2 = pred2->ep_next) | |
854 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor) | 1220 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor) |
855 { | 1221 { |
856 int probability2 = pred->ep_probability; | 1222 int probability2 = pred2->ep_probability; |
857 | 1223 |
858 if (pred2->ep_edge != first) | 1224 if (pred2->ep_edge != first) |
859 probability2 = REG_BR_PROB_BASE - probability2; | 1225 probability2 = REG_BR_PROB_BASE - probability2; |
860 | 1226 |
861 if ((probability < REG_BR_PROB_BASE / 2) != | 1227 if ((probability < REG_BR_PROB_BASE / 2) != |
888 | 1254 |
889 /* Decide which heuristic to use. In case we didn't match anything, | 1255 /* Decide which heuristic to use. In case we didn't match anything, |
890 use no_prediction heuristic, in case we did match, use either | 1256 use no_prediction heuristic, in case we did match, use either |
891 first match or Dempster-Shaffer theory depending on the flags. */ | 1257 first match or Dempster-Shaffer theory depending on the flags. */ |
892 | 1258 |
893 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH) | 1259 if (best_predictor != END_PREDICTORS) |
894 first_match = true; | 1260 first_match = true; |
895 | 1261 |
896 if (!found) | 1262 if (!found) |
897 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true); | 1263 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb); |
898 else | 1264 else |
899 { | 1265 { |
900 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb, | 1266 if (!first_match) |
901 !first_match); | 1267 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb, |
902 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb, | 1268 !first_match ? REASON_NONE : REASON_IGNORED); |
903 first_match); | 1269 else |
1270 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb, | |
1271 first_match ? REASON_NONE : REASON_IGNORED); | |
904 } | 1272 } |
905 | 1273 |
906 if (first_match) | 1274 if (first_match) |
907 combined_probability = best_probability; | 1275 combined_probability = best_probability; |
908 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true); | 1276 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb); |
909 | 1277 |
910 if (preds) | 1278 if (preds) |
911 { | 1279 { |
912 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next) | 1280 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next) |
913 { | 1281 { |
914 enum br_predictor predictor = pred->ep_predictor; | 1282 enum br_predictor predictor = pred->ep_predictor; |
915 int probability = pred->ep_probability; | 1283 int probability = pred->ep_probability; |
916 | 1284 |
917 if (pred->ep_edge != EDGE_SUCC (bb, 0)) | |
918 probability = REG_BR_PROB_BASE - probability; | |
919 dump_prediction (dump_file, predictor, probability, bb, | 1285 dump_prediction (dump_file, predictor, probability, bb, |
920 !first_match || best_predictor == predictor); | 1286 (!first_match || best_predictor == predictor) |
1287 ? REASON_NONE : REASON_IGNORED, pred->ep_edge); | |
921 } | 1288 } |
922 } | 1289 } |
923 clear_bb_predictions (bb); | 1290 clear_bb_predictions (bb); |
924 | 1291 |
925 if (!bb->count) | 1292 if (!bb->count.initialized_p () && !dry_run) |
926 { | 1293 { |
927 first->probability = combined_probability; | 1294 first->probability |
928 second->probability = REG_BR_PROB_BASE - combined_probability; | 1295 = profile_probability::from_reg_br_prob_base (combined_probability); |
929 } | 1296 second->probability = first->probability.invert (); |
930 } | 1297 } |
1298 } | |
1299 | |
1300 /* Check if T1 and T2 satisfy the IV_COMPARE condition. | |
1301 Return the SSA_NAME if the condition satisfies, NULL otherwise. | |
1302 | |
1303 T1 and T2 should be one of the following cases: | |
1304 1. T1 is SSA_NAME, T2 is NULL | |
1305 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4] | |
1306 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */ | |
1307 | |
1308 static tree | |
1309 strips_small_constant (tree t1, tree t2) | |
1310 { | |
1311 tree ret = NULL; | |
1312 int value = 0; | |
1313 | |
1314 if (!t1) | |
1315 return NULL; | |
1316 else if (TREE_CODE (t1) == SSA_NAME) | |
1317 ret = t1; | |
1318 else if (tree_fits_shwi_p (t1)) | |
1319 value = tree_to_shwi (t1); | |
1320 else | |
1321 return NULL; | |
1322 | |
1323 if (!t2) | |
1324 return ret; | |
1325 else if (tree_fits_shwi_p (t2)) | |
1326 value = tree_to_shwi (t2); | |
1327 else if (TREE_CODE (t2) == SSA_NAME) | |
1328 { | |
1329 if (ret) | |
1330 return NULL; | |
1331 else | |
1332 ret = t2; | |
1333 } | |
1334 | |
1335 if (value <= 4 && value >= -4) | |
1336 return ret; | |
1337 else | |
1338 return NULL; | |
1339 } | |
1340 | |
1341 /* Return the SSA_NAME in T or T's operands. | |
1342 Return NULL if SSA_NAME cannot be found. */ | |
1343 | |
1344 static tree | |
1345 get_base_value (tree t) | |
1346 { | |
1347 if (TREE_CODE (t) == SSA_NAME) | |
1348 return t; | |
1349 | |
1350 if (!BINARY_CLASS_P (t)) | |
1351 return NULL; | |
1352 | |
1353 switch (TREE_OPERAND_LENGTH (t)) | |
1354 { | |
1355 case 1: | |
1356 return strips_small_constant (TREE_OPERAND (t, 0), NULL); | |
1357 case 2: | |
1358 return strips_small_constant (TREE_OPERAND (t, 0), | |
1359 TREE_OPERAND (t, 1)); | |
1360 default: | |
1361 return NULL; | |
1362 } | |
1363 } | |
1364 | |
1365 /* Check the compare STMT in LOOP. If it compares an induction | |
1366 variable to a loop invariant, return true, and save | |
1367 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP. | |
1368 Otherwise return false and set LOOP_INVAIANT to NULL. */ | |
1369 | |
1370 static bool | |
1371 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop, | |
1372 tree *loop_invariant, | |
1373 enum tree_code *compare_code, | |
1374 tree *loop_step, | |
1375 tree *loop_iv_base) | |
1376 { | |
1377 tree op0, op1, bound, base; | |
1378 affine_iv iv0, iv1; | |
1379 enum tree_code code; | |
1380 tree step; | |
1381 | |
1382 code = gimple_cond_code (stmt); | |
1383 *loop_invariant = NULL; | |
1384 | |
1385 switch (code) | |
1386 { | |
1387 case GT_EXPR: | |
1388 case GE_EXPR: | |
1389 case NE_EXPR: | |
1390 case LT_EXPR: | |
1391 case LE_EXPR: | |
1392 case EQ_EXPR: | |
1393 break; | |
1394 | |
1395 default: | |
1396 return false; | |
1397 } | |
1398 | |
1399 op0 = gimple_cond_lhs (stmt); | |
1400 op1 = gimple_cond_rhs (stmt); | |
1401 | |
1402 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST) | |
1403 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST)) | |
1404 return false; | |
1405 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true)) | |
1406 return false; | |
1407 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true)) | |
1408 return false; | |
1409 if (TREE_CODE (iv0.step) != INTEGER_CST | |
1410 || TREE_CODE (iv1.step) != INTEGER_CST) | |
1411 return false; | |
1412 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step)) | |
1413 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step))) | |
1414 return false; | |
1415 | |
1416 if (integer_zerop (iv0.step)) | |
1417 { | |
1418 if (code != NE_EXPR && code != EQ_EXPR) | |
1419 code = invert_tree_comparison (code, false); | |
1420 bound = iv0.base; | |
1421 base = iv1.base; | |
1422 if (tree_fits_shwi_p (iv1.step)) | |
1423 step = iv1.step; | |
1424 else | |
1425 return false; | |
1426 } | |
1427 else | |
1428 { | |
1429 bound = iv1.base; | |
1430 base = iv0.base; | |
1431 if (tree_fits_shwi_p (iv0.step)) | |
1432 step = iv0.step; | |
1433 else | |
1434 return false; | |
1435 } | |
1436 | |
1437 if (TREE_CODE (bound) != INTEGER_CST) | |
1438 bound = get_base_value (bound); | |
1439 if (!bound) | |
1440 return false; | |
1441 if (TREE_CODE (base) != INTEGER_CST) | |
1442 base = get_base_value (base); | |
1443 if (!base) | |
1444 return false; | |
1445 | |
1446 *loop_invariant = bound; | |
1447 *compare_code = code; | |
1448 *loop_step = step; | |
1449 *loop_iv_base = base; | |
1450 return true; | |
1451 } | |
1452 | |
1453 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */ | |
1454 | |
1455 static bool | |
1456 expr_coherent_p (tree t1, tree t2) | |
1457 { | |
1458 gimple *stmt; | |
1459 tree ssa_name_1 = NULL; | |
1460 tree ssa_name_2 = NULL; | |
1461 | |
1462 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST); | |
1463 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST); | |
1464 | |
1465 if (t1 == t2) | |
1466 return true; | |
1467 | |
1468 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST) | |
1469 return true; | |
1470 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST) | |
1471 return false; | |
1472 | |
1473 /* Check to see if t1 is expressed/defined with t2. */ | |
1474 stmt = SSA_NAME_DEF_STMT (t1); | |
1475 gcc_assert (stmt != NULL); | |
1476 if (is_gimple_assign (stmt)) | |
1477 { | |
1478 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE); | |
1479 if (ssa_name_1 && ssa_name_1 == t2) | |
1480 return true; | |
1481 } | |
1482 | |
1483 /* Check to see if t2 is expressed/defined with t1. */ | |
1484 stmt = SSA_NAME_DEF_STMT (t2); | |
1485 gcc_assert (stmt != NULL); | |
1486 if (is_gimple_assign (stmt)) | |
1487 { | |
1488 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE); | |
1489 if (ssa_name_2 && ssa_name_2 == t1) | |
1490 return true; | |
1491 } | |
1492 | |
1493 /* Compare if t1 and t2's def_stmts are identical. */ | |
1494 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2) | |
1495 return true; | |
1496 else | |
1497 return false; | |
1498 } | |
1499 | |
1500 /* Return true if E is predicted by one of loop heuristics. */ | |
1501 | |
1502 static bool | |
1503 predicted_by_loop_heuristics_p (basic_block bb) | |
1504 { | |
1505 struct edge_prediction *i; | |
1506 edge_prediction **preds = bb_predictions->get (bb); | |
1507 | |
1508 if (!preds) | |
1509 return false; | |
1510 | |
1511 for (i = *preds; i; i = i->ep_next) | |
1512 if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED | |
1513 || i->ep_predictor == PRED_LOOP_ITERATIONS_MAX | |
1514 || i->ep_predictor == PRED_LOOP_ITERATIONS | |
1515 || i->ep_predictor == PRED_LOOP_EXIT | |
1516 || i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION | |
1517 || i->ep_predictor == PRED_LOOP_EXTRA_EXIT) | |
1518 return true; | |
1519 return false; | |
1520 } | |
1521 | |
1522 /* Predict branch probability of BB when BB contains a branch that compares | |
1523 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The | |
1524 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP. | |
1525 | |
1526 E.g. | |
1527 for (int i = 0; i < bound; i++) { | |
1528 if (i < bound - 2) | |
1529 computation_1(); | |
1530 else | |
1531 computation_2(); | |
1532 } | |
1533 | |
1534 In this loop, we will predict the branch inside the loop to be taken. */ | |
1535 | |
1536 static void | |
1537 predict_iv_comparison (struct loop *loop, basic_block bb, | |
1538 tree loop_bound_var, | |
1539 tree loop_iv_base_var, | |
1540 enum tree_code loop_bound_code, | |
1541 int loop_bound_step) | |
1542 { | |
1543 gimple *stmt; | |
1544 tree compare_var, compare_base; | |
1545 enum tree_code compare_code; | |
1546 tree compare_step_var; | |
1547 edge then_edge; | |
1548 edge_iterator ei; | |
1549 | |
1550 if (predicted_by_loop_heuristics_p (bb)) | |
1551 return; | |
1552 | |
1553 stmt = last_stmt (bb); | |
1554 if (!stmt || gimple_code (stmt) != GIMPLE_COND) | |
1555 return; | |
1556 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt), | |
1557 loop, &compare_var, | |
1558 &compare_code, | |
1559 &compare_step_var, | |
1560 &compare_base)) | |
1561 return; | |
1562 | |
1563 /* Find the taken edge. */ | |
1564 FOR_EACH_EDGE (then_edge, ei, bb->succs) | |
1565 if (then_edge->flags & EDGE_TRUE_VALUE) | |
1566 break; | |
1567 | |
1568 /* When comparing an IV to a loop invariant, NE is more likely to be | |
1569 taken while EQ is more likely to be not-taken. */ | |
1570 if (compare_code == NE_EXPR) | |
1571 { | |
1572 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN); | |
1573 return; | |
1574 } | |
1575 else if (compare_code == EQ_EXPR) | |
1576 { | |
1577 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN); | |
1578 return; | |
1579 } | |
1580 | |
1581 if (!expr_coherent_p (loop_iv_base_var, compare_base)) | |
1582 return; | |
1583 | |
1584 /* If loop bound, base and compare bound are all constants, we can | |
1585 calculate the probability directly. */ | |
1586 if (tree_fits_shwi_p (loop_bound_var) | |
1587 && tree_fits_shwi_p (compare_var) | |
1588 && tree_fits_shwi_p (compare_base)) | |
1589 { | |
1590 int probability; | |
1591 bool overflow, overall_overflow = false; | |
1592 widest_int compare_count, tem; | |
1593 | |
1594 /* (loop_bound - base) / compare_step */ | |
1595 tem = wi::sub (wi::to_widest (loop_bound_var), | |
1596 wi::to_widest (compare_base), SIGNED, &overflow); | |
1597 overall_overflow |= overflow; | |
1598 widest_int loop_count = wi::div_trunc (tem, | |
1599 wi::to_widest (compare_step_var), | |
1600 SIGNED, &overflow); | |
1601 overall_overflow |= overflow; | |
1602 | |
1603 if (!wi::neg_p (wi::to_widest (compare_step_var)) | |
1604 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR)) | |
1605 { | |
1606 /* (loop_bound - compare_bound) / compare_step */ | |
1607 tem = wi::sub (wi::to_widest (loop_bound_var), | |
1608 wi::to_widest (compare_var), SIGNED, &overflow); | |
1609 overall_overflow |= overflow; | |
1610 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var), | |
1611 SIGNED, &overflow); | |
1612 overall_overflow |= overflow; | |
1613 } | |
1614 else | |
1615 { | |
1616 /* (compare_bound - base) / compare_step */ | |
1617 tem = wi::sub (wi::to_widest (compare_var), | |
1618 wi::to_widest (compare_base), SIGNED, &overflow); | |
1619 overall_overflow |= overflow; | |
1620 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var), | |
1621 SIGNED, &overflow); | |
1622 overall_overflow |= overflow; | |
1623 } | |
1624 if (compare_code == LE_EXPR || compare_code == GE_EXPR) | |
1625 ++compare_count; | |
1626 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR) | |
1627 ++loop_count; | |
1628 if (wi::neg_p (compare_count)) | |
1629 compare_count = 0; | |
1630 if (wi::neg_p (loop_count)) | |
1631 loop_count = 0; | |
1632 if (loop_count == 0) | |
1633 probability = 0; | |
1634 else if (wi::cmps (compare_count, loop_count) == 1) | |
1635 probability = REG_BR_PROB_BASE; | |
1636 else | |
1637 { | |
1638 tem = compare_count * REG_BR_PROB_BASE; | |
1639 tem = wi::udiv_trunc (tem, loop_count); | |
1640 probability = tem.to_uhwi (); | |
1641 } | |
1642 | |
1643 /* FIXME: The branch prediction seems broken. It has only 20% hitrate. */ | |
1644 if (!overall_overflow) | |
1645 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability); | |
1646 | |
1647 return; | |
1648 } | |
1649 | |
1650 if (expr_coherent_p (loop_bound_var, compare_var)) | |
1651 { | |
1652 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR) | |
1653 && (compare_code == LT_EXPR || compare_code == LE_EXPR)) | |
1654 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN); | |
1655 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR) | |
1656 && (compare_code == GT_EXPR || compare_code == GE_EXPR)) | |
1657 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN); | |
1658 else if (loop_bound_code == NE_EXPR) | |
1659 { | |
1660 /* If the loop backedge condition is "(i != bound)", we do | |
1661 the comparison based on the step of IV: | |
1662 * step < 0 : backedge condition is like (i > bound) | |
1663 * step > 0 : backedge condition is like (i < bound) */ | |
1664 gcc_assert (loop_bound_step != 0); | |
1665 if (loop_bound_step > 0 | |
1666 && (compare_code == LT_EXPR | |
1667 || compare_code == LE_EXPR)) | |
1668 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN); | |
1669 else if (loop_bound_step < 0 | |
1670 && (compare_code == GT_EXPR | |
1671 || compare_code == GE_EXPR)) | |
1672 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN); | |
1673 else | |
1674 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN); | |
1675 } | |
1676 else | |
1677 /* The branch is predicted not-taken if loop_bound_code is | |
1678 opposite with compare_code. */ | |
1679 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN); | |
1680 } | |
1681 else if (expr_coherent_p (loop_iv_base_var, compare_var)) | |
1682 { | |
1683 /* For cases like: | |
1684 for (i = s; i < h; i++) | |
1685 if (i > s + 2) .... | |
1686 The branch should be predicted taken. */ | |
1687 if (loop_bound_step > 0 | |
1688 && (compare_code == GT_EXPR || compare_code == GE_EXPR)) | |
1689 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN); | |
1690 else if (loop_bound_step < 0 | |
1691 && (compare_code == LT_EXPR || compare_code == LE_EXPR)) | |
1692 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN); | |
1693 else | |
1694 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN); | |
1695 } | |
1696 } | |
1697 | |
1698 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop | |
1699 exits are resulted from short-circuit conditions that will generate an | |
1700 if_tmp. E.g.: | |
1701 | |
1702 if (foo() || global > 10) | |
1703 break; | |
1704 | |
1705 This will be translated into: | |
1706 | |
1707 BB3: | |
1708 loop header... | |
1709 BB4: | |
1710 if foo() goto BB6 else goto BB5 | |
1711 BB5: | |
1712 if global > 10 goto BB6 else goto BB7 | |
1713 BB6: | |
1714 goto BB7 | |
1715 BB7: | |
1716 iftmp = (PHI 0(BB5), 1(BB6)) | |
1717 if iftmp == 1 goto BB8 else goto BB3 | |
1718 BB8: | |
1719 outside of the loop... | |
1720 | |
1721 The edge BB7->BB8 is loop exit because BB8 is outside of the loop. | |
1722 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop | |
1723 exits. This function takes BB7->BB8 as input, and finds out the extra loop | |
1724 exits to predict them using PRED_LOOP_EXTRA_EXIT. */ | |
1725 | |
1726 static void | |
1727 predict_extra_loop_exits (edge exit_edge) | |
1728 { | |
1729 unsigned i; | |
1730 bool check_value_one; | |
1731 gimple *lhs_def_stmt; | |
1732 gphi *phi_stmt; | |
1733 tree cmp_rhs, cmp_lhs; | |
1734 gimple *last; | |
1735 gcond *cmp_stmt; | |
1736 | |
1737 last = last_stmt (exit_edge->src); | |
1738 if (!last) | |
1739 return; | |
1740 cmp_stmt = dyn_cast <gcond *> (last); | |
1741 if (!cmp_stmt) | |
1742 return; | |
1743 | |
1744 cmp_rhs = gimple_cond_rhs (cmp_stmt); | |
1745 cmp_lhs = gimple_cond_lhs (cmp_stmt); | |
1746 if (!TREE_CONSTANT (cmp_rhs) | |
1747 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) | |
1748 return; | |
1749 if (TREE_CODE (cmp_lhs) != SSA_NAME) | |
1750 return; | |
1751 | |
1752 /* If check_value_one is true, only the phi_args with value '1' will lead | |
1753 to loop exit. Otherwise, only the phi_args with value '0' will lead to | |
1754 loop exit. */ | |
1755 check_value_one = (((integer_onep (cmp_rhs)) | |
1756 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) | |
1757 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0)); | |
1758 | |
1759 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs); | |
1760 if (!lhs_def_stmt) | |
1761 return; | |
1762 | |
1763 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt); | |
1764 if (!phi_stmt) | |
1765 return; | |
1766 | |
1767 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) | |
1768 { | |
1769 edge e1; | |
1770 edge_iterator ei; | |
1771 tree val = gimple_phi_arg_def (phi_stmt, i); | |
1772 edge e = gimple_phi_arg_edge (phi_stmt, i); | |
1773 | |
1774 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val))) | |
1775 continue; | |
1776 if ((check_value_one ^ integer_onep (val)) == 1) | |
1777 continue; | |
1778 if (EDGE_COUNT (e->src->succs) != 1) | |
1779 { | |
1780 predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN); | |
1781 continue; | |
1782 } | |
1783 | |
1784 FOR_EACH_EDGE (e1, ei, e->src->preds) | |
1785 predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN); | |
1786 } | |
1787 } | |
1788 | |
931 | 1789 |
932 /* Predict edge probabilities by exploiting loop structure. */ | 1790 /* Predict edge probabilities by exploiting loop structure. */ |
933 | 1791 |
934 static void | 1792 static void |
935 predict_loops (void) | 1793 predict_loops (void) |
936 { | 1794 { |
937 loop_iterator li; | |
938 struct loop *loop; | 1795 struct loop *loop; |
1796 basic_block bb; | |
1797 hash_set <struct loop *> with_recursion(10); | |
1798 | |
1799 FOR_EACH_BB_FN (bb, cfun) | |
1800 { | |
1801 gimple_stmt_iterator gsi; | |
1802 tree decl; | |
1803 | |
1804 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1805 if (is_gimple_call (gsi_stmt (gsi)) | |
1806 && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL | |
1807 && recursive_call_p (current_function_decl, decl)) | |
1808 { | |
1809 loop = bb->loop_father; | |
1810 while (loop && !with_recursion.add (loop)) | |
1811 loop = loop_outer (loop); | |
1812 } | |
1813 } | |
939 | 1814 |
940 /* Try to predict out blocks in a loop that are not part of a | 1815 /* Try to predict out blocks in a loop that are not part of a |
941 natural loop. */ | 1816 natural loop. */ |
942 FOR_EACH_LOOP (li, loop, 0) | 1817 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
943 { | 1818 { |
944 basic_block bb, *bbs; | 1819 basic_block bb, *bbs; |
945 unsigned j, n_exits; | 1820 unsigned j, n_exits = 0; |
946 VEC (edge, heap) *exits; | 1821 vec<edge> exits; |
947 struct tree_niter_desc niter_desc; | 1822 struct tree_niter_desc niter_desc; |
948 edge ex; | 1823 edge ex; |
1824 struct nb_iter_bound *nb_iter; | |
1825 enum tree_code loop_bound_code = ERROR_MARK; | |
1826 tree loop_bound_step = NULL; | |
1827 tree loop_bound_var = NULL; | |
1828 tree loop_iv_base = NULL; | |
1829 gcond *stmt = NULL; | |
1830 bool recursion = with_recursion.contains (loop); | |
949 | 1831 |
950 exits = get_loop_exit_edges (loop); | 1832 exits = get_loop_exit_edges (loop); |
951 n_exits = VEC_length (edge, exits); | 1833 FOR_EACH_VEC_ELT (exits, j, ex) |
952 | 1834 if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL)) |
953 FOR_EACH_VEC_ELT (edge, exits, j, ex) | 1835 n_exits ++; |
1836 if (!n_exits) | |
1837 { | |
1838 exits.release (); | |
1839 continue; | |
1840 } | |
1841 | |
1842 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1843 fprintf (dump_file, "Predicting loop %i%s with %i exits.\n", | |
1844 loop->num, recursion ? " (with recursion)":"", n_exits); | |
1845 if (dump_file && (dump_flags & TDF_DETAILS) | |
1846 && max_loop_iterations_int (loop) >= 0) | |
1847 { | |
1848 fprintf (dump_file, | |
1849 "Loop %d iterates at most %i times.\n", loop->num, | |
1850 (int)max_loop_iterations_int (loop)); | |
1851 } | |
1852 if (dump_file && (dump_flags & TDF_DETAILS) | |
1853 && likely_max_loop_iterations_int (loop) >= 0) | |
1854 { | |
1855 fprintf (dump_file, "Loop %d likely iterates at most %i times.\n", | |
1856 loop->num, (int)likely_max_loop_iterations_int (loop)); | |
1857 } | |
1858 | |
1859 FOR_EACH_VEC_ELT (exits, j, ex) | |
954 { | 1860 { |
955 tree niter = NULL; | 1861 tree niter = NULL; |
956 HOST_WIDE_INT nitercst; | 1862 HOST_WIDE_INT nitercst; |
957 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS); | 1863 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS); |
958 int probability; | 1864 int probability; |
959 enum br_predictor predictor; | 1865 enum br_predictor predictor; |
960 | 1866 widest_int nit; |
961 if (number_of_iterations_exit (loop, ex, &niter_desc, false)) | 1867 |
1868 if (unlikely_executed_edge_p (ex) | |
1869 || (ex->flags & EDGE_ABNORMAL_CALL)) | |
1870 continue; | |
1871 /* Loop heuristics do not expect exit conditional to be inside | |
1872 inner loop. We predict from innermost to outermost loop. */ | |
1873 if (predicted_by_loop_heuristics_p (ex->src)) | |
1874 { | |
1875 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1876 fprintf (dump_file, "Skipping exit %i->%i because " | |
1877 "it is already predicted.\n", | |
1878 ex->src->index, ex->dest->index); | |
1879 continue; | |
1880 } | |
1881 predict_extra_loop_exits (ex); | |
1882 | |
1883 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false)) | |
962 niter = niter_desc.niter; | 1884 niter = niter_desc.niter; |
963 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST) | 1885 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST) |
964 niter = loop_niter_by_eval (loop, ex); | 1886 niter = loop_niter_by_eval (loop, ex); |
1887 if (dump_file && (dump_flags & TDF_DETAILS) | |
1888 && TREE_CODE (niter) == INTEGER_CST) | |
1889 { | |
1890 fprintf (dump_file, "Exit %i->%i %d iterates ", | |
1891 ex->src->index, ex->dest->index, | |
1892 loop->num); | |
1893 print_generic_expr (dump_file, niter, TDF_SLIM); | |
1894 fprintf (dump_file, " times.\n"); | |
1895 } | |
965 | 1896 |
966 if (TREE_CODE (niter) == INTEGER_CST) | 1897 if (TREE_CODE (niter) == INTEGER_CST) |
967 { | 1898 { |
968 if (host_integerp (niter, 1) | 1899 if (tree_fits_uhwi_p (niter) |
969 && compare_tree_int (niter, max-1) == -1) | 1900 && max |
970 nitercst = tree_low_cst (niter, 1) + 1; | 1901 && compare_tree_int (niter, max - 1) == -1) |
1902 nitercst = tree_to_uhwi (niter) + 1; | |
971 else | 1903 else |
972 nitercst = max; | 1904 nitercst = max; |
973 predictor = PRED_LOOP_ITERATIONS; | 1905 predictor = PRED_LOOP_ITERATIONS; |
974 } | 1906 } |
975 /* If we have just one exit and we can derive some information about | 1907 /* If we have just one exit and we can derive some information about |
976 the number of iterations of the loop from the statements inside | 1908 the number of iterations of the loop from the statements inside |
977 the loop, use it to predict this exit. */ | 1909 the loop, use it to predict this exit. */ |
978 else if (n_exits == 1) | 1910 else if (n_exits == 1 |
1911 && estimated_stmt_executions (loop, &nit)) | |
979 { | 1912 { |
980 nitercst = estimated_loop_iterations_int (loop, false); | 1913 if (wi::gtu_p (nit, max)) |
981 if (nitercst < 0) | |
982 continue; | |
983 if (nitercst > max) | |
984 nitercst = max; | 1914 nitercst = max; |
985 | 1915 else |
1916 nitercst = nit.to_shwi (); | |
986 predictor = PRED_LOOP_ITERATIONS_GUESSED; | 1917 predictor = PRED_LOOP_ITERATIONS_GUESSED; |
987 } | 1918 } |
1919 /* If we have likely upper bound, trust it for very small iteration | |
1920 counts. Such loops would otherwise get mispredicted by standard | |
1921 LOOP_EXIT heuristics. */ | |
1922 else if (n_exits == 1 | |
1923 && likely_max_stmt_executions (loop, &nit) | |
1924 && wi::ltu_p (nit, | |
1925 RDIV (REG_BR_PROB_BASE, | |
1926 REG_BR_PROB_BASE | |
1927 - predictor_info | |
1928 [recursion | |
1929 ? PRED_LOOP_EXIT_WITH_RECURSION | |
1930 : PRED_LOOP_EXIT].hitrate))) | |
1931 { | |
1932 nitercst = nit.to_shwi (); | |
1933 predictor = PRED_LOOP_ITERATIONS_MAX; | |
1934 } | |
988 else | 1935 else |
1936 { | |
1937 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1938 fprintf (dump_file, "Nothing known about exit %i->%i.\n", | |
1939 ex->src->index, ex->dest->index); | |
1940 continue; | |
1941 } | |
1942 | |
1943 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1944 fprintf (dump_file, "Recording prediction to %i iterations by %s.\n", | |
1945 (int)nitercst, predictor_info[predictor].name); | |
1946 /* If the prediction for number of iterations is zero, do not | |
1947 predict the exit edges. */ | |
1948 if (nitercst == 0) | |
989 continue; | 1949 continue; |
990 | 1950 |
991 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst); | 1951 probability = RDIV (REG_BR_PROB_BASE, nitercst); |
992 predict_edge (ex, predictor, probability); | 1952 predict_edge (ex, predictor, probability); |
993 } | 1953 } |
994 VEC_free (edge, heap, exits); | 1954 exits.release (); |
1955 | |
1956 /* Find information about loop bound variables. */ | |
1957 for (nb_iter = loop->bounds; nb_iter; | |
1958 nb_iter = nb_iter->next) | |
1959 if (nb_iter->stmt | |
1960 && gimple_code (nb_iter->stmt) == GIMPLE_COND) | |
1961 { | |
1962 stmt = as_a <gcond *> (nb_iter->stmt); | |
1963 break; | |
1964 } | |
1965 if (!stmt && last_stmt (loop->header) | |
1966 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND) | |
1967 stmt = as_a <gcond *> (last_stmt (loop->header)); | |
1968 if (stmt) | |
1969 is_comparison_with_loop_invariant_p (stmt, loop, | |
1970 &loop_bound_var, | |
1971 &loop_bound_code, | |
1972 &loop_bound_step, | |
1973 &loop_iv_base); | |
995 | 1974 |
996 bbs = get_loop_body (loop); | 1975 bbs = get_loop_body (loop); |
997 | 1976 |
998 for (j = 0; j < loop->num_nodes; j++) | 1977 for (j = 0; j < loop->num_nodes; j++) |
999 { | 1978 { |
1000 int header_found = 0; | |
1001 edge e; | 1979 edge e; |
1002 edge_iterator ei; | 1980 edge_iterator ei; |
1003 | 1981 |
1004 bb = bbs[j]; | 1982 bb = bbs[j]; |
1005 | 1983 |
1006 /* Bypass loop heuristics on continue statement. These | 1984 /* Bypass loop heuristics on continue statement. These |
1007 statements construct loops via "non-loop" constructs | 1985 statements construct loops via "non-loop" constructs |
1008 in the source language and are better to be handled | 1986 in the source language and are better to be handled |
1009 separately. */ | 1987 separately. */ |
1010 if (predicted_by_p (bb, PRED_CONTINUE)) | 1988 if (predicted_by_p (bb, PRED_CONTINUE)) |
1011 continue; | |
1012 | |
1013 /* Loop branch heuristics - predict an edge back to a | |
1014 loop's head as taken. */ | |
1015 if (bb == loop->latch) | |
1016 { | 1989 { |
1017 e = find_edge (loop->latch, loop->header); | 1990 if (dump_file && (dump_flags & TDF_DETAILS)) |
1018 if (e) | 1991 fprintf (dump_file, "BB %i predicted by continue.\n", |
1019 { | 1992 bb->index); |
1020 header_found = 1; | 1993 continue; |
1021 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN); | |
1022 } | |
1023 } | 1994 } |
1024 | 1995 |
1025 /* Loop exit heuristics - predict an edge exiting the loop if the | 1996 /* If we already used more reliable loop exit predictors, do not |
1026 conditional has no loop header successors as not taken. */ | 1997 bother with PRED_LOOP_EXIT. */ |
1027 if (!header_found | 1998 if (!predicted_by_loop_heuristics_p (bb)) |
1028 /* If we already used more reliable loop exit predictors, do not | |
1029 bother with PRED_LOOP_EXIT. */ | |
1030 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED) | |
1031 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS)) | |
1032 { | 1999 { |
1033 /* For loop with many exits we don't want to predict all exits | 2000 /* For loop with many exits we don't want to predict all exits |
1034 with the pretty large probability, because if all exits are | 2001 with the pretty large probability, because if all exits are |
1035 considered in row, the loop would be predicted to iterate | 2002 considered in row, the loop would be predicted to iterate |
1036 almost never. The code to divide probability by number of | 2003 almost never. The code to divide probability by number of |
1043 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction | 2010 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction |
1044 as this was causing regression in perl benchmark containing such | 2011 as this was causing regression in perl benchmark containing such |
1045 a wide loop. */ | 2012 a wide loop. */ |
1046 | 2013 |
1047 int probability = ((REG_BR_PROB_BASE | 2014 int probability = ((REG_BR_PROB_BASE |
1048 - predictor_info [(int) PRED_LOOP_EXIT].hitrate) | 2015 - predictor_info |
2016 [recursion | |
2017 ? PRED_LOOP_EXIT_WITH_RECURSION | |
2018 : PRED_LOOP_EXIT].hitrate) | |
1049 / n_exits); | 2019 / n_exits); |
1050 if (probability < HITRATE (2)) | 2020 if (probability < HITRATE (2)) |
1051 probability = HITRATE (2); | 2021 probability = HITRATE (2); |
1052 FOR_EACH_EDGE (e, ei, bb->succs) | 2022 FOR_EACH_EDGE (e, ei, bb->succs) |
1053 if (e->dest->index < NUM_FIXED_BLOCKS | 2023 if (e->dest->index < NUM_FIXED_BLOCKS |
1054 || !flow_bb_inside_loop_p (loop, e->dest)) | 2024 || !flow_bb_inside_loop_p (loop, e->dest)) |
1055 predict_edge (e, PRED_LOOP_EXIT, probability); | 2025 { |
2026 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2027 fprintf (dump_file, | |
2028 "Predicting exit %i->%i with prob %i.\n", | |
2029 e->src->index, e->dest->index, probability); | |
2030 predict_edge (e, | |
2031 recursion ? PRED_LOOP_EXIT_WITH_RECURSION | |
2032 : PRED_LOOP_EXIT, probability); | |
2033 } | |
2034 } | |
2035 if (loop_bound_var) | |
2036 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base, | |
2037 loop_bound_code, | |
2038 tree_to_shwi (loop_bound_step)); | |
2039 } | |
2040 | |
2041 /* In the following code | |
2042 for (loop1) | |
2043 if (cond) | |
2044 for (loop2) | |
2045 body; | |
2046 guess that cond is unlikely. */ | |
2047 if (loop_outer (loop)->num) | |
2048 { | |
2049 basic_block bb = NULL; | |
2050 edge preheader_edge = loop_preheader_edge (loop); | |
2051 | |
2052 if (single_pred_p (preheader_edge->src) | |
2053 && single_succ_p (preheader_edge->src)) | |
2054 preheader_edge = single_pred_edge (preheader_edge->src); | |
2055 | |
2056 gimple *stmt = last_stmt (preheader_edge->src); | |
2057 /* Pattern match fortran loop preheader: | |
2058 _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER); | |
2059 _17 = (logical(kind=4)) _16; | |
2060 if (_17 != 0) | |
2061 goto <bb 11>; | |
2062 else | |
2063 goto <bb 13>; | |
2064 | |
2065 Loop guard branch prediction says nothing about duplicated loop | |
2066 headers produced by fortran frontend and in this case we want | |
2067 to predict paths leading to this preheader. */ | |
2068 | |
2069 if (stmt | |
2070 && gimple_code (stmt) == GIMPLE_COND | |
2071 && gimple_cond_code (stmt) == NE_EXPR | |
2072 && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME | |
2073 && integer_zerop (gimple_cond_rhs (stmt))) | |
2074 { | |
2075 gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt)); | |
2076 if (gimple_code (call_stmt) == GIMPLE_ASSIGN | |
2077 && gimple_expr_code (call_stmt) == NOP_EXPR | |
2078 && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME) | |
2079 call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt)); | |
2080 if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT) | |
2081 && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST | |
2082 && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2)) | |
2083 && tree_to_uhwi (gimple_call_arg (call_stmt, 2)) | |
2084 == PRED_FORTRAN_LOOP_PREHEADER) | |
2085 bb = preheader_edge->src; | |
2086 } | |
2087 if (!bb) | |
2088 { | |
2089 if (!dominated_by_p (CDI_DOMINATORS, | |
2090 loop_outer (loop)->latch, loop->header)) | |
2091 predict_paths_leading_to_edge (loop_preheader_edge (loop), | |
2092 recursion | |
2093 ? PRED_LOOP_GUARD_WITH_RECURSION | |
2094 : PRED_LOOP_GUARD, | |
2095 NOT_TAKEN, | |
2096 loop_outer (loop)); | |
2097 } | |
2098 else | |
2099 { | |
2100 if (!dominated_by_p (CDI_DOMINATORS, | |
2101 loop_outer (loop)->latch, bb)) | |
2102 predict_paths_leading_to (bb, | |
2103 recursion | |
2104 ? PRED_LOOP_GUARD_WITH_RECURSION | |
2105 : PRED_LOOP_GUARD, | |
2106 NOT_TAKEN, | |
2107 loop_outer (loop)); | |
1056 } | 2108 } |
1057 } | 2109 } |
1058 | 2110 |
1059 /* Free basic blocks from get_loop_body. */ | 2111 /* Free basic blocks from get_loop_body. */ |
1060 free (bbs); | 2112 free (bbs); |
1064 /* Attempt to predict probabilities of BB outgoing edges using local | 2116 /* Attempt to predict probabilities of BB outgoing edges using local |
1065 properties. */ | 2117 properties. */ |
1066 static void | 2118 static void |
1067 bb_estimate_probability_locally (basic_block bb) | 2119 bb_estimate_probability_locally (basic_block bb) |
1068 { | 2120 { |
1069 rtx last_insn = BB_END (bb); | 2121 rtx_insn *last_insn = BB_END (bb); |
1070 rtx cond; | 2122 rtx cond; |
1071 | 2123 |
1072 if (! can_predict_insn_p (last_insn)) | 2124 if (! can_predict_insn_p (last_insn)) |
1073 return; | 2125 return; |
1074 cond = get_condition (last_insn, NULL, false, false); | 2126 cond = get_condition (last_insn, NULL, false, false); |
1166 { | 2218 { |
1167 bb_estimate_probability_locally (bb); | 2219 bb_estimate_probability_locally (bb); |
1168 combine_predictions_for_insn (BB_END (bb), bb); | 2220 combine_predictions_for_insn (BB_END (bb), bb); |
1169 } | 2221 } |
1170 | 2222 |
1171 static tree expr_expected_value (tree, bitmap); | 2223 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor); |
1172 | 2224 |
1173 /* Helper function for expr_expected_value. */ | 2225 /* Helper function for expr_expected_value. */ |
1174 | 2226 |
1175 static tree | 2227 static tree |
1176 expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1, bitmap visited) | 2228 expr_expected_value_1 (tree type, tree op0, enum tree_code code, |
1177 { | 2229 tree op1, bitmap visited, enum br_predictor *predictor) |
1178 gimple def; | 2230 { |
2231 gimple *def; | |
2232 | |
2233 if (predictor) | |
2234 *predictor = PRED_UNCONDITIONAL; | |
1179 | 2235 |
1180 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) | 2236 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) |
1181 { | 2237 { |
1182 if (TREE_CONSTANT (op0)) | 2238 if (TREE_CONSTANT (op0)) |
1183 return op0; | 2239 return op0; |
2240 | |
2241 if (code == IMAGPART_EXPR) | |
2242 { | |
2243 if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME) | |
2244 { | |
2245 def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0)); | |
2246 if (is_gimple_call (def) | |
2247 && gimple_call_internal_p (def) | |
2248 && (gimple_call_internal_fn (def) | |
2249 == IFN_ATOMIC_COMPARE_EXCHANGE)) | |
2250 { | |
2251 /* Assume that any given atomic operation has low contention, | |
2252 and thus the compare-and-swap operation succeeds. */ | |
2253 if (predictor) | |
2254 *predictor = PRED_COMPARE_AND_SWAP; | |
2255 return build_one_cst (TREE_TYPE (op0)); | |
2256 } | |
2257 } | |
2258 } | |
1184 | 2259 |
1185 if (code != SSA_NAME) | 2260 if (code != SSA_NAME) |
1186 return NULL_TREE; | 2261 return NULL_TREE; |
1187 | 2262 |
1188 def = SSA_NAME_DEF_STMT (op0); | 2263 def = SSA_NAME_DEF_STMT (op0); |
1199 tree val = NULL, new_val; | 2274 tree val = NULL, new_val; |
1200 | 2275 |
1201 for (i = 0; i < n; i++) | 2276 for (i = 0; i < n; i++) |
1202 { | 2277 { |
1203 tree arg = PHI_ARG_DEF (def, i); | 2278 tree arg = PHI_ARG_DEF (def, i); |
2279 enum br_predictor predictor2; | |
1204 | 2280 |
1205 /* If this PHI has itself as an argument, we cannot | 2281 /* If this PHI has itself as an argument, we cannot |
1206 determine the string length of this argument. However, | 2282 determine the string length of this argument. However, |
1207 if we can find an expected constant value for the other | 2283 if we can find an expected constant value for the other |
1208 PHI args then we can still be sure that this is | 2284 PHI args then we can still be sure that this is |
1209 likely a constant. So be optimistic and just | 2285 likely a constant. So be optimistic and just |
1210 continue with the next argument. */ | 2286 continue with the next argument. */ |
1211 if (arg == PHI_RESULT (def)) | 2287 if (arg == PHI_RESULT (def)) |
1212 continue; | 2288 continue; |
1213 | 2289 |
1214 new_val = expr_expected_value (arg, visited); | 2290 new_val = expr_expected_value (arg, visited, &predictor2); |
2291 | |
2292 /* It is difficult to combine value predictors. Simply assume | |
2293 that later predictor is weaker and take its prediction. */ | |
2294 if (predictor && *predictor < predictor2) | |
2295 *predictor = predictor2; | |
1215 if (!new_val) | 2296 if (!new_val) |
1216 return NULL; | 2297 return NULL; |
1217 if (!val) | 2298 if (!val) |
1218 val = new_val; | 2299 val = new_val; |
1219 else if (!operand_equal_p (val, new_val, false)) | 2300 else if (!operand_equal_p (val, new_val, false)) |
1228 | 2309 |
1229 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)), | 2310 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)), |
1230 gimple_assign_rhs1 (def), | 2311 gimple_assign_rhs1 (def), |
1231 gimple_assign_rhs_code (def), | 2312 gimple_assign_rhs_code (def), |
1232 gimple_assign_rhs2 (def), | 2313 gimple_assign_rhs2 (def), |
1233 visited); | 2314 visited, predictor); |
1234 } | 2315 } |
1235 | 2316 |
1236 if (is_gimple_call (def)) | 2317 if (is_gimple_call (def)) |
1237 { | 2318 { |
1238 tree decl = gimple_call_fndecl (def); | 2319 tree decl = gimple_call_fndecl (def); |
1239 if (!decl) | 2320 if (!decl) |
1240 return NULL; | |
1241 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL | |
1242 && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT) | |
1243 { | 2321 { |
1244 tree val; | 2322 if (gimple_call_internal_p (def) |
1245 | 2323 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT) |
1246 if (gimple_call_num_args (def) != 2) | 2324 { |
1247 return NULL; | 2325 gcc_assert (gimple_call_num_args (def) == 3); |
1248 val = gimple_call_arg (def, 0); | 2326 tree val = gimple_call_arg (def, 0); |
1249 if (TREE_CONSTANT (val)) | 2327 if (TREE_CONSTANT (val)) |
1250 return val; | 2328 return val; |
1251 return gimple_call_arg (def, 1); | 2329 if (predictor) |
2330 { | |
2331 tree val2 = gimple_call_arg (def, 2); | |
2332 gcc_assert (TREE_CODE (val2) == INTEGER_CST | |
2333 && tree_fits_uhwi_p (val2) | |
2334 && tree_to_uhwi (val2) < END_PREDICTORS); | |
2335 *predictor = (enum br_predictor) tree_to_uhwi (val2); | |
2336 } | |
2337 return gimple_call_arg (def, 1); | |
2338 } | |
2339 return NULL; | |
1252 } | 2340 } |
2341 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | |
2342 switch (DECL_FUNCTION_CODE (decl)) | |
2343 { | |
2344 case BUILT_IN_EXPECT: | |
2345 { | |
2346 tree val; | |
2347 if (gimple_call_num_args (def) != 2) | |
2348 return NULL; | |
2349 val = gimple_call_arg (def, 0); | |
2350 if (TREE_CONSTANT (val)) | |
2351 return val; | |
2352 if (predictor) | |
2353 *predictor = PRED_BUILTIN_EXPECT; | |
2354 return gimple_call_arg (def, 1); | |
2355 } | |
2356 | |
2357 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N: | |
2358 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1: | |
2359 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2: | |
2360 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4: | |
2361 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8: | |
2362 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16: | |
2363 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE: | |
2364 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N: | |
2365 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1: | |
2366 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2: | |
2367 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4: | |
2368 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8: | |
2369 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16: | |
2370 /* Assume that any given atomic operation has low contention, | |
2371 and thus the compare-and-swap operation succeeds. */ | |
2372 if (predictor) | |
2373 *predictor = PRED_COMPARE_AND_SWAP; | |
2374 return boolean_true_node; | |
2375 default: | |
2376 break; | |
2377 } | |
1253 } | 2378 } |
1254 | 2379 |
1255 return NULL; | 2380 return NULL; |
1256 } | 2381 } |
1257 | 2382 |
1258 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) | 2383 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) |
1259 { | 2384 { |
1260 tree res; | 2385 tree res; |
1261 op0 = expr_expected_value (op0, visited); | 2386 enum br_predictor predictor2; |
2387 op0 = expr_expected_value (op0, visited, predictor); | |
1262 if (!op0) | 2388 if (!op0) |
1263 return NULL; | 2389 return NULL; |
1264 op1 = expr_expected_value (op1, visited); | 2390 op1 = expr_expected_value (op1, visited, &predictor2); |
2391 if (predictor && *predictor < predictor2) | |
2392 *predictor = predictor2; | |
1265 if (!op1) | 2393 if (!op1) |
1266 return NULL; | 2394 return NULL; |
1267 res = fold_build2 (code, type, op0, op1); | 2395 res = fold_build2 (code, type, op0, op1); |
1268 if (TREE_CONSTANT (res)) | 2396 if (TREE_CONSTANT (res)) |
1269 return res; | 2397 return res; |
1270 return NULL; | 2398 return NULL; |
1271 } | 2399 } |
1272 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS) | 2400 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS) |
1273 { | 2401 { |
1274 tree res; | 2402 tree res; |
1275 op0 = expr_expected_value (op0, visited); | 2403 op0 = expr_expected_value (op0, visited, predictor); |
1276 if (!op0) | 2404 if (!op0) |
1277 return NULL; | 2405 return NULL; |
1278 res = fold_build1 (code, type, op0); | 2406 res = fold_build1 (code, type, op0); |
1279 if (TREE_CONSTANT (res)) | 2407 if (TREE_CONSTANT (res)) |
1280 return res; | 2408 return res; |
1290 We may want to implement more involved value guess (such as value range | 2418 We may want to implement more involved value guess (such as value range |
1291 propagation based prediction), but such tricks shall go to new | 2419 propagation based prediction), but such tricks shall go to new |
1292 implementation. */ | 2420 implementation. */ |
1293 | 2421 |
1294 static tree | 2422 static tree |
1295 expr_expected_value (tree expr, bitmap visited) | 2423 expr_expected_value (tree expr, bitmap visited, |
2424 enum br_predictor *predictor) | |
1296 { | 2425 { |
1297 enum tree_code code; | 2426 enum tree_code code; |
1298 tree op0, op1; | 2427 tree op0, op1; |
1299 | 2428 |
1300 if (TREE_CONSTANT (expr)) | 2429 if (TREE_CONSTANT (expr)) |
1301 return expr; | 2430 { |
2431 if (predictor) | |
2432 *predictor = PRED_UNCONDITIONAL; | |
2433 return expr; | |
2434 } | |
1302 | 2435 |
1303 extract_ops_from_tree (expr, &code, &op0, &op1); | 2436 extract_ops_from_tree (expr, &code, &op0, &op1); |
1304 return expr_expected_value_1 (TREE_TYPE (expr), | 2437 return expr_expected_value_1 (TREE_TYPE (expr), |
1305 op0, code, op1, visited); | 2438 op0, code, op1, visited, predictor); |
1306 } | |
1307 | |
1308 | |
1309 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements | |
1310 we no longer need. */ | |
1311 static unsigned int | |
1312 strip_predict_hints (void) | |
1313 { | |
1314 basic_block bb; | |
1315 gimple ass_stmt; | |
1316 tree var; | |
1317 | |
1318 FOR_EACH_BB (bb) | |
1319 { | |
1320 gimple_stmt_iterator bi; | |
1321 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);) | |
1322 { | |
1323 gimple stmt = gsi_stmt (bi); | |
1324 | |
1325 if (gimple_code (stmt) == GIMPLE_PREDICT) | |
1326 { | |
1327 gsi_remove (&bi, true); | |
1328 continue; | |
1329 } | |
1330 else if (gimple_code (stmt) == GIMPLE_CALL) | |
1331 { | |
1332 tree fndecl = gimple_call_fndecl (stmt); | |
1333 | |
1334 if (fndecl | |
1335 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL | |
1336 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT | |
1337 && gimple_call_num_args (stmt) == 2) | |
1338 { | |
1339 var = gimple_call_lhs (stmt); | |
1340 if (var) | |
1341 { | |
1342 ass_stmt | |
1343 = gimple_build_assign (var, gimple_call_arg (stmt, 0)); | |
1344 gsi_replace (&bi, ass_stmt, true); | |
1345 } | |
1346 else | |
1347 { | |
1348 gsi_remove (&bi, true); | |
1349 continue; | |
1350 } | |
1351 } | |
1352 } | |
1353 gsi_next (&bi); | |
1354 } | |
1355 } | |
1356 return 0; | |
1357 } | 2439 } |
1358 | 2440 |
1359 /* Predict using opcode of the last statement in basic block. */ | 2441 /* Predict using opcode of the last statement in basic block. */ |
1360 static void | 2442 static void |
1361 tree_predict_by_opcode (basic_block bb) | 2443 tree_predict_by_opcode (basic_block bb) |
1362 { | 2444 { |
1363 gimple stmt = last_stmt (bb); | 2445 gimple *stmt = last_stmt (bb); |
1364 edge then_edge; | 2446 edge then_edge; |
1365 tree op0, op1; | 2447 tree op0, op1; |
1366 tree type; | 2448 tree type; |
1367 tree val; | 2449 tree val; |
1368 enum tree_code cmp; | 2450 enum tree_code cmp; |
1369 bitmap visited; | |
1370 edge_iterator ei; | 2451 edge_iterator ei; |
2452 enum br_predictor predictor; | |
1371 | 2453 |
1372 if (!stmt || gimple_code (stmt) != GIMPLE_COND) | 2454 if (!stmt || gimple_code (stmt) != GIMPLE_COND) |
1373 return; | 2455 return; |
1374 FOR_EACH_EDGE (then_edge, ei, bb->succs) | 2456 FOR_EACH_EDGE (then_edge, ei, bb->succs) |
1375 if (then_edge->flags & EDGE_TRUE_VALUE) | 2457 if (then_edge->flags & EDGE_TRUE_VALUE) |
1376 break; | 2458 break; |
1377 op0 = gimple_cond_lhs (stmt); | 2459 op0 = gimple_cond_lhs (stmt); |
1378 op1 = gimple_cond_rhs (stmt); | 2460 op1 = gimple_cond_rhs (stmt); |
1379 cmp = gimple_cond_code (stmt); | 2461 cmp = gimple_cond_code (stmt); |
1380 type = TREE_TYPE (op0); | 2462 type = TREE_TYPE (op0); |
1381 visited = BITMAP_ALLOC (NULL); | 2463 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (), |
1382 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited); | 2464 &predictor); |
1383 BITMAP_FREE (visited); | 2465 if (val && TREE_CODE (val) == INTEGER_CST) |
1384 if (val) | 2466 { |
1385 { | 2467 if (predictor == PRED_BUILTIN_EXPECT) |
1386 if (integer_zerop (val)) | 2468 { |
1387 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN); | 2469 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY); |
2470 | |
2471 gcc_assert (percent >= 0 && percent <= 100); | |
2472 if (integer_zerop (val)) | |
2473 percent = 100 - percent; | |
2474 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent)); | |
2475 } | |
1388 else | 2476 else |
1389 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN); | 2477 predict_edge_def (then_edge, predictor, |
1390 return; | 2478 integer_zerop (val) ? NOT_TAKEN : TAKEN); |
1391 } | 2479 } |
1392 /* Try "pointer heuristic." | 2480 /* Try "pointer heuristic." |
1393 A comparison ptr == 0 is predicted as false. | 2481 A comparison ptr == 0 is predicted as false. |
1394 Similarly, a comparison ptr1 == ptr2 is predicted as false. */ | 2482 Similarly, a comparison ptr1 == ptr2 is predicted as false. */ |
1395 if (POINTER_TYPE_P (type)) | 2483 if (POINTER_TYPE_P (type)) |
1471 default: | 2559 default: |
1472 break; | 2560 break; |
1473 } | 2561 } |
1474 } | 2562 } |
1475 | 2563 |
2564 /* Returns TRUE if the STMT is exit(0) like statement. */ | |
2565 | |
2566 static bool | |
2567 is_exit_with_zero_arg (const gimple *stmt) | |
2568 { | |
2569 /* This is not exit, _exit or _Exit. */ | |
2570 if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT) | |
2571 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT) | |
2572 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2)) | |
2573 return false; | |
2574 | |
2575 /* Argument is an interger zero. */ | |
2576 return integer_zerop (gimple_call_arg (stmt, 0)); | |
2577 } | |
2578 | |
1476 /* Try to guess whether the value of return means error code. */ | 2579 /* Try to guess whether the value of return means error code. */ |
1477 | 2580 |
1478 static enum br_predictor | 2581 static enum br_predictor |
1479 return_prediction (tree val, enum prediction *prediction) | 2582 return_prediction (tree val, enum prediction *prediction) |
1480 { | 2583 { |
1505 Zero/one often represent booleans so exclude them from the | 2608 Zero/one often represent booleans so exclude them from the |
1506 heuristics. */ | 2609 heuristics. */ |
1507 if (TREE_CONSTANT (val) | 2610 if (TREE_CONSTANT (val) |
1508 && (!integer_zerop (val) && !integer_onep (val))) | 2611 && (!integer_zerop (val) && !integer_onep (val))) |
1509 { | 2612 { |
1510 *prediction = TAKEN; | 2613 *prediction = NOT_TAKEN; |
1511 return PRED_CONST_RETURN; | 2614 return PRED_CONST_RETURN; |
1512 } | 2615 } |
1513 } | 2616 } |
1514 return PRED_NO_PREDICTION; | 2617 return PRED_NO_PREDICTION; |
1515 } | 2618 } |
1517 /* Find the basic block with return expression and look up for possible | 2620 /* Find the basic block with return expression and look up for possible |
1518 return value trying to apply RETURN_PREDICTION heuristics. */ | 2621 return value trying to apply RETURN_PREDICTION heuristics. */ |
1519 static void | 2622 static void |
1520 apply_return_prediction (void) | 2623 apply_return_prediction (void) |
1521 { | 2624 { |
1522 gimple return_stmt = NULL; | 2625 greturn *return_stmt = NULL; |
1523 tree return_val; | 2626 tree return_val; |
1524 edge e; | 2627 edge e; |
1525 gimple phi; | 2628 gphi *phi; |
1526 int phi_num_args, i; | 2629 int phi_num_args, i; |
1527 enum br_predictor pred; | 2630 enum br_predictor pred; |
1528 enum prediction direction; | 2631 enum prediction direction; |
1529 edge_iterator ei; | 2632 edge_iterator ei; |
1530 | 2633 |
1531 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) | 2634 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
1532 { | 2635 { |
1533 return_stmt = last_stmt (e->src); | 2636 gimple *last = last_stmt (e->src); |
1534 if (return_stmt | 2637 if (last |
1535 && gimple_code (return_stmt) == GIMPLE_RETURN) | 2638 && gimple_code (last) == GIMPLE_RETURN) |
1536 break; | 2639 { |
2640 return_stmt = as_a <greturn *> (last); | |
2641 break; | |
2642 } | |
1537 } | 2643 } |
1538 if (!e) | 2644 if (!e) |
1539 return; | 2645 return; |
1540 return_val = gimple_return_retval (return_stmt); | 2646 return_val = gimple_return_retval (return_stmt); |
1541 if (!return_val) | 2647 if (!return_val) |
1542 return; | 2648 return; |
1543 if (TREE_CODE (return_val) != SSA_NAME | 2649 if (TREE_CODE (return_val) != SSA_NAME |
1544 || !SSA_NAME_DEF_STMT (return_val) | 2650 || !SSA_NAME_DEF_STMT (return_val) |
1545 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI) | 2651 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI) |
1546 return; | 2652 return; |
1547 phi = SSA_NAME_DEF_STMT (return_val); | 2653 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val)); |
1548 phi_num_args = gimple_phi_num_args (phi); | 2654 phi_num_args = gimple_phi_num_args (phi); |
1549 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction); | 2655 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction); |
1550 | 2656 |
1551 /* Avoid the degenerate case where all return values form the function | 2657 /* Avoid the degenerate case where all return values form the function |
1552 belongs to same category (ie they are all positive constants) | 2658 belongs to same category (ie they are all positive constants) |
1574 basic_block bb; | 2680 basic_block bb; |
1575 bool has_return_edges = false; | 2681 bool has_return_edges = false; |
1576 edge e; | 2682 edge e; |
1577 edge_iterator ei; | 2683 edge_iterator ei; |
1578 | 2684 |
1579 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) | 2685 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
1580 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH))) | 2686 if (!unlikely_executed_edge_p (e) && !(e->flags & EDGE_ABNORMAL_CALL)) |
1581 { | 2687 { |
1582 has_return_edges = true; | 2688 has_return_edges = true; |
1583 break; | 2689 break; |
1584 } | 2690 } |
1585 | 2691 |
1586 apply_return_prediction (); | 2692 apply_return_prediction (); |
1587 | 2693 |
1588 FOR_EACH_BB (bb) | 2694 FOR_EACH_BB_FN (bb, cfun) |
1589 { | 2695 { |
1590 gimple_stmt_iterator gsi; | 2696 gimple_stmt_iterator gsi; |
1591 | 2697 |
1592 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | 2698 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
1593 { | 2699 { |
1594 gimple stmt = gsi_stmt (gsi); | 2700 gimple *stmt = gsi_stmt (gsi); |
1595 tree decl; | 2701 tree decl; |
1596 | 2702 |
1597 if (is_gimple_call (stmt)) | 2703 if (is_gimple_call (stmt)) |
1598 { | 2704 { |
1599 if ((gimple_call_flags (stmt) & ECF_NORETURN) | 2705 if (gimple_call_noreturn_p (stmt) |
1600 && has_return_edges) | 2706 && has_return_edges |
2707 && !is_exit_with_zero_arg (stmt)) | |
1601 predict_paths_leading_to (bb, PRED_NORETURN, | 2708 predict_paths_leading_to (bb, PRED_NORETURN, |
1602 NOT_TAKEN); | 2709 NOT_TAKEN); |
1603 decl = gimple_call_fndecl (stmt); | 2710 decl = gimple_call_fndecl (stmt); |
1604 if (decl | 2711 if (decl |
1605 && lookup_attribute ("cold", | 2712 && lookup_attribute ("cold", |
1606 DECL_ATTRIBUTES (decl))) | 2713 DECL_ATTRIBUTES (decl))) |
1607 predict_paths_leading_to (bb, PRED_COLD_FUNCTION, | 2714 predict_paths_leading_to (bb, PRED_COLD_FUNCTION, |
2715 NOT_TAKEN); | |
2716 if (decl && recursive_call_p (current_function_decl, decl)) | |
2717 predict_paths_leading_to (bb, PRED_RECURSIVE_CALL, | |
1608 NOT_TAKEN); | 2718 NOT_TAKEN); |
1609 } | 2719 } |
1610 else if (gimple_code (stmt) == GIMPLE_PREDICT) | 2720 else if (gimple_code (stmt) == GIMPLE_PREDICT) |
1611 { | 2721 { |
1612 predict_paths_leading_to (bb, gimple_predict_predictor (stmt), | 2722 predict_paths_leading_to (bb, gimple_predict_predictor (stmt), |
1616 } | 2726 } |
1617 } | 2727 } |
1618 } | 2728 } |
1619 } | 2729 } |
1620 | 2730 |
1621 #ifdef ENABLE_CHECKING | 2731 /* Callback for hash_map::traverse, asserts that the pointer map is |
1622 | |
1623 /* Callback for pointer_map_traverse, asserts that the pointer map is | |
1624 empty. */ | 2732 empty. */ |
1625 | 2733 |
1626 static bool | 2734 bool |
1627 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value, | 2735 assert_is_empty (const_basic_block const &, edge_prediction *const &value, |
1628 void *data ATTRIBUTE_UNUSED) | 2736 void *) |
1629 { | 2737 { |
1630 gcc_assert (!*value); | 2738 gcc_assert (!value); |
1631 return false; | 2739 return false; |
1632 } | 2740 } |
1633 #endif | 2741 |
1634 | 2742 /* Predict branch probabilities and estimate profile for basic block BB. |
1635 /* Predict branch probabilities and estimate profile for basic block BB. */ | 2743 When LOCAL_ONLY is set do not use any global properties of CFG. */ |
1636 | 2744 |
1637 static void | 2745 static void |
1638 tree_estimate_probability_bb (basic_block bb) | 2746 tree_estimate_probability_bb (basic_block bb, bool local_only) |
1639 { | 2747 { |
1640 edge e; | 2748 edge e; |
1641 edge_iterator ei; | 2749 edge_iterator ei; |
1642 gimple last; | |
1643 | 2750 |
1644 FOR_EACH_EDGE (e, ei, bb->succs) | 2751 FOR_EACH_EDGE (e, ei, bb->succs) |
1645 { | 2752 { |
1646 /* Predict early returns to be probable, as we've already taken | |
1647 care for error returns and other cases are often used for | |
1648 fast paths through function. | |
1649 | |
1650 Since we've already removed the return statements, we are | |
1651 looking for CFG like: | |
1652 | |
1653 if (conditional) | |
1654 { | |
1655 .. | |
1656 goto return_block | |
1657 } | |
1658 some other blocks | |
1659 return_block: | |
1660 return_stmt. */ | |
1661 if (e->dest != bb->next_bb | |
1662 && e->dest != EXIT_BLOCK_PTR | |
1663 && single_succ_p (e->dest) | |
1664 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR | |
1665 && (last = last_stmt (e->dest)) != NULL | |
1666 && gimple_code (last) == GIMPLE_RETURN) | |
1667 { | |
1668 edge e1; | |
1669 edge_iterator ei1; | |
1670 | |
1671 if (single_succ_p (bb)) | |
1672 { | |
1673 FOR_EACH_EDGE (e1, ei1, bb->preds) | |
1674 if (!predicted_by_p (e1->src, PRED_NULL_RETURN) | |
1675 && !predicted_by_p (e1->src, PRED_CONST_RETURN) | |
1676 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN)) | |
1677 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN); | |
1678 } | |
1679 else | |
1680 if (!predicted_by_p (e->src, PRED_NULL_RETURN) | |
1681 && !predicted_by_p (e->src, PRED_CONST_RETURN) | |
1682 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN)) | |
1683 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN); | |
1684 } | |
1685 | |
1686 /* Look for block we are guarding (ie we dominate it, | 2753 /* Look for block we are guarding (ie we dominate it, |
1687 but it doesn't postdominate us). */ | 2754 but it doesn't postdominate us). */ |
1688 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb | 2755 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb |
2756 && !local_only | |
1689 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src) | 2757 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src) |
1690 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest)) | 2758 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest)) |
1691 { | 2759 { |
1692 gimple_stmt_iterator bi; | 2760 gimple_stmt_iterator bi; |
1693 | 2761 |
1696 to signal exceptional situations such as printing error | 2764 to signal exceptional situations such as printing error |
1697 messages. */ | 2765 messages. */ |
1698 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi); | 2766 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi); |
1699 gsi_next (&bi)) | 2767 gsi_next (&bi)) |
1700 { | 2768 { |
1701 gimple stmt = gsi_stmt (bi); | 2769 gimple *stmt = gsi_stmt (bi); |
1702 if (is_gimple_call (stmt) | 2770 if (is_gimple_call (stmt) |
2771 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)) | |
1703 /* Constant and pure calls are hardly used to signalize | 2772 /* Constant and pure calls are hardly used to signalize |
1704 something exceptional. */ | 2773 something exceptional. */ |
1705 && gimple_has_side_effects (stmt)) | 2774 && gimple_has_side_effects (stmt)) |
1706 { | 2775 { |
1707 predict_edge_def (e, PRED_CALL, NOT_TAKEN); | 2776 if (gimple_call_fndecl (stmt)) |
2777 predict_edge_def (e, PRED_CALL, NOT_TAKEN); | |
2778 else if (virtual_method_call_p (gimple_call_fn (stmt))) | |
2779 predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN); | |
2780 else | |
2781 predict_edge_def (e, PRED_INDIR_CALL, TAKEN); | |
1708 break; | 2782 break; |
1709 } | 2783 } |
1710 } | 2784 } |
1711 } | 2785 } |
1712 } | 2786 } |
1713 tree_predict_by_opcode (bb); | 2787 tree_predict_by_opcode (bb); |
1714 } | 2788 } |
1715 | 2789 |
1716 /* Predict branch probabilities and estimate profile of the tree CFG. | 2790 /* Predict branch probabilities and estimate profile of the tree CFG. |
1717 This function can be called from the loop optimizers to recompute | 2791 This function can be called from the loop optimizers to recompute |
1718 the profile information. */ | 2792 the profile information. |
2793 If DRY_RUN is set, do not modify CFG and only produce dump files. */ | |
1719 | 2794 |
1720 void | 2795 void |
1721 tree_estimate_probability (void) | 2796 tree_estimate_probability (bool dry_run) |
1722 { | 2797 { |
1723 basic_block bb; | 2798 basic_block bb; |
1724 | 2799 |
1725 add_noreturn_fake_exit_edges (); | 2800 add_noreturn_fake_exit_edges (); |
1726 connect_infinite_loops_to_exit (); | 2801 connect_infinite_loops_to_exit (); |
1727 /* We use loop_niter_by_eval, which requires that the loops have | 2802 /* We use loop_niter_by_eval, which requires that the loops have |
1728 preheaders. */ | 2803 preheaders. */ |
1729 create_preheaders (CP_SIMPLE_PREHEADERS); | 2804 create_preheaders (CP_SIMPLE_PREHEADERS); |
1730 calculate_dominance_info (CDI_POST_DOMINATORS); | 2805 calculate_dominance_info (CDI_POST_DOMINATORS); |
1731 | 2806 |
1732 bb_predictions = pointer_map_create (); | 2807 bb_predictions = new hash_map<const_basic_block, edge_prediction *>; |
1733 tree_bb_level_predictions (); | 2808 tree_bb_level_predictions (); |
1734 record_loop_exits (); | 2809 record_loop_exits (); |
1735 | 2810 |
1736 if (number_of_loops () > 1) | 2811 if (number_of_loops (cfun) > 1) |
1737 predict_loops (); | 2812 predict_loops (); |
1738 | 2813 |
1739 FOR_EACH_BB (bb) | 2814 FOR_EACH_BB_FN (bb, cfun) |
1740 tree_estimate_probability_bb (bb); | 2815 tree_estimate_probability_bb (bb, false); |
1741 | 2816 |
1742 FOR_EACH_BB (bb) | 2817 FOR_EACH_BB_FN (bb, cfun) |
1743 combine_predictions_for_bb (bb); | 2818 combine_predictions_for_bb (bb, dry_run); |
1744 | 2819 |
1745 #ifdef ENABLE_CHECKING | 2820 if (flag_checking) |
1746 pointer_map_traverse (bb_predictions, assert_is_empty, NULL); | 2821 bb_predictions->traverse<void *, assert_is_empty> (NULL); |
1747 #endif | 2822 |
1748 pointer_map_destroy (bb_predictions); | 2823 delete bb_predictions; |
1749 bb_predictions = NULL; | 2824 bb_predictions = NULL; |
1750 | 2825 |
1751 estimate_bb_frequencies (); | 2826 if (!dry_run) |
2827 estimate_bb_frequencies (false); | |
1752 free_dominance_info (CDI_POST_DOMINATORS); | 2828 free_dominance_info (CDI_POST_DOMINATORS); |
1753 remove_fake_exit_edges (); | 2829 remove_fake_exit_edges (); |
1754 } | 2830 } |
1755 | 2831 |
1756 /* Predict branch probabilities and estimate profile of the tree CFG. | 2832 /* Set edge->probability for each successor edge of BB. */ |
1757 This is the driver function for PASS_PROFILE. */ | 2833 void |
1758 | 2834 tree_guess_outgoing_edge_probabilities (basic_block bb) |
1759 static unsigned int | 2835 { |
1760 tree_estimate_probability_driver (void) | 2836 bb_predictions = new hash_map<const_basic_block, edge_prediction *>; |
1761 { | 2837 tree_estimate_probability_bb (bb, true); |
1762 unsigned nb_loops; | 2838 combine_predictions_for_bb (bb, false); |
1763 | 2839 if (flag_checking) |
1764 loop_optimizer_init (0); | 2840 bb_predictions->traverse<void *, assert_is_empty> (NULL); |
1765 if (dump_file && (dump_flags & TDF_DETAILS)) | 2841 delete bb_predictions; |
1766 flow_loops_dump (dump_file, NULL, 0); | 2842 bb_predictions = NULL; |
1767 | |
1768 mark_irreducible_loops (); | |
1769 | |
1770 nb_loops = number_of_loops (); | |
1771 if (nb_loops > 1) | |
1772 scev_initialize (); | |
1773 | |
1774 tree_estimate_probability (); | |
1775 | |
1776 if (nb_loops > 1) | |
1777 scev_finalize (); | |
1778 | |
1779 loop_optimizer_finalize (); | |
1780 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1781 gimple_dump_cfg (dump_file, dump_flags); | |
1782 if (profile_status == PROFILE_ABSENT) | |
1783 profile_status = PROFILE_GUESSED; | |
1784 return 0; | |
1785 } | 2843 } |
1786 | 2844 |
1787 /* Predict edges to successors of CUR whose sources are not postdominated by | 2845 /* Predict edges to successors of CUR whose sources are not postdominated by |
1788 BB by PRED and recurse to all postdominators. */ | 2846 BB by PRED and recurse to all postdominators. */ |
1789 | 2847 |
1790 static void | 2848 static void |
1791 predict_paths_for_bb (basic_block cur, basic_block bb, | 2849 predict_paths_for_bb (basic_block cur, basic_block bb, |
1792 enum br_predictor pred, | 2850 enum br_predictor pred, |
1793 enum prediction taken) | 2851 enum prediction taken, |
2852 bitmap visited, struct loop *in_loop = NULL) | |
1794 { | 2853 { |
1795 edge e; | 2854 edge e; |
1796 edge_iterator ei; | 2855 edge_iterator ei; |
1797 basic_block son; | 2856 basic_block son; |
2857 | |
2858 /* If we exited the loop or CUR is unconditional in the loop, there is | |
2859 nothing to do. */ | |
2860 if (in_loop | |
2861 && (!flow_bb_inside_loop_p (in_loop, cur) | |
2862 || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur))) | |
2863 return; | |
1798 | 2864 |
1799 /* We are looking for all edges forming edge cut induced by | 2865 /* We are looking for all edges forming edge cut induced by |
1800 set of all blocks postdominated by BB. */ | 2866 set of all blocks postdominated by BB. */ |
1801 FOR_EACH_EDGE (e, ei, cur->preds) | 2867 FOR_EACH_EDGE (e, ei, cur->preds) |
1802 if (e->src->index >= NUM_FIXED_BLOCKS | 2868 if (e->src->index >= NUM_FIXED_BLOCKS |
1805 edge e2; | 2871 edge e2; |
1806 edge_iterator ei2; | 2872 edge_iterator ei2; |
1807 bool found = false; | 2873 bool found = false; |
1808 | 2874 |
1809 /* Ignore fake edges and eh, we predict them as not taken anyway. */ | 2875 /* Ignore fake edges and eh, we predict them as not taken anyway. */ |
1810 if (e->flags & (EDGE_EH | EDGE_FAKE)) | 2876 if (unlikely_executed_edge_p (e)) |
1811 continue; | 2877 continue; |
1812 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb)); | 2878 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb)); |
1813 | 2879 |
1814 /* See if there is how many edge from e->src that is not abnormal | 2880 /* See if there is an edge from e->src that is not abnormal |
1815 and does not lead to BB. */ | 2881 and does not lead to BB and does not exit the loop. */ |
1816 FOR_EACH_EDGE (e2, ei2, e->src->succs) | 2882 FOR_EACH_EDGE (e2, ei2, e->src->succs) |
1817 if (e2 != e | 2883 if (e2 != e |
1818 && !(e2->flags & (EDGE_EH | EDGE_FAKE)) | 2884 && !unlikely_executed_edge_p (e2) |
1819 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)) | 2885 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb) |
2886 && (!in_loop || !loop_exit_edge_p (in_loop, e2))) | |
1820 { | 2887 { |
1821 found = true; | 2888 found = true; |
1822 break; | 2889 break; |
1823 } | 2890 } |
1824 | 2891 |
1825 /* If there is non-abnormal path leaving e->src, predict edge | 2892 /* If there is non-abnormal path leaving e->src, predict edge |
1826 using predictor. Otherwise we need to look for paths | 2893 using predictor. Otherwise we need to look for paths |
1827 leading to e->src. */ | 2894 leading to e->src. |
2895 | |
2896 The second may lead to infinite loop in the case we are predicitng | |
2897 regions that are only reachable by abnormal edges. We simply | |
2898 prevent visiting given BB twice. */ | |
1828 if (found) | 2899 if (found) |
1829 predict_edge_def (e, pred, taken); | 2900 { |
1830 else | 2901 if (!edge_predicted_by_p (e, pred, taken)) |
1831 predict_paths_for_bb (e->src, e->src, pred, taken); | 2902 predict_edge_def (e, pred, taken); |
2903 } | |
2904 else if (bitmap_set_bit (visited, e->src->index)) | |
2905 predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop); | |
1832 } | 2906 } |
1833 for (son = first_dom_son (CDI_POST_DOMINATORS, cur); | 2907 for (son = first_dom_son (CDI_POST_DOMINATORS, cur); |
1834 son; | 2908 son; |
1835 son = next_dom_son (CDI_POST_DOMINATORS, son)) | 2909 son = next_dom_son (CDI_POST_DOMINATORS, son)) |
1836 predict_paths_for_bb (son, bb, pred, taken); | 2910 predict_paths_for_bb (son, bb, pred, taken, visited, in_loop); |
1837 } | 2911 } |
1838 | 2912 |
1839 /* Sets branch probabilities according to PREDiction and | 2913 /* Sets branch probabilities according to PREDiction and |
1840 FLAGS. */ | 2914 FLAGS. */ |
1841 | 2915 |
1842 static void | 2916 static void |
1843 predict_paths_leading_to (basic_block bb, enum br_predictor pred, | 2917 predict_paths_leading_to (basic_block bb, enum br_predictor pred, |
1844 enum prediction taken) | 2918 enum prediction taken, struct loop *in_loop) |
1845 { | 2919 { |
1846 predict_paths_for_bb (bb, bb, pred, taken); | 2920 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop); |
1847 } | 2921 } |
1848 | 2922 |
1849 /* Like predict_paths_leading_to but take edge instead of basic block. */ | 2923 /* Like predict_paths_leading_to but take edge instead of basic block. */ |
1850 | 2924 |
1851 static void | 2925 static void |
1852 predict_paths_leading_to_edge (edge e, enum br_predictor pred, | 2926 predict_paths_leading_to_edge (edge e, enum br_predictor pred, |
1853 enum prediction taken) | 2927 enum prediction taken, struct loop *in_loop) |
1854 { | 2928 { |
1855 bool has_nonloop_edge = false; | 2929 bool has_nonloop_edge = false; |
1856 edge_iterator ei; | 2930 edge_iterator ei; |
1857 edge e2; | 2931 edge e2; |
1858 | 2932 |
1859 basic_block bb = e->src; | 2933 basic_block bb = e->src; |
1860 FOR_EACH_EDGE (e2, ei, bb->succs) | 2934 FOR_EACH_EDGE (e2, ei, bb->succs) |
1861 if (e2->dest != e->src && e2->dest != e->dest | 2935 if (e2->dest != e->src && e2->dest != e->dest |
1862 && !(e->flags & (EDGE_EH | EDGE_FAKE)) | 2936 && !unlikely_executed_edge_p (e) |
1863 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest)) | 2937 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest)) |
1864 { | 2938 { |
1865 has_nonloop_edge = true; | 2939 has_nonloop_edge = true; |
1866 break; | 2940 break; |
1867 } | 2941 } |
1868 if (!has_nonloop_edge) | 2942 if (!has_nonloop_edge) |
1869 predict_paths_for_bb (bb, bb, pred, taken); | 2943 { |
2944 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop); | |
2945 } | |
1870 else | 2946 else |
1871 predict_edge_def (e, pred, taken); | 2947 predict_edge_def (e, pred, taken); |
1872 } | 2948 } |
1873 | 2949 |
1874 /* This is used to carry information about basic blocks. It is | 2950 /* This is used to carry information about basic blocks. It is |
1875 attached to the AUX field of the standard CFG block. */ | 2951 attached to the AUX field of the standard CFG block. */ |
1876 | 2952 |
1877 typedef struct block_info_def | 2953 struct block_info |
1878 { | 2954 { |
1879 /* Estimated frequency of execution of basic_block. */ | 2955 /* Estimated frequency of execution of basic_block. */ |
1880 sreal frequency; | 2956 sreal frequency; |
1881 | 2957 |
1882 /* To keep queue of basic blocks to process. */ | 2958 /* To keep queue of basic blocks to process. */ |
1883 basic_block next; | 2959 basic_block next; |
1884 | 2960 |
1885 /* Number of predecessors we need to visit first. */ | 2961 /* Number of predecessors we need to visit first. */ |
1886 int npredecessors; | 2962 int npredecessors; |
1887 } *block_info; | 2963 }; |
1888 | 2964 |
1889 /* Similar information for edges. */ | 2965 /* Similar information for edges. */ |
1890 typedef struct edge_info_def | 2966 struct edge_prob_info |
1891 { | 2967 { |
1892 /* In case edge is a loopback edge, the probability edge will be reached | 2968 /* In case edge is a loopback edge, the probability edge will be reached |
1893 in case header is. Estimated number of iterations of the loop can be | 2969 in case header is. Estimated number of iterations of the loop can be |
1894 then computed as 1 / (1 - back_edge_prob). */ | 2970 then computed as 1 / (1 - back_edge_prob). */ |
1895 sreal back_edge_prob; | 2971 sreal back_edge_prob; |
1896 /* True if the edge is a loopback edge in the natural loop. */ | 2972 /* True if the edge is a loopback edge in the natural loop. */ |
1897 unsigned int back_edge:1; | 2973 unsigned int back_edge:1; |
1898 } *edge_info; | 2974 }; |
1899 | 2975 |
1900 #define BLOCK_INFO(B) ((block_info) (B)->aux) | 2976 #define BLOCK_INFO(B) ((block_info *) (B)->aux) |
1901 #define EDGE_INFO(E) ((edge_info) (E)->aux) | 2977 #undef EDGE_INFO |
2978 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux) | |
1902 | 2979 |
1903 /* Helper function for estimate_bb_frequencies. | 2980 /* Helper function for estimate_bb_frequencies. |
1904 Propagate the frequencies in blocks marked in | 2981 Propagate the frequencies in blocks marked in |
1905 TOVISIT, starting in HEAD. */ | 2982 TOVISIT, starting in HEAD. */ |
1906 | 2983 |
1919 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi) | 2996 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi) |
1920 { | 2997 { |
1921 edge_iterator ei; | 2998 edge_iterator ei; |
1922 int count = 0; | 2999 int count = 0; |
1923 | 3000 |
1924 bb = BASIC_BLOCK (i); | 3001 bb = BASIC_BLOCK_FOR_FN (cfun, i); |
1925 | 3002 |
1926 FOR_EACH_EDGE (e, ei, bb->preds) | 3003 FOR_EACH_EDGE (e, ei, bb->preds) |
1927 { | 3004 { |
1928 bool visit = bitmap_bit_p (tovisit, e->src->index); | 3005 bool visit = bitmap_bit_p (tovisit, e->src->index); |
1929 | 3006 |
1934 "Irreducible region hit, ignoring edge to %i->%i\n", | 3011 "Irreducible region hit, ignoring edge to %i->%i\n", |
1935 e->src->index, bb->index); | 3012 e->src->index, bb->index); |
1936 } | 3013 } |
1937 BLOCK_INFO (bb)->npredecessors = count; | 3014 BLOCK_INFO (bb)->npredecessors = count; |
1938 /* When function never returns, we will never process exit block. */ | 3015 /* When function never returns, we will never process exit block. */ |
1939 if (!count && bb == EXIT_BLOCK_PTR) | 3016 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
1940 bb->count = bb->frequency = 0; | 3017 { |
1941 } | 3018 bb->count = profile_count::zero (); |
1942 | 3019 bb->frequency = 0; |
1943 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one)); | 3020 } |
3021 } | |
3022 | |
3023 BLOCK_INFO (head)->frequency = 1; | |
1944 last = head; | 3024 last = head; |
1945 for (bb = head; bb; bb = nextbb) | 3025 for (bb = head; bb; bb = nextbb) |
1946 { | 3026 { |
1947 edge_iterator ei; | 3027 edge_iterator ei; |
1948 sreal cyclic_probability, frequency; | 3028 sreal cyclic_probability = 0; |
1949 | 3029 sreal frequency = 0; |
1950 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero)); | |
1951 memcpy (&frequency, &real_zero, sizeof (real_zero)); | |
1952 | 3030 |
1953 nextbb = BLOCK_INFO (bb)->next; | 3031 nextbb = BLOCK_INFO (bb)->next; |
1954 BLOCK_INFO (bb)->next = NULL; | 3032 BLOCK_INFO (bb)->next = NULL; |
1955 | 3033 |
1956 /* Compute frequency of basic block. */ | 3034 /* Compute frequency of basic block. */ |
1957 if (bb != head) | 3035 if (bb != head) |
1958 { | 3036 { |
1959 #ifdef ENABLE_CHECKING | 3037 if (flag_checking) |
1960 FOR_EACH_EDGE (e, ei, bb->preds) | 3038 FOR_EACH_EDGE (e, ei, bb->preds) |
1961 gcc_assert (!bitmap_bit_p (tovisit, e->src->index) | 3039 gcc_assert (!bitmap_bit_p (tovisit, e->src->index) |
1962 || (e->flags & EDGE_DFS_BACK)); | 3040 || (e->flags & EDGE_DFS_BACK)); |
1963 #endif | |
1964 | 3041 |
1965 FOR_EACH_EDGE (e, ei, bb->preds) | 3042 FOR_EACH_EDGE (e, ei, bb->preds) |
1966 if (EDGE_INFO (e)->back_edge) | 3043 if (EDGE_INFO (e)->back_edge) |
1967 { | 3044 { |
1968 sreal_add (&cyclic_probability, &cyclic_probability, | 3045 cyclic_probability += EDGE_INFO (e)->back_edge_prob; |
1969 &EDGE_INFO (e)->back_edge_prob); | |
1970 } | 3046 } |
1971 else if (!(e->flags & EDGE_DFS_BACK)) | 3047 else if (!(e->flags & EDGE_DFS_BACK)) |
1972 { | 3048 { |
1973 sreal tmp; | |
1974 | |
1975 /* frequency += (e->probability | 3049 /* frequency += (e->probability |
1976 * BLOCK_INFO (e->src)->frequency / | 3050 * BLOCK_INFO (e->src)->frequency / |
1977 REG_BR_PROB_BASE); */ | 3051 REG_BR_PROB_BASE); */ |
1978 | 3052 |
1979 sreal_init (&tmp, e->probability, 0); | 3053 sreal tmp = e->probability.to_reg_br_prob_base (); |
1980 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency); | 3054 tmp *= BLOCK_INFO (e->src)->frequency; |
1981 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base); | 3055 tmp *= real_inv_br_prob_base; |
1982 sreal_add (&frequency, &frequency, &tmp); | 3056 frequency += tmp; |
1983 } | 3057 } |
1984 | 3058 |
1985 if (sreal_compare (&cyclic_probability, &real_zero) == 0) | 3059 if (cyclic_probability == 0) |
1986 { | 3060 { |
1987 memcpy (&BLOCK_INFO (bb)->frequency, &frequency, | 3061 BLOCK_INFO (bb)->frequency = frequency; |
1988 sizeof (frequency)); | |
1989 } | 3062 } |
1990 else | 3063 else |
1991 { | 3064 { |
1992 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0) | 3065 if (cyclic_probability > real_almost_one) |
1993 { | 3066 cyclic_probability = real_almost_one; |
1994 memcpy (&cyclic_probability, &real_almost_one, | |
1995 sizeof (real_almost_one)); | |
1996 } | |
1997 | 3067 |
1998 /* BLOCK_INFO (bb)->frequency = frequency | 3068 /* BLOCK_INFO (bb)->frequency = frequency |
1999 / (1 - cyclic_probability) */ | 3069 / (1 - cyclic_probability) */ |
2000 | 3070 |
2001 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability); | 3071 cyclic_probability = sreal (1) - cyclic_probability; |
2002 sreal_div (&BLOCK_INFO (bb)->frequency, | 3072 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability; |
2003 &frequency, &cyclic_probability); | |
2004 } | 3073 } |
2005 } | 3074 } |
2006 | 3075 |
2007 bitmap_clear_bit (tovisit, bb->index); | 3076 bitmap_clear_bit (tovisit, bb->index); |
2008 | 3077 |
2009 e = find_edge (bb, head); | 3078 e = find_edge (bb, head); |
2010 if (e) | 3079 if (e) |
2011 { | 3080 { |
2012 sreal tmp; | |
2013 | |
2014 /* EDGE_INFO (e)->back_edge_prob | 3081 /* EDGE_INFO (e)->back_edge_prob |
2015 = ((e->probability * BLOCK_INFO (bb)->frequency) | 3082 = ((e->probability * BLOCK_INFO (bb)->frequency) |
2016 / REG_BR_PROB_BASE); */ | 3083 / REG_BR_PROB_BASE); */ |
2017 | 3084 |
2018 sreal_init (&tmp, e->probability, 0); | 3085 sreal tmp = e->probability.to_reg_br_prob_base (); |
2019 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency); | 3086 tmp *= BLOCK_INFO (bb)->frequency; |
2020 sreal_mul (&EDGE_INFO (e)->back_edge_prob, | 3087 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; |
2021 &tmp, &real_inv_br_prob_base); | |
2022 } | 3088 } |
2023 | 3089 |
2024 /* Propagate to successor blocks. */ | 3090 /* Propagate to successor blocks. */ |
2025 FOR_EACH_EDGE (e, ei, bb->succs) | 3091 FOR_EACH_EDGE (e, ei, bb->succs) |
2026 if (!(e->flags & EDGE_DFS_BACK) | 3092 if (!(e->flags & EDGE_DFS_BACK) |
2038 } | 3104 } |
2039 } | 3105 } |
2040 } | 3106 } |
2041 } | 3107 } |
2042 | 3108 |
2043 /* Estimate probabilities of loopback edges in loops at same nest level. */ | 3109 /* Estimate frequencies in loops at same nest level. */ |
2044 | 3110 |
2045 static void | 3111 static void |
2046 estimate_loops_at_level (struct loop *first_loop) | 3112 estimate_loops_at_level (struct loop *first_loop) |
2047 { | 3113 { |
2048 struct loop *loop; | 3114 struct loop *loop; |
2050 for (loop = first_loop; loop; loop = loop->next) | 3116 for (loop = first_loop; loop; loop = loop->next) |
2051 { | 3117 { |
2052 edge e; | 3118 edge e; |
2053 basic_block *bbs; | 3119 basic_block *bbs; |
2054 unsigned i; | 3120 unsigned i; |
2055 bitmap tovisit = BITMAP_ALLOC (NULL); | 3121 auto_bitmap tovisit; |
2056 | 3122 |
2057 estimate_loops_at_level (loop->inner); | 3123 estimate_loops_at_level (loop->inner); |
2058 | 3124 |
2059 /* Find current loop back edge and mark it. */ | 3125 /* Find current loop back edge and mark it. */ |
2060 e = loop_latch_edge (loop); | 3126 e = loop_latch_edge (loop); |
2063 bbs = get_loop_body (loop); | 3129 bbs = get_loop_body (loop); |
2064 for (i = 0; i < loop->num_nodes; i++) | 3130 for (i = 0; i < loop->num_nodes; i++) |
2065 bitmap_set_bit (tovisit, bbs[i]->index); | 3131 bitmap_set_bit (tovisit, bbs[i]->index); |
2066 free (bbs); | 3132 free (bbs); |
2067 propagate_freq (loop->header, tovisit); | 3133 propagate_freq (loop->header, tovisit); |
2068 BITMAP_FREE (tovisit); | |
2069 } | 3134 } |
2070 } | 3135 } |
2071 | 3136 |
2072 /* Propagates frequencies through structure of loops. */ | 3137 /* Propagates frequencies through structure of loops. */ |
2073 | 3138 |
2074 static void | 3139 static void |
2075 estimate_loops (void) | 3140 estimate_loops (void) |
2076 { | 3141 { |
2077 bitmap tovisit = BITMAP_ALLOC (NULL); | 3142 auto_bitmap tovisit; |
2078 basic_block bb; | 3143 basic_block bb; |
2079 | 3144 |
2080 /* Start by estimating the frequencies in the loops. */ | 3145 /* Start by estimating the frequencies in the loops. */ |
2081 if (number_of_loops () > 1) | 3146 if (number_of_loops (cfun) > 1) |
2082 estimate_loops_at_level (current_loops->tree_root->inner); | 3147 estimate_loops_at_level (current_loops->tree_root->inner); |
2083 | 3148 |
2084 /* Now propagate the frequencies through all the blocks. */ | 3149 /* Now propagate the frequencies through all the blocks. */ |
2085 FOR_ALL_BB (bb) | 3150 FOR_ALL_BB_FN (bb, cfun) |
2086 { | 3151 { |
2087 bitmap_set_bit (tovisit, bb->index); | 3152 bitmap_set_bit (tovisit, bb->index); |
2088 } | 3153 } |
2089 propagate_freq (ENTRY_BLOCK_PTR, tovisit); | 3154 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit); |
2090 BITMAP_FREE (tovisit); | 3155 } |
3156 | |
3157 /* Drop the profile for NODE to guessed, and update its frequency based on | |
3158 whether it is expected to be hot given the CALL_COUNT. */ | |
3159 | |
3160 static void | |
3161 drop_profile (struct cgraph_node *node, profile_count call_count) | |
3162 { | |
3163 struct function *fn = DECL_STRUCT_FUNCTION (node->decl); | |
3164 /* In the case where this was called by another function with a | |
3165 dropped profile, call_count will be 0. Since there are no | |
3166 non-zero call counts to this function, we don't know for sure | |
3167 whether it is hot, and therefore it will be marked normal below. */ | |
3168 bool hot = maybe_hot_count_p (NULL, call_count); | |
3169 | |
3170 if (dump_file) | |
3171 fprintf (dump_file, | |
3172 "Dropping 0 profile for %s. %s based on calls.\n", | |
3173 node->dump_name (), | |
3174 hot ? "Function is hot" : "Function is normal"); | |
3175 /* We only expect to miss profiles for functions that are reached | |
3176 via non-zero call edges in cases where the function may have | |
3177 been linked from another module or library (COMDATs and extern | |
3178 templates). See the comments below for handle_missing_profiles. | |
3179 Also, only warn in cases where the missing counts exceed the | |
3180 number of training runs. In certain cases with an execv followed | |
3181 by a no-return call the profile for the no-return call is not | |
3182 dumped and there can be a mismatch. */ | |
3183 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl) | |
3184 && call_count > profile_info->runs) | |
3185 { | |
3186 if (flag_profile_correction) | |
3187 { | |
3188 if (dump_file) | |
3189 fprintf (dump_file, | |
3190 "Missing counts for called function %s\n", | |
3191 node->dump_name ()); | |
3192 } | |
3193 else | |
3194 warning (0, "Missing counts for called function %s", | |
3195 node->dump_name ()); | |
3196 } | |
3197 | |
3198 basic_block bb; | |
3199 FOR_ALL_BB_FN (bb, fn) | |
3200 { | |
3201 bb->count = profile_count::uninitialized (); | |
3202 } | |
3203 | |
3204 struct cgraph_edge *e; | |
3205 for (e = node->callees; e; e = e->next_caller) | |
3206 { | |
3207 e->frequency = compute_call_stmt_bb_frequency (e->caller->decl, | |
3208 gimple_bb (e->call_stmt)); | |
3209 } | |
3210 | |
3211 profile_status_for_fn (fn) | |
3212 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT); | |
3213 node->frequency | |
3214 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL; | |
3215 } | |
3216 | |
3217 /* In the case of COMDAT routines, multiple object files will contain the same | |
3218 function and the linker will select one for the binary. In that case | |
3219 all the other copies from the profile instrument binary will be missing | |
3220 profile counts. Look for cases where this happened, due to non-zero | |
3221 call counts going to 0-count functions, and drop the profile to guessed | |
3222 so that we can use the estimated probabilities and avoid optimizing only | |
3223 for size. | |
3224 | |
3225 The other case where the profile may be missing is when the routine | |
3226 is not going to be emitted to the object file, e.g. for "extern template" | |
3227 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in | |
3228 all other cases of non-zero calls to 0-count functions. */ | |
3229 | |
3230 void | |
3231 handle_missing_profiles (void) | |
3232 { | |
3233 struct cgraph_node *node; | |
3234 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION); | |
3235 auto_vec<struct cgraph_node *, 64> worklist; | |
3236 | |
3237 /* See if 0 count function has non-0 count callers. In this case we | |
3238 lost some profile. Drop its function profile to PROFILE_GUESSED. */ | |
3239 FOR_EACH_DEFINED_FUNCTION (node) | |
3240 { | |
3241 struct cgraph_edge *e; | |
3242 profile_count call_count = profile_count::zero (); | |
3243 gcov_type max_tp_first_run = 0; | |
3244 struct function *fn = DECL_STRUCT_FUNCTION (node->decl); | |
3245 | |
3246 if (!(node->count == profile_count::zero ())) | |
3247 continue; | |
3248 for (e = node->callers; e; e = e->next_caller) | |
3249 if (e->count.initialized_p () && e->count > 0) | |
3250 { | |
3251 call_count = call_count + e->count; | |
3252 | |
3253 if (e->caller->tp_first_run > max_tp_first_run) | |
3254 max_tp_first_run = e->caller->tp_first_run; | |
3255 } | |
3256 | |
3257 /* If time profile is missing, let assign the maximum that comes from | |
3258 caller functions. */ | |
3259 if (!node->tp_first_run && max_tp_first_run) | |
3260 node->tp_first_run = max_tp_first_run + 1; | |
3261 | |
3262 if (call_count > 0 | |
3263 && fn && fn->cfg | |
3264 && (call_count.apply_scale (unlikely_count_fraction, 1) | |
3265 >= profile_info->runs)) | |
3266 { | |
3267 drop_profile (node, call_count); | |
3268 worklist.safe_push (node); | |
3269 } | |
3270 } | |
3271 | |
3272 /* Propagate the profile dropping to other 0-count COMDATs that are | |
3273 potentially called by COMDATs we already dropped the profile on. */ | |
3274 while (worklist.length () > 0) | |
3275 { | |
3276 struct cgraph_edge *e; | |
3277 | |
3278 node = worklist.pop (); | |
3279 for (e = node->callees; e; e = e->next_caller) | |
3280 { | |
3281 struct cgraph_node *callee = e->callee; | |
3282 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl); | |
3283 | |
3284 if (callee->count > 0) | |
3285 continue; | |
3286 if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl)) | |
3287 && fn && fn->cfg | |
3288 && profile_status_for_fn (fn) == PROFILE_READ) | |
3289 { | |
3290 drop_profile (node, profile_count::zero ()); | |
3291 worklist.safe_push (callee); | |
3292 } | |
3293 } | |
3294 } | |
2091 } | 3295 } |
2092 | 3296 |
2093 /* Convert counts measured by profile driven feedback to frequencies. | 3297 /* Convert counts measured by profile driven feedback to frequencies. |
2094 Return nonzero iff there was any nonzero execution count. */ | 3298 Return nonzero iff there was any nonzero execution count. */ |
2095 | 3299 |
2096 int | 3300 bool |
2097 counts_to_freqs (void) | 3301 counts_to_freqs (void) |
2098 { | 3302 { |
2099 gcov_type count_max, true_count_max = 0; | 3303 gcov_type count_max; |
3304 profile_count true_count_max = profile_count::zero (); | |
2100 basic_block bb; | 3305 basic_block bb; |
2101 | 3306 |
2102 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) | 3307 /* Don't overwrite the estimated frequencies when the profile for |
2103 true_count_max = MAX (bb->count, true_count_max); | 3308 the function is missing. We may drop this function PROFILE_GUESSED |
2104 | 3309 later in drop_profile (). */ |
2105 count_max = MAX (true_count_max, 1); | 3310 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () |
2106 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) | 3311 || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()) |
2107 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max; | 3312 return false; |
2108 | 3313 |
2109 return true_count_max; | 3314 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) |
3315 if (bb->count > true_count_max) | |
3316 true_count_max = bb->count; | |
3317 | |
3318 /* If we have no counts to base frequencies on, keep those that are | |
3319 already there. */ | |
3320 if (!(true_count_max > 0)) | |
3321 return false; | |
3322 | |
3323 count_max = true_count_max.to_gcov_type (); | |
3324 | |
3325 FOR_ALL_BB_FN (bb, cfun) | |
3326 if (bb->count.initialized_p ()) | |
3327 bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max); | |
3328 | |
3329 return true; | |
2110 } | 3330 } |
2111 | 3331 |
2112 /* Return true if function is likely to be expensive, so there is no point to | 3332 /* Return true if function is likely to be expensive, so there is no point to |
2113 optimize performance of prologue, epilogue or do inlining at the expense | 3333 optimize performance of prologue, epilogue or do inlining at the expense |
2114 of code size growth. THRESHOLD is the limit of number of instructions | 3334 of code size growth. THRESHOLD is the limit of number of instructions |
2126 gcc_assert (threshold <= BB_FREQ_MAX); | 3346 gcc_assert (threshold <= BB_FREQ_MAX); |
2127 | 3347 |
2128 /* Frequencies are out of range. This either means that function contains | 3348 /* Frequencies are out of range. This either means that function contains |
2129 internal loop executing more than BB_FREQ_MAX times or profile feedback | 3349 internal loop executing more than BB_FREQ_MAX times or profile feedback |
2130 is available and function has not been executed at all. */ | 3350 is available and function has not been executed at all. */ |
2131 if (ENTRY_BLOCK_PTR->frequency == 0) | 3351 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0) |
2132 return true; | 3352 return true; |
2133 | 3353 |
2134 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ | 3354 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ |
2135 limit = ENTRY_BLOCK_PTR->frequency * threshold; | 3355 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold; |
2136 FOR_EACH_BB (bb) | 3356 FOR_EACH_BB_FN (bb, cfun) |
2137 { | 3357 { |
2138 rtx insn; | 3358 rtx_insn *insn; |
2139 | 3359 |
2140 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); | 3360 FOR_BB_INSNS (bb, insn) |
2141 insn = NEXT_INSN (insn)) | |
2142 if (active_insn_p (insn)) | 3361 if (active_insn_p (insn)) |
2143 { | 3362 { |
2144 sum += bb->frequency; | 3363 sum += bb->frequency; |
2145 if (sum > limit) | 3364 if (sum > limit) |
2146 return true; | 3365 return true; |
2148 } | 3367 } |
2149 | 3368 |
2150 return false; | 3369 return false; |
2151 } | 3370 } |
2152 | 3371 |
2153 /* Estimate basic blocks frequency by given branch probabilities. */ | 3372 /* All basic blocks that are reachable only from unlikely basic blocks are |
3373 unlikely. */ | |
2154 | 3374 |
2155 void | 3375 void |
2156 estimate_bb_frequencies (void) | 3376 propagate_unlikely_bbs_forward (void) |
3377 { | |
3378 auto_vec<basic_block, 64> worklist; | |
3379 basic_block bb; | |
3380 edge_iterator ei; | |
3381 edge e; | |
3382 | |
3383 if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())) | |
3384 { | |
3385 ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1; | |
3386 worklist.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun)); | |
3387 | |
3388 while (worklist.length () > 0) | |
3389 { | |
3390 bb = worklist.pop (); | |
3391 FOR_EACH_EDGE (e, ei, bb->succs) | |
3392 if (!(e->count () == profile_count::zero ()) | |
3393 && !(e->dest->count == profile_count::zero ()) | |
3394 && !e->dest->aux) | |
3395 { | |
3396 e->dest->aux = (void *)(size_t) 1; | |
3397 worklist.safe_push (e->dest); | |
3398 } | |
3399 } | |
3400 } | |
3401 | |
3402 FOR_ALL_BB_FN (bb, cfun) | |
3403 { | |
3404 if (!bb->aux) | |
3405 { | |
3406 if (!(bb->count == profile_count::zero ()) | |
3407 && (dump_file && (dump_flags & TDF_DETAILS))) | |
3408 fprintf (dump_file, | |
3409 "Basic block %i is marked unlikely by forward prop\n", | |
3410 bb->index); | |
3411 bb->count = profile_count::zero (); | |
3412 bb->frequency = 0; | |
3413 } | |
3414 else | |
3415 bb->aux = NULL; | |
3416 } | |
3417 } | |
3418 | |
3419 /* Determine basic blocks/edges that are known to be unlikely executed and set | |
3420 their counters to zero. | |
3421 This is done with first identifying obviously unlikely BBs/edges and then | |
3422 propagating in both directions. */ | |
3423 | |
3424 static void | |
3425 determine_unlikely_bbs () | |
3426 { | |
3427 basic_block bb; | |
3428 auto_vec<basic_block, 64> worklist; | |
3429 edge_iterator ei; | |
3430 edge e; | |
3431 | |
3432 FOR_EACH_BB_FN (bb, cfun) | |
3433 { | |
3434 if (!(bb->count == profile_count::zero ()) | |
3435 && unlikely_executed_bb_p (bb)) | |
3436 { | |
3437 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3438 fprintf (dump_file, "Basic block %i is locally unlikely\n", | |
3439 bb->index); | |
3440 bb->count = profile_count::zero (); | |
3441 } | |
3442 | |
3443 if (bb->count == profile_count::zero ()) | |
3444 bb->frequency = 0; | |
3445 | |
3446 FOR_EACH_EDGE (e, ei, bb->succs) | |
3447 if (!(e->probability == profile_probability::never ()) | |
3448 && unlikely_executed_edge_p (e)) | |
3449 { | |
3450 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3451 fprintf (dump_file, "Edge %i->%i is locally unlikely\n", | |
3452 bb->index, e->dest->index); | |
3453 e->probability = profile_probability::never (); | |
3454 } | |
3455 | |
3456 gcc_checking_assert (!bb->aux); | |
3457 } | |
3458 | |
3459 auto_vec<int, 64> nsuccs; | |
3460 nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun)); | |
3461 FOR_ALL_BB_FN (bb, cfun) | |
3462 if (!(bb->count == profile_count::zero ()) | |
3463 && bb != EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
3464 { | |
3465 nsuccs[bb->index] = 0; | |
3466 FOR_EACH_EDGE (e, ei, bb->succs) | |
3467 if (!(e->probability == profile_probability::never ()) | |
3468 && !(e->dest->count == profile_count::zero ())) | |
3469 nsuccs[bb->index]++; | |
3470 if (!nsuccs[bb->index]) | |
3471 worklist.safe_push (bb); | |
3472 } | |
3473 while (worklist.length () > 0) | |
3474 { | |
3475 bb = worklist.pop (); | |
3476 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
3477 { | |
3478 bool found = false; | |
3479 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | |
3480 !gsi_end_p (gsi); gsi_next (&gsi)) | |
3481 if (stmt_can_terminate_bb_p (gsi_stmt (gsi)) | |
3482 /* stmt_can_terminate_bb_p special cases noreturns because it | |
3483 assumes that fake edges are created. We want to know that | |
3484 noreturn alone does not imply BB to be unlikely. */ | |
3485 || (is_gimple_call (gsi_stmt (gsi)) | |
3486 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_NORETURN))) | |
3487 { | |
3488 found = true; | |
3489 break; | |
3490 } | |
3491 if (found) | |
3492 continue; | |
3493 } | |
3494 if (!(bb->count == profile_count::zero ()) | |
3495 && (dump_file && (dump_flags & TDF_DETAILS))) | |
3496 fprintf (dump_file, | |
3497 "Basic block %i is marked unlikely by backward prop\n", | |
3498 bb->index); | |
3499 bb->count = profile_count::zero (); | |
3500 bb->frequency = 0; | |
3501 FOR_EACH_EDGE (e, ei, bb->preds) | |
3502 if (!(e->probability == profile_probability::never ())) | |
3503 { | |
3504 e->probability = profile_probability::never (); | |
3505 if (!(e->src->count == profile_count::zero ())) | |
3506 { | |
3507 nsuccs[e->src->index]--; | |
3508 if (!nsuccs[e->src->index]) | |
3509 worklist.safe_push (e->src); | |
3510 } | |
3511 } | |
3512 } | |
3513 } | |
3514 | |
3515 /* Estimate and propagate basic block frequencies using the given branch | |
3516 probabilities. If FORCE is true, the frequencies are used to estimate | |
3517 the counts even when there are already non-zero profile counts. */ | |
3518 | |
3519 void | |
3520 estimate_bb_frequencies (bool force) | |
2157 { | 3521 { |
2158 basic_block bb; | 3522 basic_block bb; |
2159 sreal freq_max; | 3523 sreal freq_max; |
2160 | 3524 |
2161 if (profile_status != PROFILE_READ || !counts_to_freqs ()) | 3525 determine_unlikely_bbs (); |
3526 | |
3527 if (force || profile_status_for_fn (cfun) != PROFILE_READ | |
3528 || !counts_to_freqs ()) | |
2162 { | 3529 { |
2163 static int real_values_initialized = 0; | 3530 static int real_values_initialized = 0; |
2164 | 3531 |
2165 if (!real_values_initialized) | 3532 if (!real_values_initialized) |
2166 { | 3533 { |
2167 real_values_initialized = 1; | 3534 real_values_initialized = 1; |
2168 sreal_init (&real_zero, 0, 0); | 3535 real_br_prob_base = REG_BR_PROB_BASE; |
2169 sreal_init (&real_one, 1, 0); | 3536 real_bb_freq_max = BB_FREQ_MAX; |
2170 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0); | 3537 real_one_half = sreal (1, -1); |
2171 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0); | 3538 real_inv_br_prob_base = sreal (1) / real_br_prob_base; |
2172 sreal_init (&real_one_half, 1, -1); | 3539 real_almost_one = sreal (1) - real_inv_br_prob_base; |
2173 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base); | |
2174 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base); | |
2175 } | 3540 } |
2176 | 3541 |
2177 mark_dfs_back_edges (); | 3542 mark_dfs_back_edges (); |
2178 | 3543 |
2179 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE; | 3544 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability = |
3545 profile_probability::always (); | |
2180 | 3546 |
2181 /* Set up block info for each basic block. */ | 3547 /* Set up block info for each basic block. */ |
2182 alloc_aux_for_blocks (sizeof (struct block_info_def)); | 3548 alloc_aux_for_blocks (sizeof (block_info)); |
2183 alloc_aux_for_edges (sizeof (struct edge_info_def)); | 3549 alloc_aux_for_edges (sizeof (edge_prob_info)); |
2184 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) | 3550 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) |
2185 { | 3551 { |
2186 edge e; | 3552 edge e; |
2187 edge_iterator ei; | 3553 edge_iterator ei; |
2188 | 3554 |
2189 FOR_EACH_EDGE (e, ei, bb->succs) | 3555 FOR_EACH_EDGE (e, ei, bb->succs) |
2190 { | 3556 { |
2191 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0); | 3557 EDGE_INFO (e)->back_edge_prob |
2192 sreal_mul (&EDGE_INFO (e)->back_edge_prob, | 3558 = e->probability.to_reg_br_prob_base (); |
2193 &EDGE_INFO (e)->back_edge_prob, | 3559 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; |
2194 &real_inv_br_prob_base); | |
2195 } | 3560 } |
2196 } | 3561 } |
2197 | 3562 |
2198 /* First compute probabilities locally for each loop from innermost | 3563 /* First compute frequencies locally for each loop from innermost |
2199 to outermost to examine probabilities for back edges. */ | 3564 to outermost to examine frequencies for back edges. */ |
2200 estimate_loops (); | 3565 estimate_loops (); |
2201 | 3566 |
2202 memcpy (&freq_max, &real_zero, sizeof (real_zero)); | 3567 freq_max = 0; |
2203 FOR_EACH_BB (bb) | 3568 FOR_EACH_BB_FN (bb, cfun) |
2204 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0) | 3569 if (freq_max < BLOCK_INFO (bb)->frequency) |
2205 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max)); | 3570 freq_max = BLOCK_INFO (bb)->frequency; |
2206 | 3571 |
2207 sreal_div (&freq_max, &real_bb_freq_max, &freq_max); | 3572 freq_max = real_bb_freq_max / freq_max; |
2208 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) | 3573 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) |
2209 { | 3574 { |
2210 sreal tmp; | 3575 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; |
2211 | 3576 bb->frequency = tmp.to_int (); |
2212 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max); | |
2213 sreal_add (&tmp, &tmp, &real_one_half); | |
2214 bb->frequency = sreal_to_int (&tmp); | |
2215 } | 3577 } |
2216 | 3578 |
2217 free_aux_for_blocks (); | 3579 free_aux_for_blocks (); |
2218 free_aux_for_edges (); | 3580 free_aux_for_edges (); |
2219 } | 3581 } |
2223 /* Decide whether function is hot, cold or unlikely executed. */ | 3585 /* Decide whether function is hot, cold or unlikely executed. */ |
2224 void | 3586 void |
2225 compute_function_frequency (void) | 3587 compute_function_frequency (void) |
2226 { | 3588 { |
2227 basic_block bb; | 3589 basic_block bb; |
2228 struct cgraph_node *node = cgraph_node (current_function_decl); | 3590 struct cgraph_node *node = cgraph_node::get (current_function_decl); |
3591 | |
2229 if (DECL_STATIC_CONSTRUCTOR (current_function_decl) | 3592 if (DECL_STATIC_CONSTRUCTOR (current_function_decl) |
2230 || MAIN_NAME_P (DECL_NAME (current_function_decl))) | 3593 || MAIN_NAME_P (DECL_NAME (current_function_decl))) |
2231 node->only_called_at_startup = true; | 3594 node->only_called_at_startup = true; |
2232 if (DECL_STATIC_DESTRUCTOR (current_function_decl)) | 3595 if (DECL_STATIC_DESTRUCTOR (current_function_decl)) |
2233 node->only_called_at_exit = true; | 3596 node->only_called_at_exit = true; |
2234 | 3597 |
2235 if (!profile_info || !flag_branch_probabilities) | 3598 if (profile_status_for_fn (cfun) != PROFILE_READ) |
2236 { | 3599 { |
2237 int flags = flags_from_decl_or_type (current_function_decl); | 3600 int flags = flags_from_decl_or_type (current_function_decl); |
2238 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) | 3601 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero () |
2239 != NULL) | 3602 || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) |
2240 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | 3603 != NULL) |
3604 { | |
3605 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | |
3606 warn_function_cold (current_function_decl); | |
3607 } | |
2241 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl)) | 3608 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl)) |
2242 != NULL) | 3609 != NULL) |
2243 node->frequency = NODE_FREQUENCY_HOT; | 3610 node->frequency = NODE_FREQUENCY_HOT; |
2244 else if (flags & ECF_NORETURN) | 3611 else if (flags & ECF_NORETURN) |
2245 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; | 3612 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; |
2248 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl) | 3615 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl) |
2249 || DECL_STATIC_DESTRUCTOR (current_function_decl)) | 3616 || DECL_STATIC_DESTRUCTOR (current_function_decl)) |
2250 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; | 3617 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; |
2251 return; | 3618 return; |
2252 } | 3619 } |
2253 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | 3620 |
2254 FOR_EACH_BB (bb) | 3621 /* Only first time try to drop function into unlikely executed. |
2255 { | 3622 After inlining the roundoff errors may confuse us. |
2256 if (maybe_hot_bb_p (bb)) | 3623 Ipa-profile pass will drop functions only called from unlikely |
3624 functions to unlikely and that is most of what we care about. */ | |
3625 if (!cfun->after_inlining) | |
3626 { | |
3627 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | |
3628 warn_function_cold (current_function_decl); | |
3629 } | |
3630 FOR_EACH_BB_FN (bb, cfun) | |
3631 { | |
3632 if (maybe_hot_bb_p (cfun, bb)) | |
2257 { | 3633 { |
2258 node->frequency = NODE_FREQUENCY_HOT; | 3634 node->frequency = NODE_FREQUENCY_HOT; |
2259 return; | 3635 return; |
2260 } | 3636 } |
2261 if (!probably_never_executed_bb_p (bb)) | 3637 if (!probably_never_executed_bb_p (cfun, bb)) |
2262 node->frequency = NODE_FREQUENCY_NORMAL; | 3638 node->frequency = NODE_FREQUENCY_NORMAL; |
2263 } | 3639 } |
2264 } | |
2265 | |
2266 static bool | |
2267 gate_estimate_probability (void) | |
2268 { | |
2269 return flag_guess_branch_prob; | |
2270 } | 3640 } |
2271 | 3641 |
2272 /* Build PREDICT_EXPR. */ | 3642 /* Build PREDICT_EXPR. */ |
2273 tree | 3643 tree |
2274 build_predict_expr (enum br_predictor predictor, enum prediction taken) | 3644 build_predict_expr (enum br_predictor predictor, enum prediction taken) |
2275 { | 3645 { |
2276 tree t = build1 (PREDICT_EXPR, void_type_node, | 3646 tree t = build1 (PREDICT_EXPR, void_type_node, |
2277 build_int_cst (NULL, predictor)); | 3647 build_int_cst (integer_type_node, predictor)); |
2278 SET_PREDICT_EXPR_OUTCOME (t, taken); | 3648 SET_PREDICT_EXPR_OUTCOME (t, taken); |
2279 return t; | 3649 return t; |
2280 } | 3650 } |
2281 | 3651 |
2282 const char * | 3652 const char * |
2283 predictor_name (enum br_predictor predictor) | 3653 predictor_name (enum br_predictor predictor) |
2284 { | 3654 { |
2285 return predictor_info[predictor].name; | 3655 return predictor_info[predictor].name; |
2286 } | 3656 } |
2287 | 3657 |
2288 struct gimple_opt_pass pass_profile = | 3658 /* Predict branch probabilities and estimate profile of the tree CFG. */ |
2289 { | 3659 |
2290 { | 3660 namespace { |
2291 GIMPLE_PASS, | 3661 |
2292 "profile", /* name */ | 3662 const pass_data pass_data_profile = |
2293 gate_estimate_probability, /* gate */ | 3663 { |
2294 tree_estimate_probability_driver, /* execute */ | 3664 GIMPLE_PASS, /* type */ |
2295 NULL, /* sub */ | 3665 "profile_estimate", /* name */ |
2296 NULL, /* next */ | 3666 OPTGROUP_NONE, /* optinfo_flags */ |
2297 0, /* static_pass_number */ | 3667 TV_BRANCH_PROB, /* tv_id */ |
2298 TV_BRANCH_PROB, /* tv_id */ | 3668 PROP_cfg, /* properties_required */ |
2299 PROP_cfg, /* properties_required */ | 3669 0, /* properties_provided */ |
2300 0, /* properties_provided */ | 3670 0, /* properties_destroyed */ |
2301 0, /* properties_destroyed */ | 3671 0, /* todo_flags_start */ |
2302 0, /* todo_flags_start */ | 3672 0, /* todo_flags_finish */ |
2303 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */ | |
2304 } | |
2305 }; | 3673 }; |
2306 | 3674 |
2307 struct gimple_opt_pass pass_strip_predict_hints = | 3675 class pass_profile : public gimple_opt_pass |
2308 { | 3676 { |
2309 { | 3677 public: |
2310 GIMPLE_PASS, | 3678 pass_profile (gcc::context *ctxt) |
2311 "*strip_predict_hints", /* name */ | 3679 : gimple_opt_pass (pass_data_profile, ctxt) |
2312 NULL, /* gate */ | 3680 {} |
2313 strip_predict_hints, /* execute */ | 3681 |
2314 NULL, /* sub */ | 3682 /* opt_pass methods: */ |
2315 NULL, /* next */ | 3683 virtual bool gate (function *) { return flag_guess_branch_prob; } |
2316 0, /* static_pass_number */ | 3684 virtual unsigned int execute (function *); |
2317 TV_BRANCH_PROB, /* tv_id */ | 3685 |
2318 PROP_cfg, /* properties_required */ | 3686 }; // class pass_profile |
2319 0, /* properties_provided */ | 3687 |
2320 0, /* properties_destroyed */ | 3688 unsigned int |
2321 0, /* todo_flags_start */ | 3689 pass_profile::execute (function *fun) |
2322 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */ | 3690 { |
2323 } | 3691 unsigned nb_loops; |
3692 | |
3693 if (profile_status_for_fn (cfun) == PROFILE_GUESSED) | |
3694 return 0; | |
3695 | |
3696 loop_optimizer_init (LOOPS_NORMAL); | |
3697 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3698 flow_loops_dump (dump_file, NULL, 0); | |
3699 | |
3700 mark_irreducible_loops (); | |
3701 | |
3702 nb_loops = number_of_loops (fun); | |
3703 if (nb_loops > 1) | |
3704 scev_initialize (); | |
3705 | |
3706 tree_estimate_probability (false); | |
3707 | |
3708 if (nb_loops > 1) | |
3709 scev_finalize (); | |
3710 | |
3711 loop_optimizer_finalize (); | |
3712 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3713 gimple_dump_cfg (dump_file, dump_flags); | |
3714 if (profile_status_for_fn (fun) == PROFILE_ABSENT) | |
3715 profile_status_for_fn (fun) = PROFILE_GUESSED; | |
3716 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3717 { | |
3718 struct loop *loop; | |
3719 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | |
3720 if (loop->header->frequency) | |
3721 fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n", | |
3722 loop->num, | |
3723 (int)expected_loop_iterations_unbounded (loop)); | |
3724 } | |
3725 return 0; | |
3726 } | |
3727 | |
3728 } // anon namespace | |
3729 | |
3730 gimple_opt_pass * | |
3731 make_pass_profile (gcc::context *ctxt) | |
3732 { | |
3733 return new pass_profile (ctxt); | |
3734 } | |
3735 | |
3736 namespace { | |
3737 | |
3738 const pass_data pass_data_strip_predict_hints = | |
3739 { | |
3740 GIMPLE_PASS, /* type */ | |
3741 "*strip_predict_hints", /* name */ | |
3742 OPTGROUP_NONE, /* optinfo_flags */ | |
3743 TV_BRANCH_PROB, /* tv_id */ | |
3744 PROP_cfg, /* properties_required */ | |
3745 0, /* properties_provided */ | |
3746 0, /* properties_destroyed */ | |
3747 0, /* todo_flags_start */ | |
3748 0, /* todo_flags_finish */ | |
2324 }; | 3749 }; |
3750 | |
3751 class pass_strip_predict_hints : public gimple_opt_pass | |
3752 { | |
3753 public: | |
3754 pass_strip_predict_hints (gcc::context *ctxt) | |
3755 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt) | |
3756 {} | |
3757 | |
3758 /* opt_pass methods: */ | |
3759 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); } | |
3760 virtual unsigned int execute (function *); | |
3761 | |
3762 }; // class pass_strip_predict_hints | |
3763 | |
3764 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements | |
3765 we no longer need. */ | |
3766 unsigned int | |
3767 pass_strip_predict_hints::execute (function *fun) | |
3768 { | |
3769 basic_block bb; | |
3770 gimple *ass_stmt; | |
3771 tree var; | |
3772 bool changed = false; | |
3773 | |
3774 FOR_EACH_BB_FN (bb, fun) | |
3775 { | |
3776 gimple_stmt_iterator bi; | |
3777 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);) | |
3778 { | |
3779 gimple *stmt = gsi_stmt (bi); | |
3780 | |
3781 if (gimple_code (stmt) == GIMPLE_PREDICT) | |
3782 { | |
3783 gsi_remove (&bi, true); | |
3784 changed = true; | |
3785 continue; | |
3786 } | |
3787 else if (is_gimple_call (stmt)) | |
3788 { | |
3789 tree fndecl = gimple_call_fndecl (stmt); | |
3790 | |
3791 if ((fndecl | |
3792 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL | |
3793 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT | |
3794 && gimple_call_num_args (stmt) == 2) | |
3795 || (gimple_call_internal_p (stmt) | |
3796 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT)) | |
3797 { | |
3798 var = gimple_call_lhs (stmt); | |
3799 changed = true; | |
3800 if (var) | |
3801 { | |
3802 ass_stmt | |
3803 = gimple_build_assign (var, gimple_call_arg (stmt, 0)); | |
3804 gsi_replace (&bi, ass_stmt, true); | |
3805 } | |
3806 else | |
3807 { | |
3808 gsi_remove (&bi, true); | |
3809 continue; | |
3810 } | |
3811 } | |
3812 } | |
3813 gsi_next (&bi); | |
3814 } | |
3815 } | |
3816 return changed ? TODO_cleanup_cfg : 0; | |
3817 } | |
3818 | |
3819 } // anon namespace | |
3820 | |
3821 gimple_opt_pass * | |
3822 make_pass_strip_predict_hints (gcc::context *ctxt) | |
3823 { | |
3824 return new pass_strip_predict_hints (ctxt); | |
3825 } | |
2325 | 3826 |
2326 /* Rebuild function frequencies. Passes are in general expected to | 3827 /* Rebuild function frequencies. Passes are in general expected to |
2327 maintain profile by hand, however in some cases this is not possible: | 3828 maintain profile by hand, however in some cases this is not possible: |
2328 for example when inlining several functions with loops freuqencies might run | 3829 for example when inlining several functions with loops freuqencies might run |
2329 out of scale and thus needs to be recomputed. */ | 3830 out of scale and thus needs to be recomputed. */ |
2330 | 3831 |
2331 void | 3832 void |
2332 rebuild_frequencies (void) | 3833 rebuild_frequencies (void) |
2333 { | 3834 { |
2334 timevar_push (TV_REBUILD_FREQUENCIES); | 3835 timevar_push (TV_REBUILD_FREQUENCIES); |
2335 if (profile_status == PROFILE_GUESSED) | 3836 |
3837 /* When the max bb count in the function is small, there is a higher | |
3838 chance that there were truncation errors in the integer scaling | |
3839 of counts by inlining and other optimizations. This could lead | |
3840 to incorrect classification of code as being cold when it isn't. | |
3841 In that case, force the estimation of bb counts/frequencies from the | |
3842 branch probabilities, rather than computing frequencies from counts, | |
3843 which may also lead to frequencies incorrectly reduced to 0. There | |
3844 is less precision in the probabilities, so we only do this for small | |
3845 max counts. */ | |
3846 profile_count count_max = profile_count::zero (); | |
3847 basic_block bb; | |
3848 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) | |
3849 if (bb->count > count_max) | |
3850 count_max = bb->count; | |
3851 | |
3852 if (profile_status_for_fn (cfun) == PROFILE_GUESSED | |
3853 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ | |
3854 && count_max < REG_BR_PROB_BASE / 10)) | |
2336 { | 3855 { |
2337 loop_optimizer_init (0); | 3856 loop_optimizer_init (0); |
2338 add_noreturn_fake_exit_edges (); | 3857 add_noreturn_fake_exit_edges (); |
2339 mark_irreducible_loops (); | 3858 mark_irreducible_loops (); |
2340 connect_infinite_loops_to_exit (); | 3859 connect_infinite_loops_to_exit (); |
2341 estimate_bb_frequencies (); | 3860 estimate_bb_frequencies (true); |
2342 remove_fake_exit_edges (); | 3861 remove_fake_exit_edges (); |
2343 loop_optimizer_finalize (); | 3862 loop_optimizer_finalize (); |
2344 } | 3863 } |
2345 else if (profile_status == PROFILE_READ) | 3864 else if (profile_status_for_fn (cfun) == PROFILE_READ) |
2346 counts_to_freqs (); | 3865 counts_to_freqs (); |
2347 else | 3866 else |
2348 gcc_unreachable (); | 3867 gcc_unreachable (); |
2349 timevar_pop (TV_REBUILD_FREQUENCIES); | 3868 timevar_pop (TV_REBUILD_FREQUENCIES); |
2350 } | 3869 } |
3870 | |
3871 /* Perform a dry run of the branch prediction pass and report comparsion of | |
3872 the predicted and real profile into the dump file. */ | |
3873 | |
3874 void | |
3875 report_predictor_hitrates (void) | |
3876 { | |
3877 unsigned nb_loops; | |
3878 | |
3879 loop_optimizer_init (LOOPS_NORMAL); | |
3880 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3881 flow_loops_dump (dump_file, NULL, 0); | |
3882 | |
3883 mark_irreducible_loops (); | |
3884 | |
3885 nb_loops = number_of_loops (cfun); | |
3886 if (nb_loops > 1) | |
3887 scev_initialize (); | |
3888 | |
3889 tree_estimate_probability (true); | |
3890 | |
3891 if (nb_loops > 1) | |
3892 scev_finalize (); | |
3893 | |
3894 loop_optimizer_finalize (); | |
3895 } | |
3896 | |
3897 /* Force edge E to be cold. | |
3898 If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise | |
3899 keep low probability to represent possible error in a guess. This is used | |
3900 i.e. in case we predict loop to likely iterate given number of times but | |
3901 we are not 100% sure. | |
3902 | |
3903 This function locally updates profile without attempt to keep global | |
3904 consistency which can not be reached in full generality without full profile | |
3905 rebuild from probabilities alone. Doing so is not necessarily a good idea | |
3906 because frequencies and counts may be more realistic then probabilities. | |
3907 | |
3908 In some cases (such as for elimination of early exits during full loop | |
3909 unrolling) the caller can ensure that profile will get consistent | |
3910 afterwards. */ | |
3911 | |
3912 void | |
3913 force_edge_cold (edge e, bool impossible) | |
3914 { | |
3915 profile_count count_sum = profile_count::zero (); | |
3916 profile_probability prob_sum = profile_probability::never (); | |
3917 edge_iterator ei; | |
3918 edge e2; | |
3919 bool uninitialized_exit = false; | |
3920 | |
3921 profile_probability goal = (impossible ? profile_probability::never () | |
3922 : profile_probability::very_unlikely ()); | |
3923 | |
3924 /* If edge is already improbably or cold, just return. */ | |
3925 if (e->probability <= goal | |
3926 && (!impossible || e->count () == profile_count::zero ())) | |
3927 return; | |
3928 FOR_EACH_EDGE (e2, ei, e->src->succs) | |
3929 if (e2 != e) | |
3930 { | |
3931 if (e2->count ().initialized_p ()) | |
3932 count_sum += e2->count (); | |
3933 else | |
3934 uninitialized_exit = true; | |
3935 if (e2->probability.initialized_p ()) | |
3936 prob_sum += e2->probability; | |
3937 } | |
3938 | |
3939 /* If there are other edges out of e->src, redistribute probabilitity | |
3940 there. */ | |
3941 if (prob_sum > profile_probability::never ()) | |
3942 { | |
3943 if (!(e->probability < goal)) | |
3944 e->probability = goal; | |
3945 | |
3946 profile_probability prob_comp = prob_sum / e->probability.invert (); | |
3947 | |
3948 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3949 fprintf (dump_file, "Making edge %i->%i %s by redistributing " | |
3950 "probability to other edges.\n", | |
3951 e->src->index, e->dest->index, | |
3952 impossible ? "impossible" : "cold"); | |
3953 FOR_EACH_EDGE (e2, ei, e->src->succs) | |
3954 if (e2 != e) | |
3955 { | |
3956 e2->probability /= prob_comp; | |
3957 } | |
3958 if (current_ir_type () != IR_GIMPLE | |
3959 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
3960 update_br_prob_note (e->src); | |
3961 } | |
3962 /* If all edges out of e->src are unlikely, the basic block itself | |
3963 is unlikely. */ | |
3964 else | |
3965 { | |
3966 if (prob_sum == profile_probability::never ()) | |
3967 e->probability = profile_probability::always (); | |
3968 else | |
3969 { | |
3970 if (impossible) | |
3971 e->probability = profile_probability::never (); | |
3972 /* If BB has some edges out that are not impossible, we can not | |
3973 assume that BB itself is. */ | |
3974 impossible = false; | |
3975 } | |
3976 if (current_ir_type () != IR_GIMPLE | |
3977 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
3978 update_br_prob_note (e->src); | |
3979 if (e->src->count == profile_count::zero ()) | |
3980 return; | |
3981 if (count_sum == profile_count::zero () && !uninitialized_exit | |
3982 && impossible) | |
3983 { | |
3984 bool found = false; | |
3985 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
3986 ; | |
3987 else if (current_ir_type () == IR_GIMPLE) | |
3988 for (gimple_stmt_iterator gsi = gsi_start_bb (e->src); | |
3989 !gsi_end_p (gsi); gsi_next (&gsi)) | |
3990 { | |
3991 if (stmt_can_terminate_bb_p (gsi_stmt (gsi))) | |
3992 { | |
3993 found = true; | |
3994 break; | |
3995 } | |
3996 } | |
3997 /* FIXME: Implement RTL path. */ | |
3998 else | |
3999 found = true; | |
4000 if (!found) | |
4001 { | |
4002 if (dump_file && (dump_flags & TDF_DETAILS)) | |
4003 fprintf (dump_file, | |
4004 "Making bb %i impossible and dropping count to 0.\n", | |
4005 e->src->index); | |
4006 e->src->count = profile_count::zero (); | |
4007 FOR_EACH_EDGE (e2, ei, e->src->preds) | |
4008 force_edge_cold (e2, impossible); | |
4009 return; | |
4010 } | |
4011 } | |
4012 | |
4013 /* If we did not adjusting, the source basic block has no likely edeges | |
4014 leaving other direction. In that case force that bb cold, too. | |
4015 This in general is difficult task to do, but handle special case when | |
4016 BB has only one predecestor. This is common case when we are updating | |
4017 after loop transforms. */ | |
4018 if (!(prob_sum > profile_probability::never ()) | |
4019 && count_sum == profile_count::zero () | |
4020 && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1)) | |
4021 { | |
4022 int old_frequency = e->src->frequency; | |
4023 if (dump_file && (dump_flags & TDF_DETAILS)) | |
4024 fprintf (dump_file, "Making bb %i %s.\n", e->src->index, | |
4025 impossible ? "impossible" : "cold"); | |
4026 e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1); | |
4027 if (impossible) | |
4028 e->src->count = profile_count::zero (); | |
4029 else | |
4030 e->src->count = e->count ().apply_scale (e->src->frequency, | |
4031 old_frequency); | |
4032 force_edge_cold (single_pred_edge (e->src), impossible); | |
4033 } | |
4034 else if (dump_file && (dump_flags & TDF_DETAILS) | |
4035 && maybe_hot_bb_p (cfun, e->src)) | |
4036 fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index, | |
4037 impossible ? "impossible" : "cold"); | |
4038 } | |
4039 } | |
4040 | |
4041 #if CHECKING_P | |
4042 | |
4043 namespace selftest { | |
4044 | |
4045 /* Test that value range of predictor values defined in predict.def is | |
4046 within range (50, 100]. */ | |
4047 | |
4048 struct branch_predictor | |
4049 { | |
4050 const char *name; | |
4051 unsigned probability; | |
4052 }; | |
4053 | |
4054 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE }, | |
4055 | |
4056 static void | |
4057 test_prediction_value_range () | |
4058 { | |
4059 branch_predictor predictors[] = { | |
4060 #include "predict.def" | |
4061 {NULL, -1U} | |
4062 }; | |
4063 | |
4064 for (unsigned i = 0; predictors[i].name != NULL; i++) | |
4065 { | |
4066 unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE; | |
4067 ASSERT_TRUE (p > 50 && p <= 100); | |
4068 } | |
4069 } | |
4070 | |
4071 #undef DEF_PREDICTOR | |
4072 | |
4073 /* Run all of the selfests within this file. */ | |
4074 | |
4075 void | |
4076 predict_c_tests () | |
4077 { | |
4078 test_prediction_value_range (); | |
4079 } | |
4080 | |
4081 } // namespace selftest | |
4082 #endif /* CHECKING_P. */ |