Mercurial > hg > CbC > CbC_gcc
annotate gcc/predict.c @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | a06113de4d67 |
children | b7f97abdc517 |
rev | line source |
---|---|
/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
30 | |
31 #include "config.h" | |
32 #include "system.h" | |
33 #include "coretypes.h" | |
34 #include "tm.h" | |
35 #include "tree.h" | |
36 #include "rtl.h" | |
37 #include "tm_p.h" | |
38 #include "hard-reg-set.h" | |
39 #include "basic-block.h" | |
40 #include "insn-config.h" | |
41 #include "regs.h" | |
42 #include "flags.h" | |
43 #include "output.h" | |
44 #include "function.h" | |
45 #include "except.h" | |
46 #include "toplev.h" | |
47 #include "recog.h" | |
48 #include "expr.h" | |
49 #include "predict.h" | |
50 #include "coverage.h" | |
51 #include "sreal.h" | |
52 #include "params.h" | |
53 #include "target.h" | |
54 #include "cfgloop.h" | |
55 #include "tree-flow.h" | |
56 #include "ggc.h" | |
57 #include "tree-dump.h" | |
58 #include "tree-pass.h" | |
59 #include "timevar.h" | |
60 #include "tree-scalar-evolution.h" | |
61 #include "cfgloop.h" | |
62 #include "pointer-set.h" | |
63 | |
64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, | |
65 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */ | |
66 static sreal real_zero, real_one, real_almost_one, real_br_prob_base, | |
67 real_inv_br_prob_base, real_one_half, real_bb_freq_max; | |
68 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
69 /* Random guesstimation given names. |
0 | 70 PROV_VERY_UNLIKELY should be small enough so basic block predicted |
71 by it gets bellow HOT_BB_FREQUENCY_FRANCTION. */ | |
72 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1) | |
73 #define PROB_EVEN (REG_BR_PROB_BASE / 2) | |
74 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY) | |
75 #define PROB_ALWAYS (REG_BR_PROB_BASE) | |
76 | |
77 static void combine_predictions_for_insn (rtx, basic_block); | |
78 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int); | |
79 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction); | |
80 static void choose_function_section (void); | |
81 static bool can_predict_insn_p (const_rtx); | |
82 | |
83 /* Information we hold about each branch predictor. | |
84 Filled using information from predict.def. */ | |
85 | |
86 struct predictor_info | |
87 { | |
88 const char *const name; /* Name used in the debugging dumps. */ | |
89 const int hitrate; /* Expected hitrate used by | |
90 predict_insn_def call. */ | |
91 const int flags; | |
92 }; | |
93 | |
94 /* Use given predictor without Dempster-Shaffer theory if it matches | |
95 using first_match heuristics. */ | |
96 #define PRED_FLAG_FIRST_MATCH 1 | |
97 | |
98 /* Recompute hitrate in percent to our representation. */ | |
99 | |
100 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100) | |
101 | |
102 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS}, | |
103 static const struct predictor_info predictor_info[]= { | |
104 #include "predict.def" | |
105 | |
106 /* Upper bound on predictors. */ | |
107 {NULL, 0, 0} | |
108 }; | |
109 #undef DEF_PREDICTOR | |
110 | |
111 /* Return TRUE if frequency FREQ is considered to be hot. */ | |
112 | |
113 static inline bool | |
114 maybe_hot_frequency_p (int freq) | |
115 { | |
116 if (!profile_info || !flag_branch_probabilities) | |
117 { | |
118 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) | |
119 return false; | |
120 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT) | |
121 return true; | |
122 } | |
123 if (profile_status == PROFILE_ABSENT) | |
124 return true; | |
125 if (freq < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)) | |
126 return false; | |
127 return true; | |
128 } | |
129 | |
130 /* Return TRUE if frequency FREQ is considered to be hot. */ | |
131 | |
132 static inline bool | |
133 maybe_hot_count_p (gcov_type count) | |
134 { | |
135 if (profile_status != PROFILE_READ) | |
136 return true; | |
137 /* Code executed at most once is not hot. */ | |
138 if (profile_info->runs >= count) | |
139 return false; | |
140 return (count | |
141 > profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)); | |
142 } | |
143 | |
144 /* Return true in case BB can be CPU intensive and should be optimized | |
145 for maximal performance. */ | |
146 | |
147 bool | |
148 maybe_hot_bb_p (const_basic_block bb) | |
149 { | |
150 if (profile_status == PROFILE_READ) | |
151 return maybe_hot_count_p (bb->count); | |
152 return maybe_hot_frequency_p (bb->frequency); | |
153 } | |
154 | |
155 /* Return true if the call can be hot. */ | |
156 | |
157 bool | |
158 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge) | |
159 { | |
160 if (profile_info && flag_branch_probabilities | |
161 && (edge->count | |
162 <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION))) | |
163 return false; | |
164 if (lookup_attribute ("cold", DECL_ATTRIBUTES (edge->callee->decl)) | |
165 || lookup_attribute ("cold", DECL_ATTRIBUTES (edge->caller->decl))) | |
166 return false; | |
167 if (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl))) | |
168 return true; | |
169 if (flag_guess_branch_prob | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
170 && edge->frequency <= (CGRAPH_FREQ_BASE |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
171 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))) |
0 | 172 return false; |
173 return true; | |
174 } | |
175 | |
176 /* Return true in case BB can be CPU intensive and should be optimized | |
177 for maximal performance. */ | |
178 | |
179 bool | |
180 maybe_hot_edge_p (edge e) | |
181 { | |
182 if (profile_status == PROFILE_READ) | |
183 return maybe_hot_count_p (e->count); | |
184 return maybe_hot_frequency_p (EDGE_FREQUENCY (e)); | |
185 } | |
186 | |
187 /* Return true in case BB is probably never executed. */ | |
188 bool | |
189 probably_never_executed_bb_p (const_basic_block bb) | |
190 { | |
191 if (profile_info && flag_branch_probabilities) | |
192 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0; | |
193 if ((!profile_info || !flag_branch_probabilities) | |
194 && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) | |
195 return true; | |
196 return false; | |
197 } | |
198 | |
199 /* Return true when current function should always be optimized for size. */ | |
200 | |
201 bool | |
202 optimize_function_for_size_p (struct function *fun) | |
203 { | |
204 return (optimize_size | |
205 || (fun && (fun->function_frequency | |
206 == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED))); | |
207 } | |
208 | |
209 /* Return true when current function should always be optimized for speed. */ | |
210 | |
211 bool | |
212 optimize_function_for_speed_p (struct function *fun) | |
213 { | |
214 return !optimize_function_for_size_p (fun); | |
215 } | |
216 | |
217 /* Return TRUE when BB should be optimized for size. */ | |
218 | |
219 bool | |
220 optimize_bb_for_size_p (const_basic_block bb) | |
221 { | |
222 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb); | |
223 } | |
224 | |
225 /* Return TRUE when BB should be optimized for speed. */ | |
226 | |
227 bool | |
228 optimize_bb_for_speed_p (const_basic_block bb) | |
229 { | |
230 return !optimize_bb_for_size_p (bb); | |
231 } | |
232 | |
233 /* Return TRUE when BB should be optimized for size. */ | |
234 | |
235 bool | |
236 optimize_edge_for_size_p (edge e) | |
237 { | |
238 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e); | |
239 } | |
240 | |
241 /* Return TRUE when BB should be optimized for speed. */ | |
242 | |
243 bool | |
244 optimize_edge_for_speed_p (edge e) | |
245 { | |
246 return !optimize_edge_for_size_p (e); | |
247 } | |
248 | |
249 /* Return TRUE when BB should be optimized for size. */ | |
250 | |
251 bool | |
252 optimize_insn_for_size_p (void) | |
253 { | |
254 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p; | |
255 } | |
256 | |
257 /* Return TRUE when BB should be optimized for speed. */ | |
258 | |
259 bool | |
260 optimize_insn_for_speed_p (void) | |
261 { | |
262 return !optimize_insn_for_size_p (); | |
263 } | |
264 | |
265 /* Return TRUE when LOOP should be optimized for size. */ | |
266 | |
267 bool | |
268 optimize_loop_for_size_p (struct loop *loop) | |
269 { | |
270 return optimize_bb_for_size_p (loop->header); | |
271 } | |
272 | |
273 /* Return TRUE when LOOP should be optimized for speed. */ | |
274 | |
275 bool | |
276 optimize_loop_for_speed_p (struct loop *loop) | |
277 { | |
278 return optimize_bb_for_speed_p (loop->header); | |
279 } | |
280 | |
281 /* Return TRUE when LOOP nest should be optimized for speed. */ | |
282 | |
283 bool | |
284 optimize_loop_nest_for_speed_p (struct loop *loop) | |
285 { | |
286 struct loop *l = loop; | |
287 if (optimize_loop_for_speed_p (loop)) | |
288 return true; | |
289 l = loop->inner; | |
290 while (l && l != loop) | |
291 { | |
292 if (optimize_loop_for_speed_p (l)) | |
293 return true; | |
294 if (l->inner) | |
295 l = l->inner; | |
296 else if (l->next) | |
297 l = l->next; | |
298 else | |
299 { | |
300 while (l != loop && !l->next) | |
301 l = loop_outer (l); | |
302 if (l != loop) | |
303 l = l->next; | |
304 } | |
305 } | |
306 return false; | |
307 } | |
308 | |
309 /* Return TRUE when LOOP nest should be optimized for size. */ | |
310 | |
311 bool | |
312 optimize_loop_nest_for_size_p (struct loop *loop) | |
313 { | |
314 return !optimize_loop_nest_for_speed_p (loop); | |
315 } | |
316 | |
317 /* Return true when edge E is likely to be well predictable by branch | |
318 predictor. */ | |
319 | |
320 bool | |
321 predictable_edge_p (edge e) | |
322 { | |
323 if (profile_status == PROFILE_ABSENT) | |
324 return false; | |
325 if ((e->probability | |
326 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100) | |
327 || (REG_BR_PROB_BASE - e->probability | |
328 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)) | |
329 return true; | |
330 return false; | |
331 } | |
332 | |
333 | |
334 /* Set RTL expansion for BB profile. */ | |
335 | |
336 void | |
337 rtl_profile_for_bb (basic_block bb) | |
338 { | |
339 crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb); | |
340 } | |
341 | |
342 /* Set RTL expansion for edge profile. */ | |
343 | |
344 void | |
345 rtl_profile_for_edge (edge e) | |
346 { | |
347 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e); | |
348 } | |
349 | |
350 /* Set RTL expansion to default mode (i.e. when profile info is not known). */ | |
351 void | |
352 default_rtl_profile (void) | |
353 { | |
354 crtl->maybe_hot_insn_p = true; | |
355 } | |
356 | |
357 /* Return true if the one of outgoing edges is already predicted by | |
358 PREDICTOR. */ | |
359 | |
360 bool | |
361 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor) | |
362 { | |
363 rtx note; | |
364 if (!INSN_P (BB_END (bb))) | |
365 return false; | |
366 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1)) | |
367 if (REG_NOTE_KIND (note) == REG_BR_PRED | |
368 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor) | |
369 return true; | |
370 return false; | |
371 } | |
372 | |
373 /* This map contains for a basic block the list of predictions for the | |
374 outgoing edges. */ | |
375 | |
376 static struct pointer_map_t *bb_predictions; | |
377 | |
378 /* Return true if the one of outgoing edges is already predicted by | |
379 PREDICTOR. */ | |
380 | |
381 bool | |
382 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor) | |
383 { | |
384 struct edge_prediction *i; | |
385 void **preds = pointer_map_contains (bb_predictions, bb); | |
386 | |
387 if (!preds) | |
388 return false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
389 |
0 | 390 for (i = (struct edge_prediction *) *preds; i; i = i->ep_next) |
391 if (i->ep_predictor == predictor) | |
392 return true; | |
393 return false; | |
394 } | |
395 | |
396 /* Return true when the probability of edge is reliable. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
397 |
0 | 398 The profile guessing code is good at predicting branch outcome (ie. |
399 taken/not taken), that is predicted right slightly over 75% of time. | |
400 It is however notoriously poor on predicting the probability itself. | |
401 In general the profile appear a lot flatter (with probabilities closer | |
402 to 50%) than the reality so it is bad idea to use it to drive optimization | |
403 such as those disabling dynamic branch prediction for well predictable | |
404 branches. | |
405 | |
406 There are two exceptions - edges leading to noreturn edges and edges | |
407 predicted by number of iterations heuristics are predicted well. This macro | |
408 should be able to distinguish those, but at the moment it simply check for | |
409 noreturn heuristic that is only one giving probability over 99% or bellow | |
410 1%. In future we might want to propagate reliability information across the | |
411 CFG if we find this information useful on multiple places. */ | |
412 static bool | |
413 probability_reliable_p (int prob) | |
414 { | |
415 return (profile_status == PROFILE_READ | |
416 || (profile_status == PROFILE_GUESSED | |
417 && (prob <= HITRATE (1) || prob >= HITRATE (99)))); | |
418 } | |
419 | |
420 /* Same predicate as above, working on edges. */ | |
421 bool | |
422 edge_probability_reliable_p (const_edge e) | |
423 { | |
424 return probability_reliable_p (e->probability); | |
425 } | |
426 | |
427 /* Same predicate as edge_probability_reliable_p, working on notes. */ | |
428 bool | |
429 br_prob_note_reliable_p (const_rtx note) | |
430 { | |
431 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB); | |
432 return probability_reliable_p (INTVAL (XEXP (note, 0))); | |
433 } | |
434 | |
435 static void | |
436 predict_insn (rtx insn, enum br_predictor predictor, int probability) | |
437 { | |
438 gcc_assert (any_condjump_p (insn)); | |
439 if (!flag_guess_branch_prob) | |
440 return; | |
441 | |
442 add_reg_note (insn, REG_BR_PRED, | |
443 gen_rtx_CONCAT (VOIDmode, | |
444 GEN_INT ((int) predictor), | |
445 GEN_INT ((int) probability))); | |
446 } | |
447 | |
448 /* Predict insn by given predictor. */ | |
449 | |
450 void | |
451 predict_insn_def (rtx insn, enum br_predictor predictor, | |
452 enum prediction taken) | |
453 { | |
454 int probability = predictor_info[(int) predictor].hitrate; | |
455 | |
456 if (taken != TAKEN) | |
457 probability = REG_BR_PROB_BASE - probability; | |
458 | |
459 predict_insn (insn, predictor, probability); | |
460 } | |
461 | |
462 /* Predict edge E with given probability if possible. */ | |
463 | |
464 void | |
465 rtl_predict_edge (edge e, enum br_predictor predictor, int probability) | |
466 { | |
467 rtx last_insn; | |
468 last_insn = BB_END (e->src); | |
469 | |
470 /* We can store the branch prediction information only about | |
471 conditional jumps. */ | |
472 if (!any_condjump_p (last_insn)) | |
473 return; | |
474 | |
475 /* We always store probability of branching. */ | |
476 if (e->flags & EDGE_FALLTHRU) | |
477 probability = REG_BR_PROB_BASE - probability; | |
478 | |
479 predict_insn (last_insn, predictor, probability); | |
480 } | |
481 | |
482 /* Predict edge E with the given PROBABILITY. */ | |
483 void | |
484 gimple_predict_edge (edge e, enum br_predictor predictor, int probability) | |
485 { | |
486 gcc_assert (profile_status != PROFILE_GUESSED); | |
487 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1) | |
488 && flag_guess_branch_prob && optimize) | |
489 { | |
490 struct edge_prediction *i = XNEW (struct edge_prediction); | |
491 void **preds = pointer_map_insert (bb_predictions, e->src); | |
492 | |
493 i->ep_next = (struct edge_prediction *) *preds; | |
494 *preds = i; | |
495 i->ep_probability = probability; | |
496 i->ep_predictor = predictor; | |
497 i->ep_edge = e; | |
498 } | |
499 } | |
500 | |
501 /* Remove all predictions on given basic block that are attached | |
502 to edge E. */ | |
503 void | |
504 remove_predictions_associated_with_edge (edge e) | |
505 { | |
506 void **preds; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
507 |
0 | 508 if (!bb_predictions) |
509 return; | |
510 | |
511 preds = pointer_map_contains (bb_predictions, e->src); | |
512 | |
513 if (preds) | |
514 { | |
515 struct edge_prediction **prediction = (struct edge_prediction **) preds; | |
516 struct edge_prediction *next; | |
517 | |
518 while (*prediction) | |
519 { | |
520 if ((*prediction)->ep_edge == e) | |
521 { | |
522 next = (*prediction)->ep_next; | |
523 free (*prediction); | |
524 *prediction = next; | |
525 } | |
526 else | |
527 prediction = &((*prediction)->ep_next); | |
528 } | |
529 } | |
530 } | |
531 | |
532 /* Clears the list of predictions stored for BB. */ | |
533 | |
534 static void | |
535 clear_bb_predictions (basic_block bb) | |
536 { | |
537 void **preds = pointer_map_contains (bb_predictions, bb); | |
538 struct edge_prediction *pred, *next; | |
539 | |
540 if (!preds) | |
541 return; | |
542 | |
543 for (pred = (struct edge_prediction *) *preds; pred; pred = next) | |
544 { | |
545 next = pred->ep_next; | |
546 free (pred); | |
547 } | |
548 *preds = NULL; | |
549 } | |
550 | |
551 /* Return true when we can store prediction on insn INSN. | |
552 At the moment we represent predictions only on conditional | |
553 jumps, not at computed jump or other complicated cases. */ | |
554 static bool | |
555 can_predict_insn_p (const_rtx insn) | |
556 { | |
557 return (JUMP_P (insn) | |
558 && any_condjump_p (insn) | |
559 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2); | |
560 } | |
561 | |
562 /* Predict edge E by given predictor if possible. */ | |
563 | |
564 void | |
565 predict_edge_def (edge e, enum br_predictor predictor, | |
566 enum prediction taken) | |
567 { | |
568 int probability = predictor_info[(int) predictor].hitrate; | |
569 | |
570 if (taken != TAKEN) | |
571 probability = REG_BR_PROB_BASE - probability; | |
572 | |
573 predict_edge (e, predictor, probability); | |
574 } | |
575 | |
576 /* Invert all branch predictions or probability notes in the INSN. This needs | |
577 to be done each time we invert the condition used by the jump. */ | |
578 | |
579 void | |
580 invert_br_probabilities (rtx insn) | |
581 { | |
582 rtx note; | |
583 | |
584 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) | |
585 if (REG_NOTE_KIND (note) == REG_BR_PROB) | |
586 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0))); | |
587 else if (REG_NOTE_KIND (note) == REG_BR_PRED) | |
588 XEXP (XEXP (note, 0), 1) | |
589 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1))); | |
590 } | |
591 | |
592 /* Dump information about the branch prediction to the output file. */ | |
593 | |
594 static void | |
595 dump_prediction (FILE *file, enum br_predictor predictor, int probability, | |
596 basic_block bb, int used) | |
597 { | |
598 edge e; | |
599 edge_iterator ei; | |
600 | |
601 if (!file) | |
602 return; | |
603 | |
604 FOR_EACH_EDGE (e, ei, bb->succs) | |
605 if (! (e->flags & EDGE_FALLTHRU)) | |
606 break; | |
607 | |
608 fprintf (file, " %s heuristics%s: %.1f%%", | |
609 predictor_info[predictor].name, | |
610 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE); | |
611 | |
612 if (bb->count) | |
613 { | |
614 fprintf (file, " exec "); | |
615 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count); | |
616 if (e) | |
617 { | |
618 fprintf (file, " hit "); | |
619 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count); | |
620 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count); | |
621 } | |
622 } | |
623 | |
624 fprintf (file, "\n"); | |
625 } | |
626 | |
627 /* We can not predict the probabilities of outgoing edges of bb. Set them | |
628 evenly and hope for the best. */ | |
629 static void | |
630 set_even_probabilities (basic_block bb) | |
631 { | |
632 int nedges = 0; | |
633 edge e; | |
634 edge_iterator ei; | |
635 | |
636 FOR_EACH_EDGE (e, ei, bb->succs) | |
637 if (!(e->flags & (EDGE_EH | EDGE_FAKE))) | |
638 nedges ++; | |
639 FOR_EACH_EDGE (e, ei, bb->succs) | |
640 if (!(e->flags & (EDGE_EH | EDGE_FAKE))) | |
641 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges; | |
642 else | |
643 e->probability = 0; | |
644 } | |
645 | |
646 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB | |
647 note if not already present. Remove now useless REG_BR_PRED notes. */ | |
648 | |
649 static void | |
650 combine_predictions_for_insn (rtx insn, basic_block bb) | |
651 { | |
652 rtx prob_note; | |
653 rtx *pnote; | |
654 rtx note; | |
655 int best_probability = PROB_EVEN; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
656 enum br_predictor best_predictor = END_PREDICTORS; |
0 | 657 int combined_probability = REG_BR_PROB_BASE / 2; |
658 int d; | |
659 bool first_match = false; | |
660 bool found = false; | |
661 | |
662 if (!can_predict_insn_p (insn)) | |
663 { | |
664 set_even_probabilities (bb); | |
665 return; | |
666 } | |
667 | |
668 prob_note = find_reg_note (insn, REG_BR_PROB, 0); | |
669 pnote = ®_NOTES (insn); | |
670 if (dump_file) | |
671 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn), | |
672 bb->index); | |
673 | |
674 /* We implement "first match" heuristics and use probability guessed | |
675 by predictor with smallest index. */ | |
676 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) | |
677 if (REG_NOTE_KIND (note) == REG_BR_PRED) | |
678 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
679 enum br_predictor predictor = ((enum br_predictor) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
680 INTVAL (XEXP (XEXP (note, 0), 0))); |
0 | 681 int probability = INTVAL (XEXP (XEXP (note, 0), 1)); |
682 | |
683 found = true; | |
684 if (best_predictor > predictor) | |
685 best_probability = probability, best_predictor = predictor; | |
686 | |
687 d = (combined_probability * probability | |
688 + (REG_BR_PROB_BASE - combined_probability) | |
689 * (REG_BR_PROB_BASE - probability)); | |
690 | |
691 /* Use FP math to avoid overflows of 32bit integers. */ | |
692 if (d == 0) | |
693 /* If one probability is 0% and one 100%, avoid division by zero. */ | |
694 combined_probability = REG_BR_PROB_BASE / 2; | |
695 else | |
696 combined_probability = (((double) combined_probability) * probability | |
697 * REG_BR_PROB_BASE / d + 0.5); | |
698 } | |
699 | |
700 /* Decide which heuristic to use. In case we didn't match anything, | |
701 use no_prediction heuristic, in case we did match, use either | |
702 first match or Dempster-Shaffer theory depending on the flags. */ | |
703 | |
704 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH) | |
705 first_match = true; | |
706 | |
707 if (!found) | |
708 dump_prediction (dump_file, PRED_NO_PREDICTION, | |
709 combined_probability, bb, true); | |
710 else | |
711 { | |
712 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, | |
713 bb, !first_match); | |
714 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, | |
715 bb, first_match); | |
716 } | |
717 | |
718 if (first_match) | |
719 combined_probability = best_probability; | |
720 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true); | |
721 | |
722 while (*pnote) | |
723 { | |
724 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED) | |
725 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
726 enum br_predictor predictor = ((enum br_predictor) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
727 INTVAL (XEXP (XEXP (*pnote, 0), 0))); |
0 | 728 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1)); |
729 | |
730 dump_prediction (dump_file, predictor, probability, bb, | |
731 !first_match || best_predictor == predictor); | |
732 *pnote = XEXP (*pnote, 1); | |
733 } | |
734 else | |
735 pnote = &XEXP (*pnote, 1); | |
736 } | |
737 | |
738 if (!prob_note) | |
739 { | |
740 add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability)); | |
741 | |
742 /* Save the prediction into CFG in case we are seeing non-degenerated | |
743 conditional jump. */ | |
744 if (!single_succ_p (bb)) | |
745 { | |
746 BRANCH_EDGE (bb)->probability = combined_probability; | |
747 FALLTHRU_EDGE (bb)->probability | |
748 = REG_BR_PROB_BASE - combined_probability; | |
749 } | |
750 } | |
751 else if (!single_succ_p (bb)) | |
752 { | |
753 int prob = INTVAL (XEXP (prob_note, 0)); | |
754 | |
755 BRANCH_EDGE (bb)->probability = prob; | |
756 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob; | |
757 } | |
758 else | |
759 single_succ_edge (bb)->probability = REG_BR_PROB_BASE; | |
760 } | |
761 | |
762 /* Combine predictions into single probability and store them into CFG. | |
763 Remove now useless prediction entries. */ | |
764 | |
765 static void | |
766 combine_predictions_for_bb (basic_block bb) | |
767 { | |
768 int best_probability = PROB_EVEN; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
769 enum br_predictor best_predictor = END_PREDICTORS; |
0 | 770 int combined_probability = REG_BR_PROB_BASE / 2; |
771 int d; | |
772 bool first_match = false; | |
773 bool found = false; | |
774 struct edge_prediction *pred; | |
775 int nedges = 0; | |
776 edge e, first = NULL, second = NULL; | |
777 edge_iterator ei; | |
778 void **preds; | |
779 | |
780 FOR_EACH_EDGE (e, ei, bb->succs) | |
781 if (!(e->flags & (EDGE_EH | EDGE_FAKE))) | |
782 { | |
783 nedges ++; | |
784 if (first && !second) | |
785 second = e; | |
786 if (!first) | |
787 first = e; | |
788 } | |
789 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
790 /* When there is no successor or only one choice, prediction is easy. |
0 | 791 |
792 We are lazy for now and predict only basic blocks with two outgoing | |
793 edges. It is possible to predict generic case too, but we have to | |
794 ignore first match heuristics and do more involved combining. Implement | |
795 this later. */ | |
796 if (nedges != 2) | |
797 { | |
798 if (!bb->count) | |
799 set_even_probabilities (bb); | |
800 clear_bb_predictions (bb); | |
801 if (dump_file) | |
802 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n", | |
803 nedges, bb->index); | |
804 return; | |
805 } | |
806 | |
807 if (dump_file) | |
808 fprintf (dump_file, "Predictions for bb %i\n", bb->index); | |
809 | |
810 preds = pointer_map_contains (bb_predictions, bb); | |
811 if (preds) | |
812 { | |
813 /* We implement "first match" heuristics and use probability guessed | |
814 by predictor with smallest index. */ | |
815 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next) | |
816 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
817 enum br_predictor predictor = pred->ep_predictor; |
0 | 818 int probability = pred->ep_probability; |
819 | |
820 if (pred->ep_edge != first) | |
821 probability = REG_BR_PROB_BASE - probability; | |
822 | |
823 found = true; | |
824 /* First match heuristics would be widly confused if we predicted | |
825 both directions. */ | |
826 if (best_predictor > predictor) | |
827 { | |
828 struct edge_prediction *pred2; | |
829 int prob = probability; | |
830 | |
831 for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next) | |
832 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor) | |
833 { | |
834 int probability2 = pred->ep_probability; | |
835 | |
836 if (pred2->ep_edge != first) | |
837 probability2 = REG_BR_PROB_BASE - probability2; | |
838 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
839 if ((probability < REG_BR_PROB_BASE / 2) != |
0 | 840 (probability2 < REG_BR_PROB_BASE / 2)) |
841 break; | |
842 | |
843 /* If the same predictor later gave better result, go for it! */ | |
844 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability)) | |
845 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability))) | |
846 prob = probability2; | |
847 } | |
848 if (!pred2) | |
849 best_probability = prob, best_predictor = predictor; | |
850 } | |
851 | |
852 d = (combined_probability * probability | |
853 + (REG_BR_PROB_BASE - combined_probability) | |
854 * (REG_BR_PROB_BASE - probability)); | |
855 | |
856 /* Use FP math to avoid overflows of 32bit integers. */ | |
857 if (d == 0) | |
858 /* If one probability is 0% and one 100%, avoid division by zero. */ | |
859 combined_probability = REG_BR_PROB_BASE / 2; | |
860 else | |
861 combined_probability = (((double) combined_probability) | |
862 * probability | |
863 * REG_BR_PROB_BASE / d + 0.5); | |
864 } | |
865 } | |
866 | |
867 /* Decide which heuristic to use. In case we didn't match anything, | |
868 use no_prediction heuristic, in case we did match, use either | |
869 first match or Dempster-Shaffer theory depending on the flags. */ | |
870 | |
871 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH) | |
872 first_match = true; | |
873 | |
874 if (!found) | |
875 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true); | |
876 else | |
877 { | |
878 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb, | |
879 !first_match); | |
880 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb, | |
881 first_match); | |
882 } | |
883 | |
884 if (first_match) | |
885 combined_probability = best_probability; | |
886 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true); | |
887 | |
888 if (preds) | |
889 { | |
890 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next) | |
891 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
892 enum br_predictor predictor = pred->ep_predictor; |
0 | 893 int probability = pred->ep_probability; |
894 | |
895 if (pred->ep_edge != EDGE_SUCC (bb, 0)) | |
896 probability = REG_BR_PROB_BASE - probability; | |
897 dump_prediction (dump_file, predictor, probability, bb, | |
898 !first_match || best_predictor == predictor); | |
899 } | |
900 } | |
901 clear_bb_predictions (bb); | |
902 | |
903 if (!bb->count) | |
904 { | |
905 first->probability = combined_probability; | |
906 second->probability = REG_BR_PROB_BASE - combined_probability; | |
907 } | |
908 } | |
909 | |
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  loop_iterator li;
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      VEC (edge, heap) *exits;
      struct tree_niter_desc niter_desc;
      edge ex;

      exits = get_loop_exit_edges (loop);
      n_exits = VEC_length (edge, exits);

      /* First predict each exit edge from an iteration-count estimate,
	 when one is available.  */
      for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  /* Try an exact symbolic niter analysis first, then fall back
	     to brute-force evaluation of the loop.  */
	  if (number_of_iterations_exit (loop, ex, &niter_desc, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      /* Clamp the constant count to MAX; +1 converts the exit
		 test count into the number of latch executions.  */
	      if (host_integerp (niter, 1)
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_low_cst (niter, 1) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_loop_iterations_int (loop, false);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* Probability of taking the exit once is 1/nitercst, rounded
	     to nearest in REG_BR_PROB_BASE units.  */
	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
      VEC_free (edge, heap, exits);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loop with many exits we don't want to predict all exits
		 with the pretty large probability, because if all exits are
		 considered in row, the loop would be predicted to iterate
		 almost never.  The code to divide probability by number of
		 exits is very rough.  It should compute the number of exits
		 taken in each patch through function (not the overall number
		 of exits that might be a lot higher for loops with wide switch
		 statements in them) and compute n-th square root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in perl benchmark containing such
		 a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
1041 | |
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  Looks only at the conditional jump terminating BB and
   records per-heuristic guesses on that insn; the guesses are combined
   later by combine_predictions_for_insn.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	/* Comparisons against 0, 1 and -1 suggest a sign test; most
	   quantities are positive, so predict "< small constant" false.  */
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
1140 | |
1141 /* Set edge->probability for each successor edge of BB. */ | |
1142 void | |
1143 guess_outgoing_edge_probabilities (basic_block bb) | |
1144 { | |
1145 bb_estimate_probability_locally (bb); | |
1146 combine_predictions_for_insn (BB_END (bb), bb); | |
1147 } | |
1148 | |
1149 static tree expr_expected_value (tree, bitmap); | |
1150 | |
1151 /* Helper function for expr_expected_value. */ | |
1152 | |
1153 static tree | |
1154 expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1, bitmap visited) | |
1155 { | |
1156 gimple def; | |
1157 | |
1158 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) | |
1159 { | |
1160 if (TREE_CONSTANT (op0)) | |
1161 return op0; | |
1162 | |
1163 if (code != SSA_NAME) | |
1164 return NULL_TREE; | |
1165 | |
1166 def = SSA_NAME_DEF_STMT (op0); | |
1167 | |
1168 /* If we were already here, break the infinite cycle. */ | |
1169 if (bitmap_bit_p (visited, SSA_NAME_VERSION (op0))) | |
1170 return NULL; | |
1171 bitmap_set_bit (visited, SSA_NAME_VERSION (op0)); | |
1172 | |
1173 if (gimple_code (def) == GIMPLE_PHI) | |
1174 { | |
1175 /* All the arguments of the PHI node must have the same constant | |
1176 length. */ | |
1177 int i, n = gimple_phi_num_args (def); | |
1178 tree val = NULL, new_val; | |
1179 | |
1180 for (i = 0; i < n; i++) | |
1181 { | |
1182 tree arg = PHI_ARG_DEF (def, i); | |
1183 | |
1184 /* If this PHI has itself as an argument, we cannot | |
1185 determine the string length of this argument. However, | |
1186 if we can find an expected constant value for the other | |
1187 PHI args then we can still be sure that this is | |
1188 likely a constant. So be optimistic and just | |
1189 continue with the next argument. */ | |
1190 if (arg == PHI_RESULT (def)) | |
1191 continue; | |
1192 | |
1193 new_val = expr_expected_value (arg, visited); | |
1194 if (!new_val) | |
1195 return NULL; | |
1196 if (!val) | |
1197 val = new_val; | |
1198 else if (!operand_equal_p (val, new_val, false)) | |
1199 return NULL; | |
1200 } | |
1201 return val; | |
1202 } | |
1203 if (is_gimple_assign (def)) | |
1204 { | |
1205 if (gimple_assign_lhs (def) != op0) | |
1206 return NULL; | |
1207 | |
1208 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)), | |
1209 gimple_assign_rhs1 (def), | |
1210 gimple_assign_rhs_code (def), | |
1211 gimple_assign_rhs2 (def), | |
1212 visited); | |
1213 } | |
1214 | |
1215 if (is_gimple_call (def)) | |
1216 { | |
1217 tree decl = gimple_call_fndecl (def); | |
1218 if (!decl) | |
1219 return NULL; | |
1220 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL | |
1221 && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT) | |
1222 { | |
1223 tree val; | |
1224 | |
1225 if (gimple_call_num_args (def) != 2) | |
1226 return NULL; | |
1227 val = gimple_call_arg (def, 0); | |
1228 if (TREE_CONSTANT (val)) | |
1229 return val; | |
1230 return gimple_call_arg (def, 1); | |
1231 } | |
1232 } | |
1233 | |
1234 return NULL; | |
1235 } | |
1236 | |
1237 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) | |
1238 { | |
1239 tree res; | |
1240 op0 = expr_expected_value (op0, visited); | |
1241 if (!op0) | |
1242 return NULL; | |
1243 op1 = expr_expected_value (op1, visited); | |
1244 if (!op1) | |
1245 return NULL; | |
1246 res = fold_build2 (code, type, op0, op1); | |
1247 if (TREE_CONSTANT (res)) | |
1248 return res; | |
1249 return NULL; | |
1250 } | |
1251 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS) | |
1252 { | |
1253 tree res; | |
1254 op0 = expr_expected_value (op0, visited); | |
1255 if (!op0) | |
1256 return NULL; | |
1257 res = fold_build1 (code, type, op0); | |
1258 if (TREE_CONSTANT (res)) | |
1259 return res; | |
1260 return NULL; | |
1261 } | |
1262 return NULL; | |
1263 } | |
1264 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1265 /* Return constant EXPR will likely have at execution time, NULL if unknown. |
0 | 1266 The function is used by builtin_expect branch predictor so the evidence |
1267 must come from this construct and additional possible constant folding. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1268 |
0 | 1269 We may want to implement more involved value guess (such as value range |
1270 propagation based prediction), but such tricks shall go to new | |
1271 implementation. */ | |
1272 | |
1273 static tree | |
1274 expr_expected_value (tree expr, bitmap visited) | |
1275 { | |
1276 enum tree_code code; | |
1277 tree op0, op1; | |
1278 | |
1279 if (TREE_CONSTANT (expr)) | |
1280 return expr; | |
1281 | |
1282 extract_ops_from_tree (expr, &code, &op0, &op1); | |
1283 return expr_expected_value_1 (TREE_TYPE (expr), | |
1284 op0, code, op1, visited); | |
1285 } | |
1286 | |
1287 | |
1288 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements | |
1289 we no longer need. */ | |
1290 static unsigned int | |
1291 strip_predict_hints (void) | |
1292 { | |
1293 basic_block bb; | |
1294 gimple ass_stmt; | |
1295 tree var; | |
1296 | |
1297 FOR_EACH_BB (bb) | |
1298 { | |
1299 gimple_stmt_iterator bi; | |
1300 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);) | |
1301 { | |
1302 gimple stmt = gsi_stmt (bi); | |
1303 | |
1304 if (gimple_code (stmt) == GIMPLE_PREDICT) | |
1305 { | |
1306 gsi_remove (&bi, true); | |
1307 continue; | |
1308 } | |
1309 else if (gimple_code (stmt) == GIMPLE_CALL) | |
1310 { | |
1311 tree fndecl = gimple_call_fndecl (stmt); | |
1312 | |
1313 if (fndecl | |
1314 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL | |
1315 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT | |
1316 && gimple_call_num_args (stmt) == 2) | |
1317 { | |
1318 var = gimple_call_lhs (stmt); | |
1319 ass_stmt = gimple_build_assign (var, gimple_call_arg (stmt, 0)); | |
1320 | |
1321 gsi_replace (&bi, ass_stmt, true); | |
1322 } | |
1323 } | |
1324 gsi_next (&bi); | |
1325 } | |
1326 } | |
1327 return 0; | |
1328 } | |
1329 | |
/* Predict using opcode of the last statement in basic block.  Applies,
   in order: the __builtin_expect expected-value heuristic, the pointer
   heuristic, and the opcode heuristic, recording a prediction on the
   TRUE outgoing edge of BB's terminating GIMPLE_COND.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  /* Locate the edge taken when the condition is true.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  /* If __builtin_expect gives us an expected value for the whole
     condition, that beats every syntactic heuristic below.  */
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
  BITMAP_FREE (visited);
  if (val)
    {
      if (integer_zerop (val))
	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
      else
	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
      return;
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	/* Comparisons against 0, 1 and -1 are sign-like tests; most
	   quantities are positive, so predict "< small constant" false.  */
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
1446 | |
1447 /* Try to guess whether the value of return means error code. */ | |
1448 | |
1449 static enum br_predictor | |
1450 return_prediction (tree val, enum prediction *prediction) | |
1451 { | |
1452 /* VOID. */ | |
1453 if (!val) | |
1454 return PRED_NO_PREDICTION; | |
1455 /* Different heuristics for pointers and scalars. */ | |
1456 if (POINTER_TYPE_P (TREE_TYPE (val))) | |
1457 { | |
1458 /* NULL is usually not returned. */ | |
1459 if (integer_zerop (val)) | |
1460 { | |
1461 *prediction = NOT_TAKEN; | |
1462 return PRED_NULL_RETURN; | |
1463 } | |
1464 } | |
1465 else if (INTEGRAL_TYPE_P (TREE_TYPE (val))) | |
1466 { | |
1467 /* Negative return values are often used to indicate | |
1468 errors. */ | |
1469 if (TREE_CODE (val) == INTEGER_CST | |
1470 && tree_int_cst_sgn (val) < 0) | |
1471 { | |
1472 *prediction = NOT_TAKEN; | |
1473 return PRED_NEGATIVE_RETURN; | |
1474 } | |
1475 /* Constant return values seems to be commonly taken. | |
1476 Zero/one often represent booleans so exclude them from the | |
1477 heuristics. */ | |
1478 if (TREE_CONSTANT (val) | |
1479 && (!integer_zerop (val) && !integer_onep (val))) | |
1480 { | |
1481 *prediction = TAKEN; | |
1482 return PRED_CONST_RETURN; | |
1483 } | |
1484 } | |
1485 return PRED_NO_PREDICTION; | |
1486 } | |
1487 | |
/* Find the basic block with return expression and look up for possible
   return value trying to apply RETURN_PREDICTION heuristics.  */
static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  /* Find a predecessor of the exit block that ends in a return.
     FOR_EACH_EDGE leaves E as NULL when the loop runs off the end,
     which the check below relies on.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
	  && gimple_code (return_stmt) == GIMPLE_RETURN)
	break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  /* Only an SSA return value defined by a PHI gives us several
     distinct return sites whose paths can be predicted separately.  */
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values form the function
     belongs to same category (ie they are all positive constants)
     so we can hardly say something about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to (gimple_phi_arg_edge (phi, i)->src, pred,
				    direction);
      }
}
1537 | |
1538 /* Look for basic block that contains unlikely to happen events | |
1539 (such as noreturn calls) and mark all paths leading to execution | |
1540 of this basic blocks as unlikely. */ | |
1541 | |
1542 static void | |
1543 tree_bb_level_predictions (void) | |
1544 { | |
1545 basic_block bb; | |
1546 bool has_return_edges = false; | |
1547 edge e; | |
1548 edge_iterator ei; | |
1549 | |
1550 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) | |
1551 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH))) | |
1552 { | |
1553 has_return_edges = true; | |
1554 break; | |
1555 } | |
1556 | |
1557 apply_return_prediction (); | |
1558 | |
1559 FOR_EACH_BB (bb) | |
1560 { | |
1561 gimple_stmt_iterator gsi; | |
1562 | |
1563 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1564 { | |
1565 gimple stmt = gsi_stmt (gsi); | |
1566 tree decl; | |
1567 | |
1568 if (is_gimple_call (stmt)) | |
1569 { | |
1570 if ((gimple_call_flags (stmt) & ECF_NORETURN) | |
1571 && has_return_edges) | |
1572 predict_paths_leading_to (bb, PRED_NORETURN, | |
1573 NOT_TAKEN); | |
1574 decl = gimple_call_fndecl (stmt); | |
1575 if (decl | |
1576 && lookup_attribute ("cold", | |
1577 DECL_ATTRIBUTES (decl))) | |
1578 predict_paths_leading_to (bb, PRED_COLD_FUNCTION, | |
1579 NOT_TAKEN); | |
1580 } | |
1581 else if (gimple_code (stmt) == GIMPLE_PREDICT) | |
1582 { | |
1583 predict_paths_leading_to (bb, gimple_predict_predictor (stmt), | |
1584 gimple_predict_outcome (stmt)); | |
1585 /* Keep GIMPLE_PREDICT around so early inlining will propagate | |
1586 hints to callers. */ | |
1587 } | |
1588 } | |
1589 } | |
1590 } | |
1591 | |
#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserts that the pointer map is
   empty.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
		 void *data ATTRIBUTE_UNUSED)
{
  /* Every slot must have been cleared before the map is destroyed.  */
  gcc_assert (!*value);
  /* Returning false continues the traversal over remaining slots.  */
  return false;
}
#endif
1605 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1606 /* Predict branch probabilities and estimate profile for basic block BB. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1607 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1608 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1609 tree_estimate_probability_bb (basic_block bb) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1610 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1611 edge e; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1612 edge_iterator ei; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1613 gimple last; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1614 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1615 FOR_EACH_EDGE (e, ei, bb->succs) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1616 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1617 /* Predict early returns to be probable, as we've already taken |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1618 care for error returns and other cases are often used for |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1619 fast paths through function. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1620 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1621 Since we've already removed the return statements, we are |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1622 looking for CFG like: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1623 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1624 if (conditional) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1625 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1626 .. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1627 goto return_block |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1628 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1629 some other blocks |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1630 return_block: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1631 return_stmt. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1632 if (e->dest != bb->next_bb |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1633 && e->dest != EXIT_BLOCK_PTR |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1634 && single_succ_p (e->dest) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1635 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1636 && (last = last_stmt (e->dest)) != NULL |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1637 && gimple_code (last) == GIMPLE_RETURN) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1638 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1639 edge e1; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1640 edge_iterator ei1; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1641 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1642 if (single_succ_p (bb)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1643 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1644 FOR_EACH_EDGE (e1, ei1, bb->preds) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1645 if (!predicted_by_p (e1->src, PRED_NULL_RETURN) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1646 && !predicted_by_p (e1->src, PRED_CONST_RETURN) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1647 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1648 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1649 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1650 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1651 if (!predicted_by_p (e->src, PRED_NULL_RETURN) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1652 && !predicted_by_p (e->src, PRED_CONST_RETURN) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1653 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1654 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1655 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1656 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1657 /* Look for block we are guarding (ie we dominate it, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1658 but it doesn't postdominate us). */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1659 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1660 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1661 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1662 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1663 gimple_stmt_iterator bi; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1664 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1665 /* The call heuristic claims that a guarded function call |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1666 is improbable. This is because such calls are often used |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1667 to signal exceptional situations such as printing error |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1668 messages. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1669 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1670 gsi_next (&bi)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1671 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1672 gimple stmt = gsi_stmt (bi); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1673 if (is_gimple_call (stmt) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1674 /* Constant and pure calls are hardly used to signalize |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1675 something exceptional. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1676 && gimple_has_side_effects (stmt)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1677 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1678 predict_edge_def (e, PRED_CALL, NOT_TAKEN); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1679 break; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1680 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1681 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1682 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1683 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1684 tree_predict_by_opcode (bb); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1685 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1686 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  /* Give noreturn calls and infinite loops fake paths to the exit block
     so that post-dominance information is well defined below.  */
  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* bb_predictions collects per-block edge predictions; it must be empty
     again by the time it is destroyed (checked under ENABLE_CHECKING).  */
  bb_predictions = pointer_map_create ();
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops () > 1)
    predict_loops ();

  /* First record predictions for every block, then combine them into
     edge probabilities in a second sweep.  */
  FOR_EACH_BB (bb)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB (bb)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  estimate_bb_frequencies ();
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1726 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
/* Predict branch probabilities and estimate profile of the tree CFG.
   This is the driver function for PASS_PROFILE.  */

static unsigned int
tree_estimate_probability_driver (void)
{
  unsigned nb_loops;

  loop_optimizer_init (0);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  /* Cache the loop count so the scev_initialize/scev_finalize calls
     below stay paired even if the count changes in between.  */
  nb_loops = number_of_loops ();
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  /* Record that the profile is now a static guess rather than absent.  */
  if (profile_status == PROFILE_ABSENT)
    profile_status = PROFILE_GUESSED;
  return 0;
}
1757 | |
1758 /* Predict edges to successors of CUR whose sources are not postdominated by | |
1759 BB by PRED and recurse to all postdominators. */ | |
1760 | |
1761 static void | |
1762 predict_paths_for_bb (basic_block cur, basic_block bb, | |
1763 enum br_predictor pred, | |
1764 enum prediction taken) | |
1765 { | |
1766 edge e; | |
1767 edge_iterator ei; | |
1768 basic_block son; | |
1769 | |
1770 /* We are looking for all edges forming edge cut induced by | |
1771 set of all blocks postdominated by BB. */ | |
1772 FOR_EACH_EDGE (e, ei, cur->preds) | |
1773 if (e->src->index >= NUM_FIXED_BLOCKS | |
1774 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb)) | |
1775 { | |
1776 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb)); | |
1777 predict_edge_def (e, pred, taken); | |
1778 } | |
1779 for (son = first_dom_son (CDI_POST_DOMINATORS, cur); | |
1780 son; | |
1781 son = next_dom_son (CDI_POST_DOMINATORS, son)) | |
1782 predict_paths_for_bb (son, bb, pred, taken); | |
1783 } | |
1784 | |
1785 /* Sets branch probabilities according to PREDiction and | |
1786 FLAGS. */ | |
1787 | |
1788 static void | |
1789 predict_paths_leading_to (basic_block bb, enum br_predictor pred, | |
1790 enum prediction taken) | |
1791 { | |
1792 predict_paths_for_bb (bb, bb, pred, taken); | |
1793 } | |
1794 | |
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case edge is a loopback edge, the probability edge will be reached
     in case header is.  Estimated number of iterations of the loop can be
     then computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;

/* Accessors for the per-block / per-edge data stashed in the AUX fields.  */
#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
1823 | |
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit count number of his predecessors
     we need to visit first.  A block is processed only once all of these
     have been, so frequencies flow in topological order (ignoring DFS
     back edges).  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      /* The outermost "loop" includes the exit block, which we can not
	 look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
	 directly.  Do the same for the entry block.  */
      bb = BASIC_BLOCK (i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
    }

  /* HEAD is the reference point: it executes with frequency 1.  */
  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  /* Intrusive worklist threaded through BLOCK_INFO(...)->next; blocks are
     appended once all their non-back-edge predecessors were processed.  */
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  /* All forward-edge predecessors must already be done.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/* frequency += (e->probability
				  * BLOCK_INFO (e->src)->frequency /
				  REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      /* Clamp so the 1/(1 - cyclic_probability) division below
		 stays finite.  */
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      /* If there is an edge back to HEAD, record its loop-back
	 probability for use by the enclosing propagation.  */
      e = find_edge (bb, head);
      if (e)
	{
	  sreal tmp;

	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
		/ REG_BR_PROB_BASE); */

	  sreal_init (&tmp, e->probability, 0);
	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		     &tmp, &real_inv_br_prob_base);
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
1963 | |
1964 /* Estimate probabilities of loopback edges in loops at same nest level. */ | |
1965 | |
1966 static void | |
1967 estimate_loops_at_level (struct loop *first_loop) | |
1968 { | |
1969 struct loop *loop; | |
1970 | |
1971 for (loop = first_loop; loop; loop = loop->next) | |
1972 { | |
1973 edge e; | |
1974 basic_block *bbs; | |
1975 unsigned i; | |
1976 bitmap tovisit = BITMAP_ALLOC (NULL); | |
1977 | |
1978 estimate_loops_at_level (loop->inner); | |
1979 | |
1980 /* Find current loop back edge and mark it. */ | |
1981 e = loop_latch_edge (loop); | |
1982 EDGE_INFO (e)->back_edge = 1; | |
1983 | |
1984 bbs = get_loop_body (loop); | |
1985 for (i = 0; i < loop->num_nodes; i++) | |
1986 bitmap_set_bit (tovisit, bbs[i]->index); | |
1987 free (bbs); | |
1988 propagate_freq (loop->header, tovisit); | |
1989 BITMAP_FREE (tovisit); | |
1990 } | |
1991 } | |
1992 | |
1993 /* Propagates frequencies through structure of loops. */ | |
1994 | |
1995 static void | |
1996 estimate_loops (void) | |
1997 { | |
1998 bitmap tovisit = BITMAP_ALLOC (NULL); | |
1999 basic_block bb; | |
2000 | |
2001 /* Start by estimating the frequencies in the loops. */ | |
2002 if (number_of_loops () > 1) | |
2003 estimate_loops_at_level (current_loops->tree_root->inner); | |
2004 | |
2005 /* Now propagate the frequencies through all the blocks. */ | |
2006 FOR_ALL_BB (bb) | |
2007 { | |
2008 bitmap_set_bit (tovisit, bb->index); | |
2009 } | |
2010 propagate_freq (ENTRY_BLOCK_PTR, tovisit); | |
2011 BITMAP_FREE (tovisit); | |
2012 } | |
2013 | |
2014 /* Convert counts measured by profile driven feedback to frequencies. | |
2015 Return nonzero iff there was any nonzero execution count. */ | |
2016 | |
2017 int | |
2018 counts_to_freqs (void) | |
2019 { | |
2020 gcov_type count_max, true_count_max = 0; | |
2021 basic_block bb; | |
2022 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2023 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) |
0 | 2024 true_count_max = MAX (bb->count, true_count_max); |
2025 | |
2026 count_max = MAX (true_count_max, 1); | |
2027 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) | |
2028 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max; | |
2029 | |
2030 return true_count_max; | |
2031 } | |
2032 | |
2033 /* Return true if function is likely to be expensive, so there is no point to | |
2034 optimize performance of prologue, epilogue or do inlining at the expense | |
2035 of code size growth. THRESHOLD is the limit of number of instructions | |
2036 function can execute at average to be still considered not expensive. */ | |
2037 | |
2038 bool | |
2039 expensive_function_p (int threshold) | |
2040 { | |
2041 unsigned int sum = 0; | |
2042 basic_block bb; | |
2043 unsigned int limit; | |
2044 | |
2045 /* We can not compute accurately for large thresholds due to scaled | |
2046 frequencies. */ | |
2047 gcc_assert (threshold <= BB_FREQ_MAX); | |
2048 | |
2049 /* Frequencies are out of range. This either means that function contains | |
2050 internal loop executing more than BB_FREQ_MAX times or profile feedback | |
2051 is available and function has not been executed at all. */ | |
2052 if (ENTRY_BLOCK_PTR->frequency == 0) | |
2053 return true; | |
2054 | |
2055 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ | |
2056 limit = ENTRY_BLOCK_PTR->frequency * threshold; | |
2057 FOR_EACH_BB (bb) | |
2058 { | |
2059 rtx insn; | |
2060 | |
2061 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); | |
2062 insn = NEXT_INSN (insn)) | |
2063 if (active_insn_p (insn)) | |
2064 { | |
2065 sum += bb->frequency; | |
2066 if (sum > limit) | |
2067 return true; | |
2068 } | |
2069 } | |
2070 | |
2071 return false; | |
2072 } | |
2073 | |
/* Estimate basic blocks frequency by given branch probabilities.  */

void
estimate_bb_frequencies (void)
{
  basic_block bb;
  sreal freq_max;

  /* Use real profile counts when available; otherwise derive frequencies
     from the (guessed) branch probabilities.  */
  if (profile_status != PROFILE_READ || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      /* One-time initialization of the sreal constants used below.  */
      if (!real_values_initialized)
        {
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  /* Seed each edge with probability / REG_BR_PROB_BASE.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
         to outermost to examine probabilities for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      /* Rescale so the hottest block gets BB_FREQ_MAX; adding
	 real_one_half rounds to nearest on the integer conversion.  */
      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}
2145 | |
2146 /* Decide whether function is hot, cold or unlikely executed. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2147 void |
0 | 2148 compute_function_frequency (void) |
2149 { | |
2150 basic_block bb; | |
2151 | |
2152 if (!profile_info || !flag_branch_probabilities) | |
2153 { | |
2154 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) | |
2155 != NULL) | |
2156 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED; | |
2157 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl)) | |
2158 != NULL) | |
2159 cfun->function_frequency = FUNCTION_FREQUENCY_HOT; | |
2160 return; | |
2161 } | |
2162 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED; | |
2163 FOR_EACH_BB (bb) | |
2164 { | |
2165 if (maybe_hot_bb_p (bb)) | |
2166 { | |
2167 cfun->function_frequency = FUNCTION_FREQUENCY_HOT; | |
2168 return; | |
2169 } | |
2170 if (!probably_never_executed_bb_p (bb)) | |
2171 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL; | |
2172 } | |
2173 } | |
2174 | |
2175 /* Choose appropriate section for the function. */ | |
2176 static void | |
2177 choose_function_section (void) | |
2178 { | |
2179 if (DECL_SECTION_NAME (current_function_decl) | |
2180 || !targetm.have_named_sections | |
2181 /* Theoretically we can split the gnu.linkonce text section too, | |
2182 but this requires more work as the frequency needs to match | |
2183 for all generated objects so we need to merge the frequency | |
2184 of all instances. For now just never set frequency for these. */ | |
2185 || DECL_ONE_ONLY (current_function_decl)) | |
2186 return; | |
2187 | |
2188 /* If we are doing the partitioning optimization, let the optimization | |
2189 choose the correct section into which to put things. */ | |
2190 | |
2191 if (flag_reorder_blocks_and_partition) | |
2192 return; | |
2193 | |
2194 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT) | |
2195 DECL_SECTION_NAME (current_function_decl) = | |
2196 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME); | |
2197 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) | |
2198 DECL_SECTION_NAME (current_function_decl) = | |
2199 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME), | |
2200 UNLIKELY_EXECUTED_TEXT_SECTION_NAME); | |
2201 } | |
2202 | |
2203 static bool | |
2204 gate_estimate_probability (void) | |
2205 { | |
2206 return flag_guess_branch_prob; | |
2207 } | |
2208 | |
/* Build PREDICT_EXPR.  The predictor is stored as an INTEGER_CST operand;
   the expected outcome (TAKEN) is recorded on the node via
   SET_PREDICT_EXPR_OUTCOME.  */
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
		   build_int_cst (NULL, predictor));

  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}
2218 | |
/* Return the textual name associated with PREDICTOR in the
   predictor_info table.  */
const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}
2224 | |
/* Pass descriptor for the "profile" pass: estimates branch probabilities
   and block frequencies, gated on gate_estimate_probability, executed by
   tree_estimate_probability_driver.  */

struct gimple_opt_pass pass_profile =
{
 {
  GIMPLE_PASS,
  "profile",				/* name */
  gate_estimate_probability,		/* gate */
  tree_estimate_probability_driver,	/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
 }
};
2243 | |
/* Pass descriptor for stripping PREDICT_EXPR hints from the IL once they
   have been consumed; the leading '*' in the name hides it from -fdump
   pass listings.  Always runs (NULL gate).  */

struct gimple_opt_pass pass_strip_predict_hints =
{
 {
  GIMPLE_PASS,
  "*strip_predict_hints",		/* name */
  NULL,					/* gate */
  strip_predict_hints,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
 }
};