Mercurial > hg > CbC > CbC_gcc
comparison gcc/predict.c @ 132:d34655255c78
update gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 10:21:07 +0900 |
parents | 84e7813d76e9 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
130:e108057fa461 | 132:d34655255c78 |
---|---|
1 /* Branch prediction routines for the GNU compiler. | 1 /* Branch prediction routines for the GNU compiler. |
2 Copyright (C) 2000-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2000-2018 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify it under | 6 GCC is free software; you can redistribute it and/or modify it under |
7 the terms of the GNU General Public License as published by the Free | 7 the terms of the GNU General Public License as published by the Free |
90 struct loop *in_loop = NULL); | 90 struct loop *in_loop = NULL); |
91 static void predict_paths_leading_to_edge (edge, enum br_predictor, | 91 static void predict_paths_leading_to_edge (edge, enum br_predictor, |
92 enum prediction, | 92 enum prediction, |
93 struct loop *in_loop = NULL); | 93 struct loop *in_loop = NULL); |
94 static bool can_predict_insn_p (const rtx_insn *); | 94 static bool can_predict_insn_p (const rtx_insn *); |
95 static HOST_WIDE_INT get_predictor_value (br_predictor, HOST_WIDE_INT); | |
95 | 96 |
96 /* Information we hold about each branch predictor. | 97 /* Information we hold about each branch predictor. |
97 Filled using information from predict.def. */ | 98 Filled using information from predict.def. */ |
98 | 99 |
99 struct predictor_info | 100 struct predictor_info |
119 /* Upper bound on predictors. */ | 120 /* Upper bound on predictors. */ |
120 {NULL, 0, 0} | 121 {NULL, 0, 0} |
121 }; | 122 }; |
122 #undef DEF_PREDICTOR | 123 #undef DEF_PREDICTOR |
123 | 124 |
124 /* Return TRUE if frequency FREQ is considered to be hot. */ | |
125 | |
126 static inline bool | |
127 maybe_hot_frequency_p (struct function *fun, int freq) | |
128 { | |
129 struct cgraph_node *node = cgraph_node::get (fun->decl); | |
130 if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ) | |
131 { | |
132 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) | |
133 return false; | |
134 if (node->frequency == NODE_FREQUENCY_HOT) | |
135 return true; | |
136 } | |
137 if (profile_status_for_fn (fun) == PROFILE_ABSENT) | |
138 return true; | |
139 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE | |
140 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3)) | |
141 return false; | |
142 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0) | |
143 return false; | |
144 if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) | |
145 < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency) | |
146 return false; | |
147 return true; | |
148 } | |
149 | |
150 static gcov_type min_count = -1; | 125 static gcov_type min_count = -1; |
151 | 126 |
152 /* Determine the threshold for hot BB counts. */ | 127 /* Determine the threshold for hot BB counts. */ |
153 | 128 |
154 gcov_type | 129 gcov_type |
155 get_hot_bb_threshold () | 130 get_hot_bb_threshold () |
156 { | 131 { |
157 gcov_working_set_t *ws; | |
158 if (min_count == -1) | 132 if (min_count == -1) |
159 { | 133 { |
160 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE)); | 134 min_count |
161 gcc_assert (ws); | 135 = profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION); |
162 min_count = ws->min_counter; | 136 if (dump_file) |
137 fprintf (dump_file, "Setting hotness threshold to %" PRId64 ".\n", | |
138 min_count); | |
163 } | 139 } |
164 return min_count; | 140 return min_count; |
165 } | 141 } |
166 | 142 |
167 /* Set the threshold for hot BB counts. */ | 143 /* Set the threshold for hot BB counts. */ |
173 } | 149 } |
174 | 150 |
175 /* Return TRUE if frequency FREQ is considered to be hot. */ | 151 /* Return TRUE if frequency FREQ is considered to be hot. */ |
176 | 152 |
177 bool | 153 bool |
178 maybe_hot_count_p (struct function *, profile_count count) | 154 maybe_hot_count_p (struct function *fun, profile_count count) |
179 { | 155 { |
180 if (!count.initialized_p ()) | 156 if (!count.initialized_p ()) |
181 return true; | 157 return true; |
158 if (count.ipa () == profile_count::zero ()) | |
159 return false; | |
160 if (!count.ipa_p ()) | |
161 { | |
162 struct cgraph_node *node = cgraph_node::get (fun->decl); | |
163 if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ) | |
164 { | |
165 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) | |
166 return false; | |
167 if (node->frequency == NODE_FREQUENCY_HOT) | |
168 return true; | |
169 } | |
170 if (profile_status_for_fn (fun) == PROFILE_ABSENT) | |
171 return true; | |
172 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE | |
173 && count < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (2, 3))) | |
174 return false; | |
175 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0) | |
176 return false; | |
177 if (count.apply_scale (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION), 1) | |
178 < ENTRY_BLOCK_PTR_FOR_FN (fun)->count) | |
179 return false; | |
180 return true; | |
181 } | |
182 /* Code executed at most once is not hot. */ | 182 /* Code executed at most once is not hot. */ |
183 if (count <= MAX (profile_info ? profile_info->runs : 1, 1)) | 183 if (count <= MAX (profile_info ? profile_info->runs : 1, 1)) |
184 return false; | 184 return false; |
185 return (count.to_gcov_type () >= get_hot_bb_threshold ()); | 185 return (count.to_gcov_type () >= get_hot_bb_threshold ()); |
186 } | 186 } |
190 | 190 |
191 bool | 191 bool |
192 maybe_hot_bb_p (struct function *fun, const_basic_block bb) | 192 maybe_hot_bb_p (struct function *fun, const_basic_block bb) |
193 { | 193 { |
194 gcc_checking_assert (fun); | 194 gcc_checking_assert (fun); |
195 if (!maybe_hot_count_p (fun, bb->count)) | 195 return maybe_hot_count_p (fun, bb->count); |
196 return false; | |
197 return maybe_hot_frequency_p (fun, bb->frequency); | |
198 } | 196 } |
199 | 197 |
200 /* Return true in case BB can be CPU intensive and should be optimized | 198 /* Return true in case BB can be CPU intensive and should be optimized |
201 for maximal performance. */ | 199 for maximal performance. */ |
202 | 200 |
203 bool | 201 bool |
204 maybe_hot_edge_p (edge e) | 202 maybe_hot_edge_p (edge e) |
205 { | 203 { |
206 if (!maybe_hot_count_p (cfun, e->count ())) | 204 return maybe_hot_count_p (cfun, e->count ()); |
207 return false; | |
208 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e)); | |
209 } | 205 } |
210 | 206 |
211 /* Return true if profile COUNT and FREQUENCY, or function FUN static | 207 /* Return true if profile COUNT and FREQUENCY, or function FUN static |
212 node frequency reflects never being executed. */ | 208 node frequency reflects never being executed. */ |
213 | 209 |
214 static bool | 210 static bool |
215 probably_never_executed (struct function *fun, | 211 probably_never_executed (struct function *fun, |
216 profile_count count, int) | 212 profile_count count) |
217 { | 213 { |
218 gcc_checking_assert (fun); | 214 gcc_checking_assert (fun); |
219 if (count == profile_count::zero ()) | 215 if (count.ipa () == profile_count::zero ()) |
220 return true; | 216 return true; |
221 if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ) | 217 /* Do not trust adjusted counts. This will make us to drop int cold section |
218 code with low execution count as a result of inlining. These low counts | |
219 are not safe even with read profile and may lead us to dropping | |
220 code which actually gets executed into cold section of binary that is not | |
221 desirable. */ | |
222 if (count.precise_p () && profile_status_for_fn (fun) == PROFILE_READ) | |
222 { | 223 { |
223 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION); | 224 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION); |
224 if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs) | 225 if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs) |
225 return false; | 226 return false; |
226 return true; | 227 return true; |
236 /* Return true in case BB is probably never executed. */ | 237 /* Return true in case BB is probably never executed. */ |
237 | 238 |
238 bool | 239 bool |
239 probably_never_executed_bb_p (struct function *fun, const_basic_block bb) | 240 probably_never_executed_bb_p (struct function *fun, const_basic_block bb) |
240 { | 241 { |
241 return probably_never_executed (fun, bb->count, bb->frequency); | 242 return probably_never_executed (fun, bb->count); |
242 } | 243 } |
243 | 244 |
244 | 245 |
245 /* Return true if E is unlikely executed for obvious reasons. */ | 246 /* Return true if E is unlikely executed for obvious reasons. */ |
246 | 247 |
257 bool | 258 bool |
258 probably_never_executed_edge_p (struct function *fun, edge e) | 259 probably_never_executed_edge_p (struct function *fun, edge e) |
259 { | 260 { |
260 if (unlikely_executed_edge_p (e)) | 261 if (unlikely_executed_edge_p (e)) |
261 return true; | 262 return true; |
262 return probably_never_executed (fun, e->count (), EDGE_FREQUENCY (e)); | 263 return probably_never_executed (fun, e->count ()); |
263 } | 264 } |
264 | 265 |
265 /* Return true when current function should always be optimized for size. */ | 266 /* Return true when current function should always be optimized for size. */ |
266 | 267 |
267 bool | 268 bool |
549 void | 550 void |
550 predict_insn_def (rtx_insn *insn, enum br_predictor predictor, | 551 predict_insn_def (rtx_insn *insn, enum br_predictor predictor, |
551 enum prediction taken) | 552 enum prediction taken) |
552 { | 553 { |
553 int probability = predictor_info[(int) predictor].hitrate; | 554 int probability = predictor_info[(int) predictor].hitrate; |
555 gcc_assert (probability != PROB_UNINITIALIZED); | |
554 | 556 |
555 if (taken != TAKEN) | 557 if (taken != TAKEN) |
556 probability = REG_BR_PROB_BASE - probability; | 558 probability = REG_BR_PROB_BASE - probability; |
557 | 559 |
558 predict_insn (insn, predictor, probability); | 560 predict_insn (insn, predictor, probability); |
732 sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index, | 734 sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index, |
733 ep_edge->dest->index); | 735 ep_edge->dest->index); |
734 else | 736 else |
735 edge_info_str[0] = '\0'; | 737 edge_info_str[0] = '\0'; |
736 | 738 |
737 fprintf (file, " %s heuristics%s%s: %.1f%%", | 739 fprintf (file, " %s heuristics%s%s: %.2f%%", |
738 predictor_info[predictor].name, | 740 predictor_info[predictor].name, |
739 edge_info_str, reason_messages[reason], | 741 edge_info_str, reason_messages[reason], |
740 probability * 100.0 / REG_BR_PROB_BASE); | 742 probability * 100.0 / REG_BR_PROB_BASE); |
741 | 743 |
742 if (bb->count.initialized_p ()) | 744 if (bb->count.initialized_p ()) |
751 / bb->count.to_gcov_type ()); | 753 / bb->count.to_gcov_type ()); |
752 } | 754 } |
753 } | 755 } |
754 | 756 |
755 fprintf (file, "\n"); | 757 fprintf (file, "\n"); |
758 | |
759 /* Print output that be easily read by analyze_brprob.py script. We are | |
760 interested only in counts that are read from GCDA files. */ | |
761 if (dump_file && (dump_flags & TDF_DETAILS) | |
762 && bb->count.precise_p () | |
763 && reason == REASON_NONE) | |
764 { | |
765 gcc_assert (e->count ().precise_p ()); | |
766 fprintf (file, ";;heuristics;%s;%" PRId64 ";%" PRId64 ";%.1f;\n", | |
767 predictor_info[predictor].name, | |
768 bb->count.to_gcov_type (), e->count ().to_gcov_type (), | |
769 probability * 100.0 / REG_BR_PROB_BASE); | |
770 } | |
756 } | 771 } |
757 | 772 |
758 /* Return true if STMT is known to be unlikely executed. */ | 773 /* Return true if STMT is known to be unlikely executed. */ |
759 | 774 |
760 static bool | 775 static bool |
811 } | 826 } |
812 | 827 |
813 /* We can not predict the probabilities of outgoing edges of bb. Set them | 828 /* We can not predict the probabilities of outgoing edges of bb. Set them |
814 evenly and hope for the best. If UNLIKELY_EDGES is not null, distribute | 829 evenly and hope for the best. If UNLIKELY_EDGES is not null, distribute |
815 even probability for all edges not mentioned in the set. These edges | 830 even probability for all edges not mentioned in the set. These edges |
816 are given PROB_VERY_UNLIKELY probability. */ | 831 are given PROB_VERY_UNLIKELY probability. Similarly for LIKELY_EDGES, |
832 if we have exactly one likely edge, make the other edges predicted | |
833 as not probable. */ | |
817 | 834 |
818 static void | 835 static void |
819 set_even_probabilities (basic_block bb, | 836 set_even_probabilities (basic_block bb, |
820 hash_set<edge> *unlikely_edges = NULL) | 837 hash_set<edge> *unlikely_edges = NULL, |
838 hash_set<edge_prediction *> *likely_edges = NULL) | |
821 { | 839 { |
822 unsigned nedges = 0, unlikely_count = 0; | 840 unsigned nedges = 0, unlikely_count = 0; |
823 edge e = NULL; | 841 edge e = NULL; |
824 edge_iterator ei; | 842 edge_iterator ei; |
825 profile_probability all = profile_probability::always (); | 843 profile_probability all = profile_probability::always (); |
827 FOR_EACH_EDGE (e, ei, bb->succs) | 845 FOR_EACH_EDGE (e, ei, bb->succs) |
828 if (e->probability.initialized_p ()) | 846 if (e->probability.initialized_p ()) |
829 all -= e->probability; | 847 all -= e->probability; |
830 else if (!unlikely_executed_edge_p (e)) | 848 else if (!unlikely_executed_edge_p (e)) |
831 { | 849 { |
832 nedges ++; | 850 nedges++; |
833 if (unlikely_edges != NULL && unlikely_edges->contains (e)) | 851 if (unlikely_edges != NULL && unlikely_edges->contains (e)) |
834 { | 852 { |
835 all -= profile_probability::very_unlikely (); | 853 all -= profile_probability::very_unlikely (); |
836 unlikely_count++; | 854 unlikely_count++; |
837 } | 855 } |
838 } | 856 } |
839 | 857 |
840 /* Make the distribution even if all edges are unlikely. */ | 858 /* Make the distribution even if all edges are unlikely. */ |
859 unsigned likely_count = likely_edges ? likely_edges->elements () : 0; | |
841 if (unlikely_count == nedges) | 860 if (unlikely_count == nedges) |
842 { | 861 { |
843 unlikely_edges = NULL; | 862 unlikely_edges = NULL; |
844 unlikely_count = 0; | 863 unlikely_count = 0; |
845 } | 864 } |
846 | 865 |
847 unsigned c = nedges - unlikely_count; | 866 /* If we have one likely edge, then use its probability and distribute |
848 | 867 remaining probabilities as even. */ |
849 FOR_EACH_EDGE (e, ei, bb->succs) | 868 if (likely_count == 1) |
850 if (e->probability.initialized_p ()) | 869 { |
851 ; | 870 FOR_EACH_EDGE (e, ei, bb->succs) |
852 else if (!unlikely_executed_edge_p (e)) | 871 if (e->probability.initialized_p ()) |
853 { | 872 ; |
854 if (unlikely_edges != NULL && unlikely_edges->contains (e)) | 873 else if (!unlikely_executed_edge_p (e)) |
855 e->probability = profile_probability::very_unlikely (); | 874 { |
875 edge_prediction *prediction = *likely_edges->begin (); | |
876 int p = prediction->ep_probability; | |
877 profile_probability prob | |
878 = profile_probability::from_reg_br_prob_base (p); | |
879 profile_probability remainder = prob.invert (); | |
880 | |
881 if (prediction->ep_edge == e) | |
882 e->probability = prob; | |
883 else | |
884 e->probability = remainder.apply_scale (1, nedges - 1); | |
885 } | |
856 else | 886 else |
857 e->probability = all.apply_scale (1, c).guessed (); | 887 e->probability = profile_probability::never (); |
858 } | 888 } |
859 else | 889 else |
860 e->probability = profile_probability::never (); | 890 { |
891 /* Make all unlikely edges unlikely and the rest will have even | |
892 probability. */ | |
893 unsigned scale = nedges - unlikely_count; | |
894 FOR_EACH_EDGE (e, ei, bb->succs) | |
895 if (e->probability.initialized_p ()) | |
896 ; | |
897 else if (!unlikely_executed_edge_p (e)) | |
898 { | |
899 if (unlikely_edges != NULL && unlikely_edges->contains (e)) | |
900 e->probability = profile_probability::very_unlikely (); | |
901 else | |
902 e->probability = all.apply_scale (1, scale); | |
903 } | |
904 else | |
905 e->probability = profile_probability::never (); | |
906 } | |
861 } | 907 } |
862 | 908 |
863 /* Add REG_BR_PROB note to JUMP with PROB. */ | 909 /* Add REG_BR_PROB note to JUMP with PROB. */ |
864 | 910 |
865 void | 911 void |
1122 bool found = false; | 1168 bool found = false; |
1123 struct edge_prediction *pred; | 1169 struct edge_prediction *pred; |
1124 int nedges = 0; | 1170 int nedges = 0; |
1125 edge e, first = NULL, second = NULL; | 1171 edge e, first = NULL, second = NULL; |
1126 edge_iterator ei; | 1172 edge_iterator ei; |
1173 int nzero = 0; | |
1174 int nunknown = 0; | |
1127 | 1175 |
1128 FOR_EACH_EDGE (e, ei, bb->succs) | 1176 FOR_EACH_EDGE (e, ei, bb->succs) |
1129 if (!unlikely_executed_edge_p (e)) | 1177 { |
1130 { | 1178 if (!unlikely_executed_edge_p (e)) |
1131 nedges ++; | 1179 { |
1132 if (first && !second) | 1180 nedges ++; |
1133 second = e; | 1181 if (first && !second) |
1134 if (!first) | 1182 second = e; |
1135 first = e; | 1183 if (!first) |
1136 } | 1184 first = e; |
1137 else if (!e->probability.initialized_p ()) | 1185 } |
1138 e->probability = profile_probability::never (); | 1186 else if (!e->probability.initialized_p ()) |
1187 e->probability = profile_probability::never (); | |
1188 if (!e->probability.initialized_p ()) | |
1189 nunknown++; | |
1190 else if (e->probability == profile_probability::never ()) | |
1191 nzero++; | |
1192 } | |
1139 | 1193 |
1140 /* When there is no successor or only one choice, prediction is easy. | 1194 /* When there is no successor or only one choice, prediction is easy. |
1141 | 1195 |
1142 When we have a basic block with more than 2 successors, the situation | 1196 When we have a basic block with more than 2 successors, the situation |
1143 is more complicated as DS theory cannot be used literally. | 1197 is more complicated as DS theory cannot be used literally. |
1151 one interesting reliable predictor (noreturn call), which can be | 1205 one interesting reliable predictor (noreturn call), which can be |
1152 handled with a bit easier approach. */ | 1206 handled with a bit easier approach. */ |
1153 if (nedges != 2) | 1207 if (nedges != 2) |
1154 { | 1208 { |
1155 hash_set<edge> unlikely_edges (4); | 1209 hash_set<edge> unlikely_edges (4); |
1210 hash_set<edge_prediction *> likely_edges (4); | |
1156 | 1211 |
1157 /* Identify all edges that have a probability close to very unlikely. | 1212 /* Identify all edges that have a probability close to very unlikely. |
1158 Doing the approach for very unlikely doesn't worth for doing as | 1213 Doing the approach for very unlikely doesn't worth for doing as |
1159 there's no such probability in SPEC2006 benchmark. */ | 1214 there's no such probability in SPEC2006 benchmark. */ |
1160 edge_prediction **preds = bb_predictions->get (bb); | 1215 edge_prediction **preds = bb_predictions->get (bb); |
1161 if (preds) | 1216 if (preds) |
1162 for (pred = *preds; pred; pred = pred->ep_next) | 1217 for (pred = *preds; pred; pred = pred->ep_next) |
1163 if (pred->ep_probability <= PROB_VERY_UNLIKELY) | 1218 { |
1164 unlikely_edges.add (pred->ep_edge); | 1219 if (pred->ep_probability <= PROB_VERY_UNLIKELY) |
1220 unlikely_edges.add (pred->ep_edge); | |
1221 if (pred->ep_probability >= PROB_VERY_LIKELY | |
1222 || pred->ep_predictor == PRED_BUILTIN_EXPECT) | |
1223 likely_edges.add (pred); | |
1224 } | |
1165 | 1225 |
1166 if (!dry_run) | 1226 if (!dry_run) |
1167 set_even_probabilities (bb, &unlikely_edges); | 1227 set_even_probabilities (bb, &unlikely_edges, &likely_edges); |
1168 clear_bb_predictions (bb); | 1228 clear_bb_predictions (bb); |
1169 if (dump_file) | 1229 if (dump_file) |
1170 { | 1230 { |
1171 fprintf (dump_file, "Predictions for bb %i\n", bb->index); | 1231 fprintf (dump_file, "Predictions for bb %i\n", bb->index); |
1172 if (unlikely_edges.elements () == 0) | 1232 if (unlikely_edges.elements () == 0) |
1287 ? REASON_NONE : REASON_IGNORED, pred->ep_edge); | 1347 ? REASON_NONE : REASON_IGNORED, pred->ep_edge); |
1288 } | 1348 } |
1289 } | 1349 } |
1290 clear_bb_predictions (bb); | 1350 clear_bb_predictions (bb); |
1291 | 1351 |
1292 if (!bb->count.initialized_p () && !dry_run) | 1352 |
1353 /* If we have only one successor which is unknown, we can compute missing | |
1354 probablity. */ | |
1355 if (nunknown == 1) | |
1356 { | |
1357 profile_probability prob = profile_probability::always (); | |
1358 edge missing = NULL; | |
1359 | |
1360 FOR_EACH_EDGE (e, ei, bb->succs) | |
1361 if (e->probability.initialized_p ()) | |
1362 prob -= e->probability; | |
1363 else if (missing == NULL) | |
1364 missing = e; | |
1365 else | |
1366 gcc_unreachable (); | |
1367 missing->probability = prob; | |
1368 } | |
1369 /* If nothing is unknown, we have nothing to update. */ | |
1370 else if (!nunknown && nzero != (int)EDGE_COUNT (bb->succs)) | |
1371 ; | |
1372 else if (!dry_run) | |
1293 { | 1373 { |
1294 first->probability | 1374 first->probability |
1295 = profile_probability::from_reg_br_prob_base (combined_probability); | 1375 = profile_probability::from_reg_br_prob_base (combined_probability); |
1296 second->probability = first->probability.invert (); | 1376 second->probability = first->probability.invert (); |
1297 } | 1377 } |
1586 if (tree_fits_shwi_p (loop_bound_var) | 1666 if (tree_fits_shwi_p (loop_bound_var) |
1587 && tree_fits_shwi_p (compare_var) | 1667 && tree_fits_shwi_p (compare_var) |
1588 && tree_fits_shwi_p (compare_base)) | 1668 && tree_fits_shwi_p (compare_base)) |
1589 { | 1669 { |
1590 int probability; | 1670 int probability; |
1591 bool overflow, overall_overflow = false; | 1671 wi::overflow_type overflow; |
1672 bool overall_overflow = false; | |
1592 widest_int compare_count, tem; | 1673 widest_int compare_count, tem; |
1593 | 1674 |
1594 /* (loop_bound - base) / compare_step */ | 1675 /* (loop_bound - base) / compare_step */ |
1595 tem = wi::sub (wi::to_widest (loop_bound_var), | 1676 tem = wi::sub (wi::to_widest (loop_bound_var), |
1596 wi::to_widest (compare_base), SIGNED, &overflow); | 1677 wi::to_widest (compare_base), SIGNED, &overflow); |
2218 { | 2299 { |
2219 bb_estimate_probability_locally (bb); | 2300 bb_estimate_probability_locally (bb); |
2220 combine_predictions_for_insn (BB_END (bb), bb); | 2301 combine_predictions_for_insn (BB_END (bb), bb); |
2221 } | 2302 } |
2222 | 2303 |
2223 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor); | 2304 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor, |
2305 HOST_WIDE_INT *probability); | |
2224 | 2306 |
2225 /* Helper function for expr_expected_value. */ | 2307 /* Helper function for expr_expected_value. */ |
2226 | 2308 |
2227 static tree | 2309 static tree |
2228 expr_expected_value_1 (tree type, tree op0, enum tree_code code, | 2310 expr_expected_value_1 (tree type, tree op0, enum tree_code code, |
2229 tree op1, bitmap visited, enum br_predictor *predictor) | 2311 tree op1, bitmap visited, enum br_predictor *predictor, |
2312 HOST_WIDE_INT *probability) | |
2230 { | 2313 { |
2231 gimple *def; | 2314 gimple *def; |
2232 | 2315 |
2233 if (predictor) | 2316 /* Reset returned probability value. */ |
2234 *predictor = PRED_UNCONDITIONAL; | 2317 *probability = -1; |
2318 *predictor = PRED_UNCONDITIONAL; | |
2235 | 2319 |
2236 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) | 2320 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) |
2237 { | 2321 { |
2238 if (TREE_CONSTANT (op0)) | 2322 if (TREE_CONSTANT (op0)) |
2239 return op0; | 2323 return op0; |
2248 && (gimple_call_internal_fn (def) | 2332 && (gimple_call_internal_fn (def) |
2249 == IFN_ATOMIC_COMPARE_EXCHANGE)) | 2333 == IFN_ATOMIC_COMPARE_EXCHANGE)) |
2250 { | 2334 { |
2251 /* Assume that any given atomic operation has low contention, | 2335 /* Assume that any given atomic operation has low contention, |
2252 and thus the compare-and-swap operation succeeds. */ | 2336 and thus the compare-and-swap operation succeeds. */ |
2253 if (predictor) | 2337 *predictor = PRED_COMPARE_AND_SWAP; |
2254 *predictor = PRED_COMPARE_AND_SWAP; | |
2255 return build_one_cst (TREE_TYPE (op0)); | 2338 return build_one_cst (TREE_TYPE (op0)); |
2256 } | 2339 } |
2257 } | 2340 } |
2258 } | 2341 } |
2259 | 2342 |
2285 likely a constant. So be optimistic and just | 2368 likely a constant. So be optimistic and just |
2286 continue with the next argument. */ | 2369 continue with the next argument. */ |
2287 if (arg == PHI_RESULT (def)) | 2370 if (arg == PHI_RESULT (def)) |
2288 continue; | 2371 continue; |
2289 | 2372 |
2290 new_val = expr_expected_value (arg, visited, &predictor2); | 2373 HOST_WIDE_INT probability2; |
2374 new_val = expr_expected_value (arg, visited, &predictor2, | |
2375 &probability2); | |
2291 | 2376 |
2292 /* It is difficult to combine value predictors. Simply assume | 2377 /* It is difficult to combine value predictors. Simply assume |
2293 that later predictor is weaker and take its prediction. */ | 2378 that later predictor is weaker and take its prediction. */ |
2294 if (predictor && *predictor < predictor2) | 2379 if (*predictor < predictor2) |
2295 *predictor = predictor2; | 2380 { |
2381 *predictor = predictor2; | |
2382 *probability = probability2; | |
2383 } | |
2296 if (!new_val) | 2384 if (!new_val) |
2297 return NULL; | 2385 return NULL; |
2298 if (!val) | 2386 if (!val) |
2299 val = new_val; | 2387 val = new_val; |
2300 else if (!operand_equal_p (val, new_val, false)) | 2388 else if (!operand_equal_p (val, new_val, false)) |
2309 | 2397 |
2310 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)), | 2398 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)), |
2311 gimple_assign_rhs1 (def), | 2399 gimple_assign_rhs1 (def), |
2312 gimple_assign_rhs_code (def), | 2400 gimple_assign_rhs_code (def), |
2313 gimple_assign_rhs2 (def), | 2401 gimple_assign_rhs2 (def), |
2314 visited, predictor); | 2402 visited, predictor, probability); |
2315 } | 2403 } |
2316 | 2404 |
2317 if (is_gimple_call (def)) | 2405 if (is_gimple_call (def)) |
2318 { | 2406 { |
2319 tree decl = gimple_call_fndecl (def); | 2407 tree decl = gimple_call_fndecl (def); |
2324 { | 2412 { |
2325 gcc_assert (gimple_call_num_args (def) == 3); | 2413 gcc_assert (gimple_call_num_args (def) == 3); |
2326 tree val = gimple_call_arg (def, 0); | 2414 tree val = gimple_call_arg (def, 0); |
2327 if (TREE_CONSTANT (val)) | 2415 if (TREE_CONSTANT (val)) |
2328 return val; | 2416 return val; |
2329 if (predictor) | 2417 tree val2 = gimple_call_arg (def, 2); |
2330 { | 2418 gcc_assert (TREE_CODE (val2) == INTEGER_CST |
2331 tree val2 = gimple_call_arg (def, 2); | 2419 && tree_fits_uhwi_p (val2) |
2332 gcc_assert (TREE_CODE (val2) == INTEGER_CST | 2420 && tree_to_uhwi (val2) < END_PREDICTORS); |
2333 && tree_fits_uhwi_p (val2) | 2421 *predictor = (enum br_predictor) tree_to_uhwi (val2); |
2334 && tree_to_uhwi (val2) < END_PREDICTORS); | 2422 if (*predictor == PRED_BUILTIN_EXPECT) |
2335 *predictor = (enum br_predictor) tree_to_uhwi (val2); | 2423 *probability |
2336 } | 2424 = HITRATE (PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY)); |
2337 return gimple_call_arg (def, 1); | 2425 return gimple_call_arg (def, 1); |
2338 } | 2426 } |
2339 return NULL; | 2427 return NULL; |
2340 } | 2428 } |
2429 | |
2430 if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW (decl)) | |
2431 { | |
2432 if (predictor) | |
2433 *predictor = PRED_MALLOC_NONNULL; | |
2434 return boolean_true_node; | |
2435 } | |
2436 | |
2341 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | 2437 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) |
2342 switch (DECL_FUNCTION_CODE (decl)) | 2438 switch (DECL_FUNCTION_CODE (decl)) |
2343 { | 2439 { |
2344 case BUILT_IN_EXPECT: | 2440 case BUILT_IN_EXPECT: |
2345 { | 2441 { |
2347 if (gimple_call_num_args (def) != 2) | 2443 if (gimple_call_num_args (def) != 2) |
2348 return NULL; | 2444 return NULL; |
2349 val = gimple_call_arg (def, 0); | 2445 val = gimple_call_arg (def, 0); |
2350 if (TREE_CONSTANT (val)) | 2446 if (TREE_CONSTANT (val)) |
2351 return val; | 2447 return val; |
2352 if (predictor) | 2448 *predictor = PRED_BUILTIN_EXPECT; |
2353 *predictor = PRED_BUILTIN_EXPECT; | 2449 *probability |
2450 = HITRATE (PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY)); | |
2451 return gimple_call_arg (def, 1); | |
2452 } | |
2453 case BUILT_IN_EXPECT_WITH_PROBABILITY: | |
2454 { | |
2455 tree val; | |
2456 if (gimple_call_num_args (def) != 3) | |
2457 return NULL; | |
2458 val = gimple_call_arg (def, 0); | |
2459 if (TREE_CONSTANT (val)) | |
2460 return val; | |
2461 /* Compute final probability as: | |
2462 probability * REG_BR_PROB_BASE. */ | |
2463 tree prob = gimple_call_arg (def, 2); | |
2464 tree t = TREE_TYPE (prob); | |
2465 tree base = build_int_cst (integer_type_node, | |
2466 REG_BR_PROB_BASE); | |
2467 base = build_real_from_int_cst (t, base); | |
2468 tree r = fold_build2_initializer_loc (UNKNOWN_LOCATION, | |
2469 MULT_EXPR, t, prob, base); | |
2470 HOST_WIDE_INT probi | |
2471 = real_to_integer (TREE_REAL_CST_PTR (r)); | |
2472 if (probi >= 0 && probi <= REG_BR_PROB_BASE) | |
2473 { | |
2474 *predictor = PRED_BUILTIN_EXPECT_WITH_PROBABILITY; | |
2475 *probability = probi; | |
2476 } | |
2354 return gimple_call_arg (def, 1); | 2477 return gimple_call_arg (def, 1); |
2355 } | 2478 } |
2356 | 2479 |
2357 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N: | 2480 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N: |
2358 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1: | 2481 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1: |
2367 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4: | 2490 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4: |
2368 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8: | 2491 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8: |
2369 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16: | 2492 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16: |
2370 /* Assume that any given atomic operation has low contention, | 2493 /* Assume that any given atomic operation has low contention, |
2371 and thus the compare-and-swap operation succeeds. */ | 2494 and thus the compare-and-swap operation succeeds. */ |
2495 *predictor = PRED_COMPARE_AND_SWAP; | |
2496 return boolean_true_node; | |
2497 case BUILT_IN_REALLOC: | |
2372 if (predictor) | 2498 if (predictor) |
2373 *predictor = PRED_COMPARE_AND_SWAP; | 2499 *predictor = PRED_MALLOC_NONNULL; |
2374 return boolean_true_node; | 2500 return boolean_true_node; |
2375 default: | 2501 default: |
2376 break; | 2502 break; |
2377 } | 2503 } |
2378 } | 2504 } |
2382 | 2508 |
2383 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) | 2509 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) |
2384 { | 2510 { |
2385 tree res; | 2511 tree res; |
2386 enum br_predictor predictor2; | 2512 enum br_predictor predictor2; |
2387 op0 = expr_expected_value (op0, visited, predictor); | 2513 HOST_WIDE_INT probability2; |
2514 op0 = expr_expected_value (op0, visited, predictor, probability); | |
2388 if (!op0) | 2515 if (!op0) |
2389 return NULL; | 2516 return NULL; |
2390 op1 = expr_expected_value (op1, visited, &predictor2); | 2517 op1 = expr_expected_value (op1, visited, &predictor2, &probability2); |
2391 if (predictor && *predictor < predictor2) | |
2392 *predictor = predictor2; | |
2393 if (!op1) | 2518 if (!op1) |
2394 return NULL; | 2519 return NULL; |
2395 res = fold_build2 (code, type, op0, op1); | 2520 res = fold_build2 (code, type, op0, op1); |
2396 if (TREE_CONSTANT (res)) | 2521 if (TREE_CODE (res) == INTEGER_CST |
2397 return res; | 2522 && TREE_CODE (op0) == INTEGER_CST |
2523 && TREE_CODE (op1) == INTEGER_CST) | |
2524 { | |
2525 /* Combine binary predictions. */ | |
2526 if (*probability != -1 || probability2 != -1) | |
2527 { | |
2528 HOST_WIDE_INT p1 = get_predictor_value (*predictor, *probability); | |
2529 HOST_WIDE_INT p2 = get_predictor_value (predictor2, probability2); | |
2530 *probability = RDIV (p1 * p2, REG_BR_PROB_BASE); | |
2531 } | |
2532 | |
2533 if (*predictor < predictor2) | |
2534 *predictor = predictor2; | |
2535 | |
2536 return res; | |
2537 } | |
2398 return NULL; | 2538 return NULL; |
2399 } | 2539 } |
2400 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS) | 2540 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS) |
2401 { | 2541 { |
2402 tree res; | 2542 tree res; |
2403 op0 = expr_expected_value (op0, visited, predictor); | 2543 op0 = expr_expected_value (op0, visited, predictor, probability); |
2404 if (!op0) | 2544 if (!op0) |
2405 return NULL; | 2545 return NULL; |
2406 res = fold_build1 (code, type, op0); | 2546 res = fold_build1 (code, type, op0); |
2407 if (TREE_CONSTANT (res)) | 2547 if (TREE_CONSTANT (res)) |
2408 return res; | 2548 return res; |
2419 propagation based prediction), but such tricks shall go to new | 2559 propagation based prediction), but such tricks shall go to new |
2420 implementation. */ | 2560 implementation. */ |
2421 | 2561 |
2422 static tree | 2562 static tree |
2423 expr_expected_value (tree expr, bitmap visited, | 2563 expr_expected_value (tree expr, bitmap visited, |
2424 enum br_predictor *predictor) | 2564 enum br_predictor *predictor, |
2565 HOST_WIDE_INT *probability) | |
2425 { | 2566 { |
2426 enum tree_code code; | 2567 enum tree_code code; |
2427 tree op0, op1; | 2568 tree op0, op1; |
2428 | 2569 |
2429 if (TREE_CONSTANT (expr)) | 2570 if (TREE_CONSTANT (expr)) |
2430 { | 2571 { |
2431 if (predictor) | 2572 *predictor = PRED_UNCONDITIONAL; |
2432 *predictor = PRED_UNCONDITIONAL; | 2573 *probability = -1; |
2433 return expr; | 2574 return expr; |
2434 } | 2575 } |
2435 | 2576 |
2436 extract_ops_from_tree (expr, &code, &op0, &op1); | 2577 extract_ops_from_tree (expr, &code, &op0, &op1); |
2437 return expr_expected_value_1 (TREE_TYPE (expr), | 2578 return expr_expected_value_1 (TREE_TYPE (expr), |
2438 op0, code, op1, visited, predictor); | 2579 op0, code, op1, visited, predictor, |
2580 probability); | |
2439 } | 2581 } |
2440 | 2582 |
2583 | |
2584 /* Return probability of a PREDICTOR. If the predictor has variable | |
2585 probability return passed PROBABILITY. */ | |
2586 | |
2587 static HOST_WIDE_INT | |
2588 get_predictor_value (br_predictor predictor, HOST_WIDE_INT probability) | |
2589 { | |
2590 switch (predictor) | |
2591 { | |
2592 case PRED_BUILTIN_EXPECT: | |
2593 case PRED_BUILTIN_EXPECT_WITH_PROBABILITY: | |
2594 gcc_assert (probability != -1); | |
2595 return probability; | |
2596 default: | |
2597 gcc_assert (probability == -1); | |
2598 return predictor_info[(int) predictor].hitrate; | |
2599 } | |
2600 } | |
2601 | |
2441 /* Predict using opcode of the last statement in basic block. */ | 2602 /* Predict using opcode of the last statement in basic block. */ |
2442 static void | 2603 static void |
2443 tree_predict_by_opcode (basic_block bb) | 2604 tree_predict_by_opcode (basic_block bb) |
2444 { | 2605 { |
2445 gimple *stmt = last_stmt (bb); | 2606 gimple *stmt = last_stmt (bb); |
2448 tree type; | 2609 tree type; |
2449 tree val; | 2610 tree val; |
2450 enum tree_code cmp; | 2611 enum tree_code cmp; |
2451 edge_iterator ei; | 2612 edge_iterator ei; |
2452 enum br_predictor predictor; | 2613 enum br_predictor predictor; |
2453 | 2614 HOST_WIDE_INT probability; |
2454 if (!stmt || gimple_code (stmt) != GIMPLE_COND) | 2615 |
2616 if (!stmt) | |
2617 return; | |
2618 | |
2619 if (gswitch *sw = dyn_cast <gswitch *> (stmt)) | |
2620 { | |
2621 tree index = gimple_switch_index (sw); | |
2622 tree val = expr_expected_value (index, auto_bitmap (), | |
2623 &predictor, &probability); | |
2624 if (val && TREE_CODE (val) == INTEGER_CST) | |
2625 { | |
2626 edge e = find_taken_edge_switch_expr (sw, val); | |
2627 if (predictor == PRED_BUILTIN_EXPECT) | |
2628 { | |
2629 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY); | |
2630 gcc_assert (percent >= 0 && percent <= 100); | |
2631 predict_edge (e, PRED_BUILTIN_EXPECT, | |
2632 HITRATE (percent)); | |
2633 } | |
2634 else | |
2635 predict_edge_def (e, predictor, TAKEN); | |
2636 } | |
2637 } | |
2638 | |
2639 if (gimple_code (stmt) != GIMPLE_COND) | |
2455 return; | 2640 return; |
2456 FOR_EACH_EDGE (then_edge, ei, bb->succs) | 2641 FOR_EACH_EDGE (then_edge, ei, bb->succs) |
2457 if (then_edge->flags & EDGE_TRUE_VALUE) | 2642 if (then_edge->flags & EDGE_TRUE_VALUE) |
2458 break; | 2643 break; |
2459 op0 = gimple_cond_lhs (stmt); | 2644 op0 = gimple_cond_lhs (stmt); |
2460 op1 = gimple_cond_rhs (stmt); | 2645 op1 = gimple_cond_rhs (stmt); |
2461 cmp = gimple_cond_code (stmt); | 2646 cmp = gimple_cond_code (stmt); |
2462 type = TREE_TYPE (op0); | 2647 type = TREE_TYPE (op0); |
2463 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (), | 2648 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (), |
2464 &predictor); | 2649 &predictor, &probability); |
2465 if (val && TREE_CODE (val) == INTEGER_CST) | 2650 if (val && TREE_CODE (val) == INTEGER_CST) |
2466 { | 2651 { |
2467 if (predictor == PRED_BUILTIN_EXPECT) | 2652 HOST_WIDE_INT prob = get_predictor_value (predictor, probability); |
2468 { | 2653 if (integer_zerop (val)) |
2469 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY); | 2654 prob = REG_BR_PROB_BASE - prob; |
2470 | 2655 predict_edge (then_edge, predictor, prob); |
2471 gcc_assert (percent >= 0 && percent <= 100); | |
2472 if (integer_zerop (val)) | |
2473 percent = 100 - percent; | |
2474 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent)); | |
2475 } | |
2476 else | |
2477 predict_edge_def (then_edge, predictor, | |
2478 integer_zerop (val) ? NOT_TAKEN : TAKEN); | |
2479 } | 2656 } |
2480 /* Try "pointer heuristic." | 2657 /* Try "pointer heuristic." |
2481 A comparison ptr == 0 is predicted as false. | 2658 A comparison ptr == 0 is predicted as false. |
2482 Similarly, a comparison ptr1 == ptr2 is predicted as false. */ | 2659 Similarly, a comparison ptr1 == ptr2 is predicted as false. */ |
2483 if (POINTER_TYPE_P (type)) | 2660 if (POINTER_TYPE_P (type)) |
2615 } | 2792 } |
2616 } | 2793 } |
2617 return PRED_NO_PREDICTION; | 2794 return PRED_NO_PREDICTION; |
2618 } | 2795 } |
2619 | 2796 |
2797 /* Return zero if phi result could have values other than -1, 0 or 1, | |
2798 otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1 | |
2799 values are used or likely. */ | |
2800 | |
2801 static int | |
2802 zero_one_minusone (gphi *phi, int limit) | |
2803 { | |
2804 int phi_num_args = gimple_phi_num_args (phi); | |
2805 int ret = 0; | |
2806 for (int i = 0; i < phi_num_args; i++) | |
2807 { | |
2808 tree t = PHI_ARG_DEF (phi, i); | |
2809 if (TREE_CODE (t) != INTEGER_CST) | |
2810 continue; | |
2811 wide_int w = wi::to_wide (t); | |
2812 if (w == -1) | |
2813 ret |= 1; | |
2814 else if (w == 0) | |
2815 ret |= 2; | |
2816 else if (w == 1) | |
2817 ret |= 4; | |
2818 else | |
2819 return 0; | |
2820 } | |
2821 for (int i = 0; i < phi_num_args; i++) | |
2822 { | |
2823 tree t = PHI_ARG_DEF (phi, i); | |
2824 if (TREE_CODE (t) == INTEGER_CST) | |
2825 continue; | |
2826 if (TREE_CODE (t) != SSA_NAME) | |
2827 return 0; | |
2828 gimple *g = SSA_NAME_DEF_STMT (t); | |
2829 if (gimple_code (g) == GIMPLE_PHI && limit > 0) | |
2830 if (int r = zero_one_minusone (as_a <gphi *> (g), limit - 1)) | |
2831 { | |
2832 ret |= r; | |
2833 continue; | |
2834 } | |
2835 if (!is_gimple_assign (g)) | |
2836 return 0; | |
2837 if (gimple_assign_cast_p (g)) | |
2838 { | |
2839 tree rhs1 = gimple_assign_rhs1 (g); | |
2840 if (TREE_CODE (rhs1) != SSA_NAME | |
2841 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) | |
2842 || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1 | |
2843 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) | |
2844 return 0; | |
2845 ret |= (2 | 4); | |
2846 continue; | |
2847 } | |
2848 if (TREE_CODE_CLASS (gimple_assign_rhs_code (g)) != tcc_comparison) | |
2849 return 0; | |
2850 ret |= (2 | 4); | |
2851 } | |
2852 return ret; | |
2853 } | |
2854 | |
2620 /* Find the basic block with return expression and look up for possible | 2855 /* Find the basic block with return expression and look up for possible |
2621 return value trying to apply RETURN_PREDICTION heuristics. */ | 2856 return value trying to apply RETURN_PREDICTION heuristics. */ |
2622 static void | 2857 static void |
2623 apply_return_prediction (void) | 2858 apply_return_prediction (void) |
2624 { | 2859 { |
2651 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI) | 2886 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI) |
2652 return; | 2887 return; |
2653 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val)); | 2888 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val)); |
2654 phi_num_args = gimple_phi_num_args (phi); | 2889 phi_num_args = gimple_phi_num_args (phi); |
2655 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction); | 2890 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction); |
2891 | |
2892 /* Avoid the case where the function returns -1, 0 and 1 values and | |
2893 nothing else. Those could be qsort etc. comparison functions | |
2894 where the negative return isn't less probable than positive. | |
2895 For this require that the function returns at least -1 or 1 | |
2896 or -1 and a boolean value or comparison result, so that functions | |
2897 returning just -1 and 0 are treated as if -1 represents error value. */ | |
2898 if (INTEGRAL_TYPE_P (TREE_TYPE (return_val)) | |
2899 && !TYPE_UNSIGNED (TREE_TYPE (return_val)) | |
2900 && TYPE_PRECISION (TREE_TYPE (return_val)) > 1) | |
2901 if (int r = zero_one_minusone (phi, 3)) | |
2902 if ((r & (1 | 4)) == (1 | 4)) | |
2903 return; | |
2656 | 2904 |
2657 /* Avoid the degenerate case where all return values form the function | 2905 /* Avoid the degenerate case where all return values form the function |
2658 belongs to same category (ie they are all positive constants) | 2906 belongs to same category (ie they are all positive constants) |
2659 so we can hardly say something about them. */ | 2907 so we can hardly say something about them. */ |
2660 for (i = 1; i < phi_num_args; i++) | 2908 for (i = 1; i < phi_num_args; i++) |
3012 e->src->index, bb->index); | 3260 e->src->index, bb->index); |
3013 } | 3261 } |
3014 BLOCK_INFO (bb)->npredecessors = count; | 3262 BLOCK_INFO (bb)->npredecessors = count; |
3015 /* When function never returns, we will never process exit block. */ | 3263 /* When function never returns, we will never process exit block. */ |
3016 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) | 3264 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
3017 { | 3265 bb->count = profile_count::zero (); |
3018 bb->count = profile_count::zero (); | |
3019 bb->frequency = 0; | |
3020 } | |
3021 } | 3266 } |
3022 | 3267 |
3023 BLOCK_INFO (head)->frequency = 1; | 3268 BLOCK_INFO (head)->frequency = 1; |
3024 last = head; | 3269 last = head; |
3025 for (bb = head; bb; bb = nextbb) | 3270 for (bb = head; bb; bb = nextbb) |
3048 { | 3293 { |
3049 /* frequency += (e->probability | 3294 /* frequency += (e->probability |
3050 * BLOCK_INFO (e->src)->frequency / | 3295 * BLOCK_INFO (e->src)->frequency / |
3051 REG_BR_PROB_BASE); */ | 3296 REG_BR_PROB_BASE); */ |
3052 | 3297 |
3053 sreal tmp = e->probability.to_reg_br_prob_base (); | 3298 /* FIXME: Graphite is producing edges with no profile. Once |
3299 this is fixed, drop this. */ | |
3300 sreal tmp = e->probability.initialized_p () ? | |
3301 e->probability.to_reg_br_prob_base () : 0; | |
3054 tmp *= BLOCK_INFO (e->src)->frequency; | 3302 tmp *= BLOCK_INFO (e->src)->frequency; |
3055 tmp *= real_inv_br_prob_base; | 3303 tmp *= real_inv_br_prob_base; |
3056 frequency += tmp; | 3304 frequency += tmp; |
3057 } | 3305 } |
3058 | 3306 |
3080 { | 3328 { |
3081 /* EDGE_INFO (e)->back_edge_prob | 3329 /* EDGE_INFO (e)->back_edge_prob |
3082 = ((e->probability * BLOCK_INFO (bb)->frequency) | 3330 = ((e->probability * BLOCK_INFO (bb)->frequency) |
3083 / REG_BR_PROB_BASE); */ | 3331 / REG_BR_PROB_BASE); */ |
3084 | 3332 |
3085 sreal tmp = e->probability.to_reg_br_prob_base (); | 3333 /* FIXME: Graphite is producing edges with no profile. Once |
3334 this is fixed, drop this. */ | |
3335 sreal tmp = e->probability.initialized_p () ? | |
3336 e->probability.to_reg_br_prob_base () : 0; | |
3086 tmp *= BLOCK_INFO (bb)->frequency; | 3337 tmp *= BLOCK_INFO (bb)->frequency; |
3087 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; | 3338 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; |
3088 } | 3339 } |
3089 | 3340 |
3090 /* Propagate to successor blocks. */ | 3341 /* Propagate to successor blocks. */ |
3194 warning (0, "Missing counts for called function %s", | 3445 warning (0, "Missing counts for called function %s", |
3195 node->dump_name ()); | 3446 node->dump_name ()); |
3196 } | 3447 } |
3197 | 3448 |
3198 basic_block bb; | 3449 basic_block bb; |
3199 FOR_ALL_BB_FN (bb, fn) | 3450 if (opt_for_fn (node->decl, flag_guess_branch_prob)) |
3200 { | 3451 { |
3201 bb->count = profile_count::uninitialized (); | 3452 bool clear_zeros |
3453 = !ENTRY_BLOCK_PTR_FOR_FN (fn)->count.nonzero_p (); | |
3454 FOR_ALL_BB_FN (bb, fn) | |
3455 if (clear_zeros || !(bb->count == profile_count::zero ())) | |
3456 bb->count = bb->count.guessed_local (); | |
3457 fn->cfg->count_max = fn->cfg->count_max.guessed_local (); | |
3458 } | |
3459 else | |
3460 { | |
3461 FOR_ALL_BB_FN (bb, fn) | |
3462 bb->count = profile_count::uninitialized (); | |
3463 fn->cfg->count_max = profile_count::uninitialized (); | |
3202 } | 3464 } |
3203 | 3465 |
3204 struct cgraph_edge *e; | 3466 struct cgraph_edge *e; |
3205 for (e = node->callees; e; e = e->next_caller) | 3467 for (e = node->callees; e; e = e->next_callee) |
3206 { | 3468 e->count = gimple_bb (e->call_stmt)->count; |
3207 e->frequency = compute_call_stmt_bb_frequency (e->caller->decl, | 3469 for (e = node->indirect_calls; e; e = e->next_callee) |
3208 gimple_bb (e->call_stmt)); | 3470 e->count = gimple_bb (e->call_stmt)->count; |
3209 } | 3471 node->count = ENTRY_BLOCK_PTR_FOR_FN (fn)->count; |
3210 | 3472 |
3211 profile_status_for_fn (fn) | 3473 profile_status_for_fn (fn) |
3212 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT); | 3474 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT); |
3213 node->frequency | 3475 node->frequency |
3214 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL; | 3476 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL; |
3241 struct cgraph_edge *e; | 3503 struct cgraph_edge *e; |
3242 profile_count call_count = profile_count::zero (); | 3504 profile_count call_count = profile_count::zero (); |
3243 gcov_type max_tp_first_run = 0; | 3505 gcov_type max_tp_first_run = 0; |
3244 struct function *fn = DECL_STRUCT_FUNCTION (node->decl); | 3506 struct function *fn = DECL_STRUCT_FUNCTION (node->decl); |
3245 | 3507 |
3246 if (!(node->count == profile_count::zero ())) | 3508 if (node->count.ipa ().nonzero_p ()) |
3247 continue; | 3509 continue; |
3248 for (e = node->callers; e; e = e->next_caller) | 3510 for (e = node->callers; e; e = e->next_caller) |
3249 if (e->count.initialized_p () && e->count > 0) | 3511 if (e->count.ipa ().initialized_p () && e->count.ipa () > 0) |
3250 { | 3512 { |
3251 call_count = call_count + e->count; | 3513 call_count = call_count + e->count.ipa (); |
3252 | 3514 |
3253 if (e->caller->tp_first_run > max_tp_first_run) | 3515 if (e->caller->tp_first_run > max_tp_first_run) |
3254 max_tp_first_run = e->caller->tp_first_run; | 3516 max_tp_first_run = e->caller->tp_first_run; |
3255 } | 3517 } |
3256 | 3518 |
3279 for (e = node->callees; e; e = e->next_caller) | 3541 for (e = node->callees; e; e = e->next_caller) |
3280 { | 3542 { |
3281 struct cgraph_node *callee = e->callee; | 3543 struct cgraph_node *callee = e->callee; |
3282 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl); | 3544 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl); |
3283 | 3545 |
3284 if (callee->count > 0) | 3546 if (!(e->count.ipa () == profile_count::zero ()) |
3547 && callee->count.ipa ().nonzero_p ()) | |
3285 continue; | 3548 continue; |
3286 if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl)) | 3549 if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl)) |
3287 && fn && fn->cfg | 3550 && fn && fn->cfg |
3288 && profile_status_for_fn (fn) == PROFILE_READ) | 3551 && profile_status_for_fn (fn) == PROFILE_READ) |
3289 { | 3552 { |
3296 | 3559 |
3297 /* Convert counts measured by profile driven feedback to frequencies. | 3560 /* Convert counts measured by profile driven feedback to frequencies. |
3298 Return nonzero iff there was any nonzero execution count. */ | 3561 Return nonzero iff there was any nonzero execution count. */ |
3299 | 3562 |
3300 bool | 3563 bool |
3301 counts_to_freqs (void) | 3564 update_max_bb_count (void) |
3302 { | 3565 { |
3303 gcov_type count_max; | 3566 profile_count true_count_max = profile_count::uninitialized (); |
3304 profile_count true_count_max = profile_count::zero (); | |
3305 basic_block bb; | 3567 basic_block bb; |
3306 | 3568 |
3307 /* Don't overwrite the estimated frequencies when the profile for | |
3308 the function is missing. We may drop this function PROFILE_GUESSED | |
3309 later in drop_profile (). */ | |
3310 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () | |
3311 || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()) | |
3312 return false; | |
3313 | |
3314 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) | 3569 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) |
3315 if (bb->count > true_count_max) | 3570 true_count_max = true_count_max.max (bb->count); |
3316 true_count_max = bb->count; | 3571 |
3317 | 3572 cfun->cfg->count_max = true_count_max; |
3318 /* If we have no counts to base frequencies on, keep those that are | 3573 |
3319 already there. */ | 3574 return true_count_max.ipa ().nonzero_p (); |
3320 if (!(true_count_max > 0)) | |
3321 return false; | |
3322 | |
3323 count_max = true_count_max.to_gcov_type (); | |
3324 | |
3325 FOR_ALL_BB_FN (bb, cfun) | |
3326 if (bb->count.initialized_p ()) | |
3327 bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max); | |
3328 | |
3329 return true; | |
3330 } | 3575 } |
3331 | 3576 |
3332 /* Return true if function is likely to be expensive, so there is no point to | 3577 /* Return true if function is likely to be expensive, so there is no point to |
3333 optimize performance of prologue, epilogue or do inlining at the expense | 3578 optimize performance of prologue, epilogue or do inlining at the expense |
3334 of code size growth. THRESHOLD is the limit of number of instructions | 3579 of code size growth. THRESHOLD is the limit of number of instructions |
3335 function can execute at average to be still considered not expensive. */ | 3580 function can execute at average to be still considered not expensive. */ |
3336 | 3581 |
3337 bool | 3582 bool |
3338 expensive_function_p (int threshold) | 3583 expensive_function_p (int threshold) |
3339 { | 3584 { |
3340 unsigned int sum = 0; | |
3341 basic_block bb; | 3585 basic_block bb; |
3342 unsigned int limit; | 3586 |
3343 | 3587 /* If profile was scaled in a way entry block has count 0, then the function |
3344 /* We can not compute accurately for large thresholds due to scaled | 3588 is deifnitly taking a lot of time. */ |
3345 frequencies. */ | 3589 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.nonzero_p ()) |
3346 gcc_assert (threshold <= BB_FREQ_MAX); | |
3347 | |
3348 /* Frequencies are out of range. This either means that function contains | |
3349 internal loop executing more than BB_FREQ_MAX times or profile feedback | |
3350 is available and function has not been executed at all. */ | |
3351 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0) | |
3352 return true; | 3590 return true; |
3353 | 3591 |
3354 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ | 3592 profile_count limit = ENTRY_BLOCK_PTR_FOR_FN |
3355 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold; | 3593 (cfun)->count.apply_scale (threshold, 1); |
3594 profile_count sum = profile_count::zero (); | |
3356 FOR_EACH_BB_FN (bb, cfun) | 3595 FOR_EACH_BB_FN (bb, cfun) |
3357 { | 3596 { |
3358 rtx_insn *insn; | 3597 rtx_insn *insn; |
3598 | |
3599 if (!bb->count.initialized_p ()) | |
3600 { | |
3601 if (dump_file) | |
3602 fprintf (dump_file, "Function is considered expensive because" | |
3603 " count of bb %i is not initialized\n", bb->index); | |
3604 return true; | |
3605 } | |
3359 | 3606 |
3360 FOR_BB_INSNS (bb, insn) | 3607 FOR_BB_INSNS (bb, insn) |
3361 if (active_insn_p (insn)) | 3608 if (active_insn_p (insn)) |
3362 { | 3609 { |
3363 sum += bb->frequency; | 3610 sum += bb->count; |
3364 if (sum > limit) | 3611 if (sum > limit) |
3365 return true; | 3612 return true; |
3366 } | 3613 } |
3367 } | 3614 } |
3368 | 3615 |
3407 && (dump_file && (dump_flags & TDF_DETAILS))) | 3654 && (dump_file && (dump_flags & TDF_DETAILS))) |
3408 fprintf (dump_file, | 3655 fprintf (dump_file, |
3409 "Basic block %i is marked unlikely by forward prop\n", | 3656 "Basic block %i is marked unlikely by forward prop\n", |
3410 bb->index); | 3657 bb->index); |
3411 bb->count = profile_count::zero (); | 3658 bb->count = profile_count::zero (); |
3412 bb->frequency = 0; | |
3413 } | 3659 } |
3414 else | 3660 else |
3415 bb->aux = NULL; | 3661 bb->aux = NULL; |
3416 } | 3662 } |
3417 } | 3663 } |
3437 if (dump_file && (dump_flags & TDF_DETAILS)) | 3683 if (dump_file && (dump_flags & TDF_DETAILS)) |
3438 fprintf (dump_file, "Basic block %i is locally unlikely\n", | 3684 fprintf (dump_file, "Basic block %i is locally unlikely\n", |
3439 bb->index); | 3685 bb->index); |
3440 bb->count = profile_count::zero (); | 3686 bb->count = profile_count::zero (); |
3441 } | 3687 } |
3442 | |
3443 if (bb->count == profile_count::zero ()) | |
3444 bb->frequency = 0; | |
3445 | 3688 |
3446 FOR_EACH_EDGE (e, ei, bb->succs) | 3689 FOR_EACH_EDGE (e, ei, bb->succs) |
3447 if (!(e->probability == profile_probability::never ()) | 3690 if (!(e->probability == profile_probability::never ()) |
3448 && unlikely_executed_edge_p (e)) | 3691 && unlikely_executed_edge_p (e)) |
3449 { | 3692 { |
3453 e->probability = profile_probability::never (); | 3696 e->probability = profile_probability::never (); |
3454 } | 3697 } |
3455 | 3698 |
3456 gcc_checking_assert (!bb->aux); | 3699 gcc_checking_assert (!bb->aux); |
3457 } | 3700 } |
3701 propagate_unlikely_bbs_forward (); | |
3458 | 3702 |
3459 auto_vec<int, 64> nsuccs; | 3703 auto_vec<int, 64> nsuccs; |
3460 nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun)); | 3704 nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun)); |
3461 FOR_ALL_BB_FN (bb, cfun) | 3705 FOR_ALL_BB_FN (bb, cfun) |
3462 if (!(bb->count == profile_count::zero ()) | 3706 if (!(bb->count == profile_count::zero ()) |
3471 worklist.safe_push (bb); | 3715 worklist.safe_push (bb); |
3472 } | 3716 } |
3473 while (worklist.length () > 0) | 3717 while (worklist.length () > 0) |
3474 { | 3718 { |
3475 bb = worklist.pop (); | 3719 bb = worklist.pop (); |
3720 if (bb->count == profile_count::zero ()) | |
3721 continue; | |
3476 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | 3722 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
3477 { | 3723 { |
3478 bool found = false; | 3724 bool found = false; |
3479 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | 3725 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); |
3480 !gsi_end_p (gsi); gsi_next (&gsi)) | 3726 !gsi_end_p (gsi); gsi_next (&gsi)) |
3489 break; | 3735 break; |
3490 } | 3736 } |
3491 if (found) | 3737 if (found) |
3492 continue; | 3738 continue; |
3493 } | 3739 } |
3494 if (!(bb->count == profile_count::zero ()) | 3740 if (dump_file && (dump_flags & TDF_DETAILS)) |
3495 && (dump_file && (dump_flags & TDF_DETAILS))) | |
3496 fprintf (dump_file, | 3741 fprintf (dump_file, |
3497 "Basic block %i is marked unlikely by backward prop\n", | 3742 "Basic block %i is marked unlikely by backward prop\n", |
3498 bb->index); | 3743 bb->index); |
3499 bb->count = profile_count::zero (); | 3744 bb->count = profile_count::zero (); |
3500 bb->frequency = 0; | |
3501 FOR_EACH_EDGE (e, ei, bb->preds) | 3745 FOR_EACH_EDGE (e, ei, bb->preds) |
3502 if (!(e->probability == profile_probability::never ())) | 3746 if (!(e->probability == profile_probability::never ())) |
3503 { | 3747 { |
3504 e->probability = profile_probability::never (); | |
3505 if (!(e->src->count == profile_count::zero ())) | 3748 if (!(e->src->count == profile_count::zero ())) |
3506 { | 3749 { |
3750 gcc_checking_assert (nsuccs[e->src->index] > 0); | |
3507 nsuccs[e->src->index]--; | 3751 nsuccs[e->src->index]--; |
3508 if (!nsuccs[e->src->index]) | 3752 if (!nsuccs[e->src->index]) |
3509 worklist.safe_push (e->src); | 3753 worklist.safe_push (e->src); |
3510 } | 3754 } |
3511 } | 3755 } |
3512 } | 3756 } |
3757 /* Finally all edges from non-0 regions to 0 are unlikely. */ | |
3758 FOR_ALL_BB_FN (bb, cfun) | |
3759 if (!(bb->count == profile_count::zero ())) | |
3760 FOR_EACH_EDGE (e, ei, bb->succs) | |
3761 if (!(e->probability == profile_probability::never ()) | |
3762 && e->dest->count == profile_count::zero ()) | |
3763 { | |
3764 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3765 fprintf (dump_file, "Edge %i->%i is unlikely because " | |
3766 "it enters unlikely block\n", | |
3767 bb->index, e->dest->index); | |
3768 e->probability = profile_probability::never (); | |
3769 } | |
3770 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()) | |
3771 cgraph_node::get (current_function_decl)->count = profile_count::zero (); | |
3513 } | 3772 } |
3514 | 3773 |
3515 /* Estimate and propagate basic block frequencies using the given branch | 3774 /* Estimate and propagate basic block frequencies using the given branch |
3516 probabilities. If FORCE is true, the frequencies are used to estimate | 3775 probabilities. If FORCE is true, the frequencies are used to estimate |
3517 the counts even when there are already non-zero profile counts. */ | 3776 the counts even when there are already non-zero profile counts. */ |
3523 sreal freq_max; | 3782 sreal freq_max; |
3524 | 3783 |
3525 determine_unlikely_bbs (); | 3784 determine_unlikely_bbs (); |
3526 | 3785 |
3527 if (force || profile_status_for_fn (cfun) != PROFILE_READ | 3786 if (force || profile_status_for_fn (cfun) != PROFILE_READ |
3528 || !counts_to_freqs ()) | 3787 || !update_max_bb_count ()) |
3529 { | 3788 { |
3530 static int real_values_initialized = 0; | 3789 static int real_values_initialized = 0; |
3531 | 3790 |
3532 if (!real_values_initialized) | 3791 if (!real_values_initialized) |
3533 { | 3792 { |
3534 real_values_initialized = 1; | 3793 real_values_initialized = 1; |
3535 real_br_prob_base = REG_BR_PROB_BASE; | 3794 real_br_prob_base = REG_BR_PROB_BASE; |
3536 real_bb_freq_max = BB_FREQ_MAX; | 3795 /* Scaling frequencies up to maximal profile count may result in |
3796 frequent overflows especially when inlining loops. | |
3797 Small scalling results in unnecesary precision loss. Stay in | |
3798 the half of the (exponential) range. */ | |
3799 real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2); | |
3537 real_one_half = sreal (1, -1); | 3800 real_one_half = sreal (1, -1); |
3538 real_inv_br_prob_base = sreal (1) / real_br_prob_base; | 3801 real_inv_br_prob_base = sreal (1) / real_br_prob_base; |
3539 real_almost_one = sreal (1) - real_inv_br_prob_base; | 3802 real_almost_one = sreal (1) - real_inv_br_prob_base; |
3540 } | 3803 } |
3541 | 3804 |
3552 edge e; | 3815 edge e; |
3553 edge_iterator ei; | 3816 edge_iterator ei; |
3554 | 3817 |
3555 FOR_EACH_EDGE (e, ei, bb->succs) | 3818 FOR_EACH_EDGE (e, ei, bb->succs) |
3556 { | 3819 { |
3557 EDGE_INFO (e)->back_edge_prob | 3820 /* FIXME: Graphite is producing edges with no profile. Once |
3558 = e->probability.to_reg_br_prob_base (); | 3821 this is fixed, drop this. */ |
3822 if (e->probability.initialized_p ()) | |
3823 EDGE_INFO (e)->back_edge_prob | |
3824 = e->probability.to_reg_br_prob_base (); | |
3825 else | |
3826 EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2; | |
3559 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; | 3827 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; |
3560 } | 3828 } |
3561 } | 3829 } |
3562 | 3830 |
3563 /* First compute frequencies locally for each loop from innermost | 3831 /* First compute frequencies locally for each loop from innermost |
3568 FOR_EACH_BB_FN (bb, cfun) | 3836 FOR_EACH_BB_FN (bb, cfun) |
3569 if (freq_max < BLOCK_INFO (bb)->frequency) | 3837 if (freq_max < BLOCK_INFO (bb)->frequency) |
3570 freq_max = BLOCK_INFO (bb)->frequency; | 3838 freq_max = BLOCK_INFO (bb)->frequency; |
3571 | 3839 |
3572 freq_max = real_bb_freq_max / freq_max; | 3840 freq_max = real_bb_freq_max / freq_max; |
3841 if (freq_max < 16) | |
3842 freq_max = 16; | |
3843 profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); | |
3844 cfun->cfg->count_max = profile_count::uninitialized (); | |
3573 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) | 3845 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) |
3574 { | 3846 { |
3575 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; | 3847 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; |
3576 bb->frequency = tmp.to_int (); | 3848 profile_count count = profile_count::from_gcov_type (tmp.to_int ()); |
3849 | |
3850 /* If we have profile feedback in which this function was never | |
3851 executed, then preserve this info. */ | |
3852 if (!(bb->count == profile_count::zero ())) | |
3853 bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count); | |
3854 cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); | |
3577 } | 3855 } |
3578 | 3856 |
3579 free_aux_for_blocks (); | 3857 free_aux_for_blocks (); |
3580 free_aux_for_edges (); | 3858 free_aux_for_edges (); |
3581 } | 3859 } |
3596 node->only_called_at_exit = true; | 3874 node->only_called_at_exit = true; |
3597 | 3875 |
3598 if (profile_status_for_fn (cfun) != PROFILE_READ) | 3876 if (profile_status_for_fn (cfun) != PROFILE_READ) |
3599 { | 3877 { |
3600 int flags = flags_from_decl_or_type (current_function_decl); | 3878 int flags = flags_from_decl_or_type (current_function_decl); |
3601 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero () | 3879 if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p () |
3880 && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ()) | |
3602 || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) | 3881 || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) |
3603 != NULL) | 3882 != NULL) |
3604 { | 3883 { |
3605 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | 3884 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; |
3606 warn_function_cold (current_function_decl); | 3885 warn_function_cold (current_function_decl); |
3616 || DECL_STATIC_DESTRUCTOR (current_function_decl)) | 3895 || DECL_STATIC_DESTRUCTOR (current_function_decl)) |
3617 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; | 3896 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; |
3618 return; | 3897 return; |
3619 } | 3898 } |
3620 | 3899 |
3621 /* Only first time try to drop function into unlikely executed. | 3900 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; |
3622 After inlining the roundoff errors may confuse us. | 3901 warn_function_cold (current_function_decl); |
3623 Ipa-profile pass will drop functions only called from unlikely | 3902 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ()) |
3624 functions to unlikely and that is most of what we care about. */ | 3903 return; |
3625 if (!cfun->after_inlining) | |
3626 { | |
3627 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; | |
3628 warn_function_cold (current_function_decl); | |
3629 } | |
3630 FOR_EACH_BB_FN (bb, cfun) | 3904 FOR_EACH_BB_FN (bb, cfun) |
3631 { | 3905 { |
3632 if (maybe_hot_bb_p (cfun, bb)) | 3906 if (maybe_hot_bb_p (cfun, bb)) |
3633 { | 3907 { |
3634 node->frequency = NODE_FREQUENCY_HOT; | 3908 node->frequency = NODE_FREQUENCY_HOT; |
3715 profile_status_for_fn (fun) = PROFILE_GUESSED; | 3989 profile_status_for_fn (fun) = PROFILE_GUESSED; |
3716 if (dump_file && (dump_flags & TDF_DETAILS)) | 3990 if (dump_file && (dump_flags & TDF_DETAILS)) |
3717 { | 3991 { |
3718 struct loop *loop; | 3992 struct loop *loop; |
3719 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | 3993 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
3720 if (loop->header->frequency) | 3994 if (loop->header->count.initialized_p ()) |
3721 fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n", | 3995 fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n", |
3722 loop->num, | 3996 loop->num, |
3723 (int)expected_loop_iterations_unbounded (loop)); | 3997 (int)expected_loop_iterations_unbounded (loop)); |
3724 } | 3998 } |
3725 return 0; | 3999 return 0; |
3731 make_pass_profile (gcc::context *ctxt) | 4005 make_pass_profile (gcc::context *ctxt) |
3732 { | 4006 { |
3733 return new pass_profile (ctxt); | 4007 return new pass_profile (ctxt); |
3734 } | 4008 } |
3735 | 4009 |
3736 namespace { | 4010 /* Return true when PRED predictor should be removed after early |
3737 | 4011 tree passes. Most of the predictors are beneficial to survive |
3738 const pass_data pass_data_strip_predict_hints = | 4012 as early inlining can also distribute then into caller's bodies. */ |
3739 { | 4013 |
3740 GIMPLE_PASS, /* type */ | 4014 static bool |
3741 "*strip_predict_hints", /* name */ | 4015 strip_predictor_early (enum br_predictor pred) |
3742 OPTGROUP_NONE, /* optinfo_flags */ | 4016 { |
3743 TV_BRANCH_PROB, /* tv_id */ | 4017 switch (pred) |
3744 PROP_cfg, /* properties_required */ | 4018 { |
3745 0, /* properties_provided */ | 4019 case PRED_TREE_EARLY_RETURN: |
3746 0, /* properties_destroyed */ | 4020 return true; |
3747 0, /* todo_flags_start */ | 4021 default: |
3748 0, /* todo_flags_finish */ | 4022 return false; |
3749 }; | 4023 } |
3750 | 4024 } |
3751 class pass_strip_predict_hints : public gimple_opt_pass | |
3752 { | |
3753 public: | |
3754 pass_strip_predict_hints (gcc::context *ctxt) | |
3755 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt) | |
3756 {} | |
3757 | |
3758 /* opt_pass methods: */ | |
3759 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); } | |
3760 virtual unsigned int execute (function *); | |
3761 | |
3762 }; // class pass_strip_predict_hints | |
3763 | 4025 |
3764 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements | 4026 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements |
3765 we no longer need. */ | 4027 we no longer need. EARLY is set to true when called from early |
4028 optimizations. */ | |
4029 | |
3766 unsigned int | 4030 unsigned int |
3767 pass_strip_predict_hints::execute (function *fun) | 4031 strip_predict_hints (function *fun, bool early) |
3768 { | 4032 { |
3769 basic_block bb; | 4033 basic_block bb; |
3770 gimple *ass_stmt; | 4034 gimple *ass_stmt; |
3771 tree var; | 4035 tree var; |
3772 bool changed = false; | 4036 bool changed = false; |
3778 { | 4042 { |
3779 gimple *stmt = gsi_stmt (bi); | 4043 gimple *stmt = gsi_stmt (bi); |
3780 | 4044 |
3781 if (gimple_code (stmt) == GIMPLE_PREDICT) | 4045 if (gimple_code (stmt) == GIMPLE_PREDICT) |
3782 { | 4046 { |
3783 gsi_remove (&bi, true); | 4047 if (!early |
3784 changed = true; | 4048 || strip_predictor_early (gimple_predict_predictor (stmt))) |
3785 continue; | 4049 { |
4050 gsi_remove (&bi, true); | |
4051 changed = true; | |
4052 continue; | |
4053 } | |
3786 } | 4054 } |
3787 else if (is_gimple_call (stmt)) | 4055 else if (is_gimple_call (stmt)) |
3788 { | 4056 { |
3789 tree fndecl = gimple_call_fndecl (stmt); | 4057 tree fndecl = gimple_call_fndecl (stmt); |
3790 | 4058 |
3791 if ((fndecl | 4059 if (!early |
3792 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL | 4060 && ((fndecl != NULL_TREE |
3793 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT | 4061 && fndecl_built_in_p (fndecl, BUILT_IN_EXPECT) |
3794 && gimple_call_num_args (stmt) == 2) | 4062 && gimple_call_num_args (stmt) == 2) |
3795 || (gimple_call_internal_p (stmt) | 4063 || (fndecl != NULL_TREE |
3796 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT)) | 4064 && fndecl_built_in_p (fndecl, |
4065 BUILT_IN_EXPECT_WITH_PROBABILITY) | |
4066 && gimple_call_num_args (stmt) == 3) | |
4067 || (gimple_call_internal_p (stmt) | |
4068 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))) | |
3797 { | 4069 { |
3798 var = gimple_call_lhs (stmt); | 4070 var = gimple_call_lhs (stmt); |
3799 changed = true; | 4071 changed = true; |
3800 if (var) | 4072 if (var) |
3801 { | 4073 { |
3814 } | 4086 } |
3815 } | 4087 } |
3816 return changed ? TODO_cleanup_cfg : 0; | 4088 return changed ? TODO_cleanup_cfg : 0; |
3817 } | 4089 } |
3818 | 4090 |
4091 namespace { | |
4092 | |
4093 const pass_data pass_data_strip_predict_hints = | |
4094 { | |
4095 GIMPLE_PASS, /* type */ | |
4096 "*strip_predict_hints", /* name */ | |
4097 OPTGROUP_NONE, /* optinfo_flags */ | |
4098 TV_BRANCH_PROB, /* tv_id */ | |
4099 PROP_cfg, /* properties_required */ | |
4100 0, /* properties_provided */ | |
4101 0, /* properties_destroyed */ | |
4102 0, /* todo_flags_start */ | |
4103 0, /* todo_flags_finish */ | |
4104 }; | |
4105 | |
4106 class pass_strip_predict_hints : public gimple_opt_pass | |
4107 { | |
4108 public: | |
4109 pass_strip_predict_hints (gcc::context *ctxt) | |
4110 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt) | |
4111 {} | |
4112 | |
4113 /* opt_pass methods: */ | |
4114 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); } | |
4115 void set_pass_param (unsigned int n, bool param) | |
4116 { | |
4117 gcc_assert (n == 0); | |
4118 early_p = param; | |
4119 } | |
4120 | |
4121 virtual unsigned int execute (function *); | |
4122 | |
4123 private: | |
4124 bool early_p; | |
4125 | |
4126 }; // class pass_strip_predict_hints | |
4127 | |
4128 unsigned int | |
4129 pass_strip_predict_hints::execute (function *fun) | |
4130 { | |
4131 return strip_predict_hints (fun, early_p); | |
4132 } | |
4133 | |
3819 } // anon namespace | 4134 } // anon namespace |
3820 | 4135 |
3821 gimple_opt_pass * | 4136 gimple_opt_pass * |
3822 make_pass_strip_predict_hints (gcc::context *ctxt) | 4137 make_pass_strip_predict_hints (gcc::context *ctxt) |
3823 { | 4138 { |
3841 In that case, force the estimation of bb counts/frequencies from the | 4156 In that case, force the estimation of bb counts/frequencies from the |
3842 branch probabilities, rather than computing frequencies from counts, | 4157 branch probabilities, rather than computing frequencies from counts, |
3843 which may also lead to frequencies incorrectly reduced to 0. There | 4158 which may also lead to frequencies incorrectly reduced to 0. There |
3844 is less precision in the probabilities, so we only do this for small | 4159 is less precision in the probabilities, so we only do this for small |
3845 max counts. */ | 4160 max counts. */ |
3846 profile_count count_max = profile_count::zero (); | 4161 cfun->cfg->count_max = profile_count::uninitialized (); |
3847 basic_block bb; | 4162 basic_block bb; |
3848 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) | 4163 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) |
3849 if (bb->count > count_max) | 4164 cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); |
3850 count_max = bb->count; | 4165 |
3851 | 4166 if (profile_status_for_fn (cfun) == PROFILE_GUESSED) |
3852 if (profile_status_for_fn (cfun) == PROFILE_GUESSED | |
3853 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ | |
3854 && count_max < REG_BR_PROB_BASE / 10)) | |
3855 { | 4167 { |
3856 loop_optimizer_init (0); | 4168 loop_optimizer_init (0); |
3857 add_noreturn_fake_exit_edges (); | 4169 add_noreturn_fake_exit_edges (); |
3858 mark_irreducible_loops (); | 4170 mark_irreducible_loops (); |
3859 connect_infinite_loops_to_exit (); | 4171 connect_infinite_loops_to_exit (); |
3860 estimate_bb_frequencies (true); | 4172 estimate_bb_frequencies (true); |
3861 remove_fake_exit_edges (); | 4173 remove_fake_exit_edges (); |
3862 loop_optimizer_finalize (); | 4174 loop_optimizer_finalize (); |
3863 } | 4175 } |
3864 else if (profile_status_for_fn (cfun) == PROFILE_READ) | 4176 else if (profile_status_for_fn (cfun) == PROFILE_READ) |
3865 counts_to_freqs (); | 4177 update_max_bb_count (); |
4178 else if (profile_status_for_fn (cfun) == PROFILE_ABSENT | |
4179 && !flag_guess_branch_prob) | |
4180 ; | |
3866 else | 4181 else |
3867 gcc_unreachable (); | 4182 gcc_unreachable (); |
3868 timevar_pop (TV_REBUILD_FREQUENCIES); | 4183 timevar_pop (TV_REBUILD_FREQUENCIES); |
3869 } | 4184 } |
3870 | 4185 |
3915 profile_count count_sum = profile_count::zero (); | 4230 profile_count count_sum = profile_count::zero (); |
3916 profile_probability prob_sum = profile_probability::never (); | 4231 profile_probability prob_sum = profile_probability::never (); |
3917 edge_iterator ei; | 4232 edge_iterator ei; |
3918 edge e2; | 4233 edge e2; |
3919 bool uninitialized_exit = false; | 4234 bool uninitialized_exit = false; |
4235 | |
4236 /* When branch probability guesses are not known, then do nothing. */ | |
4237 if (!impossible && !e->count ().initialized_p ()) | |
4238 return; | |
3920 | 4239 |
3921 profile_probability goal = (impossible ? profile_probability::never () | 4240 profile_probability goal = (impossible ? profile_probability::never () |
3922 : profile_probability::very_unlikely ()); | 4241 : profile_probability::very_unlikely ()); |
3923 | 4242 |
3924 /* If edge is already improbably or cold, just return. */ | 4243 /* If edge is already improbably or cold, just return. */ |
3926 && (!impossible || e->count () == profile_count::zero ())) | 4245 && (!impossible || e->count () == profile_count::zero ())) |
3927 return; | 4246 return; |
3928 FOR_EACH_EDGE (e2, ei, e->src->succs) | 4247 FOR_EACH_EDGE (e2, ei, e->src->succs) |
3929 if (e2 != e) | 4248 if (e2 != e) |
3930 { | 4249 { |
4250 if (e->flags & EDGE_FAKE) | |
4251 continue; | |
3931 if (e2->count ().initialized_p ()) | 4252 if (e2->count ().initialized_p ()) |
3932 count_sum += e2->count (); | 4253 count_sum += e2->count (); |
3933 else | |
3934 uninitialized_exit = true; | |
3935 if (e2->probability.initialized_p ()) | 4254 if (e2->probability.initialized_p ()) |
3936 prob_sum += e2->probability; | 4255 prob_sum += e2->probability; |
4256 else | |
4257 uninitialized_exit = true; | |
3937 } | 4258 } |
3938 | 4259 |
4260 /* If we are not guessing profiles but have some other edges out, | |
4261 just assume the control flow goes elsewhere. */ | |
4262 if (uninitialized_exit) | |
4263 e->probability = goal; | |
3939 /* If there are other edges out of e->src, redistribute probabilitity | 4264 /* If there are other edges out of e->src, redistribute probabilitity |
3940 there. */ | 4265 there. */ |
3941 if (prob_sum > profile_probability::never ()) | 4266 else if (prob_sum > profile_probability::never ()) |
3942 { | 4267 { |
3943 if (!(e->probability < goal)) | 4268 if (!(e->probability < goal)) |
3944 e->probability = goal; | 4269 e->probability = goal; |
3945 | 4270 |
3946 profile_probability prob_comp = prob_sum / e->probability.invert (); | 4271 profile_probability prob_comp = prob_sum / e->probability.invert (); |
3976 if (current_ir_type () != IR_GIMPLE | 4301 if (current_ir_type () != IR_GIMPLE |
3977 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | 4302 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
3978 update_br_prob_note (e->src); | 4303 update_br_prob_note (e->src); |
3979 if (e->src->count == profile_count::zero ()) | 4304 if (e->src->count == profile_count::zero ()) |
3980 return; | 4305 return; |
3981 if (count_sum == profile_count::zero () && !uninitialized_exit | 4306 if (count_sum == profile_count::zero () && impossible) |
3982 && impossible) | |
3983 { | 4307 { |
3984 bool found = false; | 4308 bool found = false; |
3985 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) | 4309 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
3986 ; | 4310 ; |
3987 else if (current_ir_type () == IR_GIMPLE) | 4311 else if (current_ir_type () == IR_GIMPLE) |
4015 This in general is difficult task to do, but handle special case when | 4339 This in general is difficult task to do, but handle special case when |
4016 BB has only one predecestor. This is common case when we are updating | 4340 BB has only one predecestor. This is common case when we are updating |
4017 after loop transforms. */ | 4341 after loop transforms. */ |
4018 if (!(prob_sum > profile_probability::never ()) | 4342 if (!(prob_sum > profile_probability::never ()) |
4019 && count_sum == profile_count::zero () | 4343 && count_sum == profile_count::zero () |
4020 && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1)) | 4344 && single_pred_p (e->src) && e->src->count.to_frequency (cfun) |
4021 { | 4345 > (impossible ? 0 : 1)) |
4022 int old_frequency = e->src->frequency; | 4346 { |
4347 int old_frequency = e->src->count.to_frequency (cfun); | |
4023 if (dump_file && (dump_flags & TDF_DETAILS)) | 4348 if (dump_file && (dump_flags & TDF_DETAILS)) |
4024 fprintf (dump_file, "Making bb %i %s.\n", e->src->index, | 4349 fprintf (dump_file, "Making bb %i %s.\n", e->src->index, |
4025 impossible ? "impossible" : "cold"); | 4350 impossible ? "impossible" : "cold"); |
4026 e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1); | 4351 int new_frequency = MIN (e->src->count.to_frequency (cfun), |
4352 impossible ? 0 : 1); | |
4027 if (impossible) | 4353 if (impossible) |
4028 e->src->count = profile_count::zero (); | 4354 e->src->count = profile_count::zero (); |
4029 else | 4355 else |
4030 e->src->count = e->count ().apply_scale (e->src->frequency, | 4356 e->src->count = e->count ().apply_scale (new_frequency, |
4031 old_frequency); | 4357 old_frequency); |
4032 force_edge_cold (single_pred_edge (e->src), impossible); | 4358 force_edge_cold (single_pred_edge (e->src), impossible); |
4033 } | 4359 } |
4034 else if (dump_file && (dump_flags & TDF_DETAILS) | 4360 else if (dump_file && (dump_flags & TDF_DETAILS) |
4035 && maybe_hot_bb_p (cfun, e->src)) | 4361 && maybe_hot_bb_p (cfun, e->src)) |
4046 within range (50, 100]. */ | 4372 within range (50, 100]. */ |
4047 | 4373 |
4048 struct branch_predictor | 4374 struct branch_predictor |
4049 { | 4375 { |
4050 const char *name; | 4376 const char *name; |
4051 unsigned probability; | 4377 int probability; |
4052 }; | 4378 }; |
4053 | 4379 |
4054 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE }, | 4380 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE }, |
4055 | 4381 |
4056 static void | 4382 static void |
4057 test_prediction_value_range () | 4383 test_prediction_value_range () |
4058 { | 4384 { |
4059 branch_predictor predictors[] = { | 4385 branch_predictor predictors[] = { |
4060 #include "predict.def" | 4386 #include "predict.def" |
4061 {NULL, -1U} | 4387 { NULL, PROB_UNINITIALIZED } |
4062 }; | 4388 }; |
4063 | 4389 |
4064 for (unsigned i = 0; predictors[i].name != NULL; i++) | 4390 for (unsigned i = 0; predictors[i].name != NULL; i++) |
4065 { | 4391 { |
4392 if (predictors[i].probability == PROB_UNINITIALIZED) | |
4393 continue; | |
4394 | |
4066 unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE; | 4395 unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE; |
4067 ASSERT_TRUE (p > 50 && p <= 100); | 4396 ASSERT_TRUE (p >= 50 && p <= 100); |
4068 } | 4397 } |
4069 } | 4398 } |
4070 | 4399 |
4071 #undef DEF_PREDICTOR | 4400 #undef DEF_PREDICTOR |
4072 | 4401 |