Mercurial > hg > CbC > CbC_gcc
diff gcc/predict.c @ 132:d34655255c78
update gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 10:21:07 +0900 |
parents | 84e7813d76e9 |
children | 1830386684a0 |
line wrap: on
line diff
--- a/gcc/predict.c Thu Oct 25 08:08:40 2018 +0900 +++ b/gcc/predict.c Thu Oct 25 10:21:07 2018 +0900 @@ -1,5 +1,5 @@ /* Branch prediction routines for the GNU compiler. - Copyright (C) 2000-2017 Free Software Foundation, Inc. + Copyright (C) 2000-2018 Free Software Foundation, Inc. This file is part of GCC. @@ -92,6 +92,7 @@ enum prediction, struct loop *in_loop = NULL); static bool can_predict_insn_p (const rtx_insn *); +static HOST_WIDE_INT get_predictor_value (br_predictor, HOST_WIDE_INT); /* Information we hold about each branch predictor. Filled using information from predict.def. */ @@ -121,32 +122,6 @@ }; #undef DEF_PREDICTOR -/* Return TRUE if frequency FREQ is considered to be hot. */ - -static inline bool -maybe_hot_frequency_p (struct function *fun, int freq) -{ - struct cgraph_node *node = cgraph_node::get (fun->decl); - if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ) - { - if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) - return false; - if (node->frequency == NODE_FREQUENCY_HOT) - return true; - } - if (profile_status_for_fn (fun) == PROFILE_ABSENT) - return true; - if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE - && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3)) - return false; - if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0) - return false; - if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) - < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency) - return false; - return true; -} - static gcov_type min_count = -1; /* Determine the threshold for hot BB counts. */ @@ -154,12 +129,13 @@ gcov_type get_hot_bb_threshold () { - gcov_working_set_t *ws; if (min_count == -1) { - ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE)); - gcc_assert (ws); - min_count = ws->min_counter; + min_count + = profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION); + if (dump_file) + fprintf (dump_file, "Setting hotness threshold to %" PRId64 ".\n", + min_count); } return min_count; } @@ -175,10 +151,34 @@ /* Return TRUE if frequency FREQ is considered to be hot. */ bool -maybe_hot_count_p (struct function *, profile_count count) +maybe_hot_count_p (struct function *fun, profile_count count) { if (!count.initialized_p ()) return true; + if (count.ipa () == profile_count::zero ()) + return false; + if (!count.ipa_p ()) + { + struct cgraph_node *node = cgraph_node::get (fun->decl); + if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ) + { + if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) + return false; + if (node->frequency == NODE_FREQUENCY_HOT) + return true; + } + if (profile_status_for_fn (fun) == PROFILE_ABSENT) + return true; + if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE + && count < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (2, 3))) + return false; + if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0) + return false; + if (count.apply_scale (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION), 1) + < ENTRY_BLOCK_PTR_FOR_FN (fun)->count) + return false; + return true; + } /* Code executed at most once is not hot. */ if (count <= MAX (profile_info ? profile_info->runs : 1, 1)) return false; @@ -192,9 +192,7 @@ maybe_hot_bb_p (struct function *fun, const_basic_block bb) { gcc_checking_assert (fun); - if (!maybe_hot_count_p (fun, bb->count)) - return false; - return maybe_hot_frequency_p (fun, bb->frequency); + return maybe_hot_count_p (fun, bb->count); } /* Return true in case BB can be CPU intensive and should be optimized @@ -203,9 +201,7 @@ bool maybe_hot_edge_p (edge e) { - if (!maybe_hot_count_p (cfun, e->count ())) - return false; - return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e)); + return maybe_hot_count_p (cfun, e->count ()); } /* Return true if profile COUNT and FREQUENCY, or function FUN static @@ -213,12 +209,17 @@ static bool probably_never_executed (struct function *fun, - profile_count count, int) + profile_count count) { gcc_checking_assert (fun); - if (count == profile_count::zero ()) + if (count.ipa () == profile_count::zero ()) return true; - if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ) + /* Do not trust adjusted counts. This will make us to drop int cold section + code with low execution count as a result of inlining. These low counts + are not safe even with read profile and may lead us to dropping + code which actually gets executed into cold section of binary that is not + desirable. */ + if (count.precise_p () && profile_status_for_fn (fun) == PROFILE_READ) { int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION); if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs) @@ -238,7 +239,7 @@ bool probably_never_executed_bb_p (struct function *fun, const_basic_block bb) { - return probably_never_executed (fun, bb->count, bb->frequency); + return probably_never_executed (fun, bb->count); } @@ -259,7 +260,7 @@ { if (unlikely_executed_edge_p (e)) return true; - return probably_never_executed (fun, e->count (), EDGE_FREQUENCY (e)); + return probably_never_executed (fun, e->count ()); } /* Return true when current function should always be optimized for size. */ @@ -551,6 +552,7 @@ enum prediction taken) { int probability = predictor_info[(int) predictor].hitrate; + gcc_assert (probability != PROB_UNINITIALIZED); if (taken != TAKEN) probability = REG_BR_PROB_BASE - probability; @@ -734,7 +736,7 @@ else edge_info_str[0] = '\0'; - fprintf (file, " %s heuristics%s%s: %.1f%%", + fprintf (file, " %s heuristics%s%s: %.2f%%", predictor_info[predictor].name, edge_info_str, reason_messages[reason], probability * 100.0 / REG_BR_PROB_BASE); @@ -753,6 +755,19 @@ } fprintf (file, "\n"); + + /* Print output that be easily read by analyze_brprob.py script. We are + interested only in counts that are read from GCDA files. */ + if (dump_file && (dump_flags & TDF_DETAILS) + && bb->count.precise_p () + && reason == REASON_NONE) + { + gcc_assert (e->count ().precise_p ()); + fprintf (file, ";;heuristics;%s;%" PRId64 ";%" PRId64 ";%.1f;\n", + predictor_info[predictor].name, + bb->count.to_gcov_type (), e->count ().to_gcov_type (), + probability * 100.0 / REG_BR_PROB_BASE); + } } /* Return true if STMT is known to be unlikely executed. */ @@ -813,11 +828,14 @@ /* We can not predict the probabilities of outgoing edges of bb. Set them evenly and hope for the best. If UNLIKELY_EDGES is not null, distribute even probability for all edges not mentioned in the set. These edges - are given PROB_VERY_UNLIKELY probability. */ + are given PROB_VERY_UNLIKELY probability. Similarly for LIKELY_EDGES, + if we have exactly one likely edge, make the other edges predicted + as not probable. */ static void set_even_probabilities (basic_block bb, - hash_set<edge> *unlikely_edges = NULL) + hash_set<edge> *unlikely_edges = NULL, + hash_set<edge_prediction *> *likely_edges = NULL) { unsigned nedges = 0, unlikely_count = 0; edge e = NULL; @@ -829,7 +847,7 @@ all -= e->probability; else if (!unlikely_executed_edge_p (e)) { - nedges ++; + nedges++; if (unlikely_edges != NULL && unlikely_edges->contains (e)) { all -= profile_probability::very_unlikely (); @@ -838,26 +856,54 @@ } /* Make the distribution even if all edges are unlikely. */ + unsigned likely_count = likely_edges ? likely_edges->elements () : 0; if (unlikely_count == nedges) { unlikely_edges = NULL; unlikely_count = 0; } - unsigned c = nedges - unlikely_count; - - FOR_EACH_EDGE (e, ei, bb->succs) - if (e->probability.initialized_p ()) - ; - else if (!unlikely_executed_edge_p (e)) - { - if (unlikely_edges != NULL && unlikely_edges->contains (e)) - e->probability = profile_probability::very_unlikely (); + /* If we have one likely edge, then use its probability and distribute + remaining probabilities as even. */ + if (likely_count == 1) + { + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->probability.initialized_p ()) + ; + else if (!unlikely_executed_edge_p (e)) + { + edge_prediction *prediction = *likely_edges->begin (); + int p = prediction->ep_probability; + profile_probability prob + = profile_probability::from_reg_br_prob_base (p); + profile_probability remainder = prob.invert (); + + if (prediction->ep_edge == e) + e->probability = prob; + else + e->probability = remainder.apply_scale (1, nedges - 1); + } else - e->probability = all.apply_scale (1, c).guessed (); - } - else - e->probability = profile_probability::never (); + e->probability = profile_probability::never (); + } + else + { + /* Make all unlikely edges unlikely and the rest will have even + probability. */ + unsigned scale = nedges - unlikely_count; + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->probability.initialized_p ()) + ; + else if (!unlikely_executed_edge_p (e)) + { + if (unlikely_edges != NULL && unlikely_edges->contains (e)) + e->probability = profile_probability::very_unlikely (); + else + e->probability = all.apply_scale (1, scale); + } + else + e->probability = profile_probability::never (); + } } /* Add REG_BR_PROB note to JUMP with PROB. */ @@ -1124,18 +1170,26 @@ int nedges = 0; edge e, first = NULL, second = NULL; edge_iterator ei; + int nzero = 0; + int nunknown = 0; FOR_EACH_EDGE (e, ei, bb->succs) - if (!unlikely_executed_edge_p (e)) - { - nedges ++; - if (first && !second) - second = e; - if (!first) - first = e; - } - else if (!e->probability.initialized_p ()) - e->probability = profile_probability::never (); + { + if (!unlikely_executed_edge_p (e)) + { + nedges ++; + if (first && !second) + second = e; + if (!first) + first = e; + } + else if (!e->probability.initialized_p ()) + e->probability = profile_probability::never (); + if (!e->probability.initialized_p ()) + nunknown++; + else if (e->probability == profile_probability::never ()) + nzero++; + } /* When there is no successor or only one choice, prediction is easy. @@ -1153,6 +1207,7 @@ if (nedges != 2) { hash_set<edge> unlikely_edges (4); + hash_set<edge_prediction *> likely_edges (4); /* Identify all edges that have a probability close to very unlikely. Doing the approach for very unlikely doesn't worth for doing as @@ -1160,11 +1215,16 @@ edge_prediction **preds = bb_predictions->get (bb); if (preds) for (pred = *preds; pred; pred = pred->ep_next) - if (pred->ep_probability <= PROB_VERY_UNLIKELY) - unlikely_edges.add (pred->ep_edge); + { + if (pred->ep_probability <= PROB_VERY_UNLIKELY) + unlikely_edges.add (pred->ep_edge); + if (pred->ep_probability >= PROB_VERY_LIKELY + || pred->ep_predictor == PRED_BUILTIN_EXPECT) + likely_edges.add (pred); + } if (!dry_run) - set_even_probabilities (bb, &unlikely_edges); + set_even_probabilities (bb, &unlikely_edges, &likely_edges); clear_bb_predictions (bb); if (dump_file) { @@ -1289,7 +1349,27 @@ } clear_bb_predictions (bb); - if (!bb->count.initialized_p () && !dry_run) + + /* If we have only one successor which is unknown, we can compute missing + probablity. */ + if (nunknown == 1) + { + profile_probability prob = profile_probability::always (); + edge missing = NULL; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->probability.initialized_p ()) + prob -= e->probability; + else if (missing == NULL) + missing = e; + else + gcc_unreachable (); + missing->probability = prob; + } + /* If nothing is unknown, we have nothing to update. */ + else if (!nunknown && nzero != (int)EDGE_COUNT (bb->succs)) + ; + else if (!dry_run) { first->probability = profile_probability::from_reg_br_prob_base (combined_probability); @@ -1588,7 +1668,8 @@ && tree_fits_shwi_p (compare_base)) { int probability; - bool overflow, overall_overflow = false; + wi::overflow_type overflow; + bool overall_overflow = false; widest_int compare_count, tem; /* (loop_bound - base) / compare_step */ @@ -2220,18 +2301,21 @@ combine_predictions_for_insn (BB_END (bb), bb); } -static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor); +static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor, + HOST_WIDE_INT *probability); /* Helper function for expr_expected_value. */ static tree expr_expected_value_1 (tree type, tree op0, enum tree_code code, - tree op1, bitmap visited, enum br_predictor *predictor) + tree op1, bitmap visited, enum br_predictor *predictor, + HOST_WIDE_INT *probability) { gimple *def; - if (predictor) - *predictor = PRED_UNCONDITIONAL; + /* Reset returned probability value. */ + *probability = -1; + *predictor = PRED_UNCONDITIONAL; if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) { @@ -2250,8 +2334,7 @@ { /* Assume that any given atomic operation has low contention, and thus the compare-and-swap operation succeeds. */ - if (predictor) - *predictor = PRED_COMPARE_AND_SWAP; + *predictor = PRED_COMPARE_AND_SWAP; return build_one_cst (TREE_TYPE (op0)); } } @@ -2287,12 +2370,17 @@ if (arg == PHI_RESULT (def)) continue; - new_val = expr_expected_value (arg, visited, &predictor2); + HOST_WIDE_INT probability2; + new_val = expr_expected_value (arg, visited, &predictor2, + &probability2); /* It is difficult to combine value predictors. Simply assume that later predictor is weaker and take its prediction. */ - if (predictor && *predictor < predictor2) - *predictor = predictor2; + if (*predictor < predictor2) + { + *predictor = predictor2; + *probability = probability2; + } if (!new_val) return NULL; if (!val) @@ -2311,7 +2399,7 @@ gimple_assign_rhs1 (def), gimple_assign_rhs_code (def), gimple_assign_rhs2 (def), - visited, predictor); + visited, predictor, probability); } if (is_gimple_call (def)) @@ -2326,18 +2414,26 @@ tree val = gimple_call_arg (def, 0); if (TREE_CONSTANT (val)) return val; - if (predictor) - { - tree val2 = gimple_call_arg (def, 2); - gcc_assert (TREE_CODE (val2) == INTEGER_CST - && tree_fits_uhwi_p (val2) - && tree_to_uhwi (val2) < END_PREDICTORS); - *predictor = (enum br_predictor) tree_to_uhwi (val2); - } + tree val2 = gimple_call_arg (def, 2); + gcc_assert (TREE_CODE (val2) == INTEGER_CST + && tree_fits_uhwi_p (val2) + && tree_to_uhwi (val2) < END_PREDICTORS); + *predictor = (enum br_predictor) tree_to_uhwi (val2); + if (*predictor == PRED_BUILTIN_EXPECT) + *probability + = HITRATE (PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY)); return gimple_call_arg (def, 1); } return NULL; } + + if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW (decl)) + { + if (predictor) + *predictor = PRED_MALLOC_NONNULL; + return boolean_true_node; + } + if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) switch (DECL_FUNCTION_CODE (decl)) { @@ -2349,8 +2445,35 @@ val = gimple_call_arg (def, 0); if (TREE_CONSTANT (val)) return val; - if (predictor) - *predictor = PRED_BUILTIN_EXPECT; + *predictor = PRED_BUILTIN_EXPECT; + *probability + = HITRATE (PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY)); + return gimple_call_arg (def, 1); + } + case BUILT_IN_EXPECT_WITH_PROBABILITY: + { + tree val; + if (gimple_call_num_args (def) != 3) + return NULL; + val = gimple_call_arg (def, 0); + if (TREE_CONSTANT (val)) + return val; + /* Compute final probability as: + probability * REG_BR_PROB_BASE. */ + tree prob = gimple_call_arg (def, 2); + tree t = TREE_TYPE (prob); + tree base = build_int_cst (integer_type_node, + REG_BR_PROB_BASE); + base = build_real_from_int_cst (t, base); + tree r = fold_build2_initializer_loc (UNKNOWN_LOCATION, + MULT_EXPR, t, prob, base); + HOST_WIDE_INT probi + = real_to_integer (TREE_REAL_CST_PTR (r)); + if (probi >= 0 && probi <= REG_BR_PROB_BASE) + { + *predictor = PRED_BUILTIN_EXPECT_WITH_PROBABILITY; + *probability = probi; + } return gimple_call_arg (def, 1); } @@ -2369,8 +2492,11 @@ case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16: /* Assume that any given atomic operation has low contention, and thus the compare-and-swap operation succeeds. */ + *predictor = PRED_COMPARE_AND_SWAP; + return boolean_true_node; + case BUILT_IN_REALLOC: if (predictor) - *predictor = PRED_COMPARE_AND_SWAP; + *predictor = PRED_MALLOC_NONNULL; return boolean_true_node; default: break; @@ -2384,23 +2510,37 @@ { tree res; enum br_predictor predictor2; - op0 = expr_expected_value (op0, visited, predictor); + HOST_WIDE_INT probability2; + op0 = expr_expected_value (op0, visited, predictor, probability); if (!op0) return NULL; - op1 = expr_expected_value (op1, visited, &predictor2); - if (predictor && *predictor < predictor2) - *predictor = predictor2; + op1 = expr_expected_value (op1, visited, &predictor2, &probability2); if (!op1) return NULL; res = fold_build2 (code, type, op0, op1); - if (TREE_CONSTANT (res)) - return res; + if (TREE_CODE (res) == INTEGER_CST + && TREE_CODE (op0) == INTEGER_CST + && TREE_CODE (op1) == INTEGER_CST) + { + /* Combine binary predictions. */ + if (*probability != -1 || probability2 != -1) + { + HOST_WIDE_INT p1 = get_predictor_value (*predictor, *probability); + HOST_WIDE_INT p2 = get_predictor_value (predictor2, probability2); + *probability = RDIV (p1 * p2, REG_BR_PROB_BASE); + } + + if (*predictor < predictor2) + *predictor = predictor2; + + return res; + } return NULL; } if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS) { tree res; - op0 = expr_expected_value (op0, visited, predictor); + op0 = expr_expected_value (op0, visited, predictor, probability); if (!op0) return NULL; res = fold_build1 (code, type, op0); @@ -2421,23 +2561,44 @@ static tree expr_expected_value (tree expr, bitmap visited, - enum br_predictor *predictor) + enum br_predictor *predictor, + HOST_WIDE_INT *probability) { enum tree_code code; tree op0, op1; if (TREE_CONSTANT (expr)) { - if (predictor) - *predictor = PRED_UNCONDITIONAL; + *predictor = PRED_UNCONDITIONAL; + *probability = -1; return expr; } extract_ops_from_tree (expr, &code, &op0, &op1); return expr_expected_value_1 (TREE_TYPE (expr), - op0, code, op1, visited, predictor); + op0, code, op1, visited, predictor, + probability); } + +/* Return probability of a PREDICTOR. If the predictor has variable + probability return passed PROBABILITY. */ + +static HOST_WIDE_INT +get_predictor_value (br_predictor predictor, HOST_WIDE_INT probability) +{ + switch (predictor) + { + case PRED_BUILTIN_EXPECT: + case PRED_BUILTIN_EXPECT_WITH_PROBABILITY: + gcc_assert (probability != -1); + return probability; + default: + gcc_assert (probability == -1); + return predictor_info[(int) predictor].hitrate; + } +} + /* Predict using opcode of the last statement in basic block. */ static void tree_predict_by_opcode (basic_block bb) @@ -2450,8 +2611,32 @@ enum tree_code cmp; edge_iterator ei; enum br_predictor predictor; - - if (!stmt || gimple_code (stmt) != GIMPLE_COND) + HOST_WIDE_INT probability; + + if (!stmt) + return; + + if (gswitch *sw = dyn_cast <gswitch *> (stmt)) + { + tree index = gimple_switch_index (sw); + tree val = expr_expected_value (index, auto_bitmap (), + &predictor, &probability); + if (val && TREE_CODE (val) == INTEGER_CST) + { + edge e = find_taken_edge_switch_expr (sw, val); + if (predictor == PRED_BUILTIN_EXPECT) + { + int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY); + gcc_assert (percent >= 0 && percent <= 100); + predict_edge (e, PRED_BUILTIN_EXPECT, + HITRATE (percent)); + } + else + predict_edge_def (e, predictor, TAKEN); + } + } + + if (gimple_code (stmt) != GIMPLE_COND) return; FOR_EACH_EDGE (then_edge, ei, bb->succs) if (then_edge->flags & EDGE_TRUE_VALUE) @@ -2461,21 +2646,13 @@ cmp = gimple_cond_code (stmt); type = TREE_TYPE (op0); val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (), - &predictor); + &predictor, &probability); if (val && TREE_CODE (val) == INTEGER_CST) { - if (predictor == PRED_BUILTIN_EXPECT) - { - int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY); - - gcc_assert (percent >= 0 && percent <= 100); - if (integer_zerop (val)) - percent = 100 - percent; - predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent)); - } - else - predict_edge_def (then_edge, predictor, - integer_zerop (val) ? NOT_TAKEN : TAKEN); + HOST_WIDE_INT prob = get_predictor_value (predictor, probability); + if (integer_zerop (val)) + prob = REG_BR_PROB_BASE - prob; + predict_edge (then_edge, predictor, prob); } /* Try "pointer heuristic." A comparison ptr == 0 is predicted as false. @@ -2617,6 +2794,64 @@ return PRED_NO_PREDICTION; } +/* Return zero if phi result could have values other than -1, 0 or 1, + otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1 + values are used or likely. */ + +static int +zero_one_minusone (gphi *phi, int limit) +{ + int phi_num_args = gimple_phi_num_args (phi); + int ret = 0; + for (int i = 0; i < phi_num_args; i++) + { + tree t = PHI_ARG_DEF (phi, i); + if (TREE_CODE (t) != INTEGER_CST) + continue; + wide_int w = wi::to_wide (t); + if (w == -1) + ret |= 1; + else if (w == 0) + ret |= 2; + else if (w == 1) + ret |= 4; + else + return 0; + } + for (int i = 0; i < phi_num_args; i++) + { + tree t = PHI_ARG_DEF (phi, i); + if (TREE_CODE (t) == INTEGER_CST) + continue; + if (TREE_CODE (t) != SSA_NAME) + return 0; + gimple *g = SSA_NAME_DEF_STMT (t); + if (gimple_code (g) == GIMPLE_PHI && limit > 0) + if (int r = zero_one_minusone (as_a <gphi *> (g), limit - 1)) + { + ret |= r; + continue; + } + if (!is_gimple_assign (g)) + return 0; + if (gimple_assign_cast_p (g)) + { + tree rhs1 = gimple_assign_rhs1 (g); + if (TREE_CODE (rhs1) != SSA_NAME + || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) + || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1 + || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) + return 0; + ret |= (2 | 4); + continue; + } + if (TREE_CODE_CLASS (gimple_assign_rhs_code (g)) != tcc_comparison) + return 0; + ret |= (2 | 4); + } + return ret; +} + /* Find the basic block with return expression and look up for possible return value trying to apply RETURN_PREDICTION heuristics. */ static void @@ -2654,6 +2889,19 @@ phi_num_args = gimple_phi_num_args (phi); pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction); + /* Avoid the case where the function returns -1, 0 and 1 values and + nothing else. Those could be qsort etc. comparison functions + where the negative return isn't less probable than positive. + For this require that the function returns at least -1 or 1 + or -1 and a boolean value or comparison result, so that functions + returning just -1 and 0 are treated as if -1 represents error value. */ + if (INTEGRAL_TYPE_P (TREE_TYPE (return_val)) + && !TYPE_UNSIGNED (TREE_TYPE (return_val)) + && TYPE_PRECISION (TREE_TYPE (return_val)) > 1) + if (int r = zero_one_minusone (phi, 3)) + if ((r & (1 | 4)) == (1 | 4)) + return; + /* Avoid the degenerate case where all return values form the function belongs to same category (ie they are all positive constants) so we can hardly say something about them. */ @@ -3014,10 +3262,7 @@ BLOCK_INFO (bb)->npredecessors = count; /* When function never returns, we will never process exit block. */ if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) - { - bb->count = profile_count::zero (); - bb->frequency = 0; - } + bb->count = profile_count::zero (); } BLOCK_INFO (head)->frequency = 1; @@ -3050,7 +3295,10 @@ * BLOCK_INFO (e->src)->frequency / REG_BR_PROB_BASE); */ - sreal tmp = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + sreal tmp = e->probability.initialized_p () ? + e->probability.to_reg_br_prob_base () : 0; tmp *= BLOCK_INFO (e->src)->frequency; tmp *= real_inv_br_prob_base; frequency += tmp; @@ -3082,7 +3330,10 @@ = ((e->probability * BLOCK_INFO (bb)->frequency) / REG_BR_PROB_BASE); */ - sreal tmp = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + sreal tmp = e->probability.initialized_p () ? + e->probability.to_reg_br_prob_base () : 0; tmp *= BLOCK_INFO (bb)->frequency; EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; } @@ -3196,17 +3447,28 @@ } basic_block bb; - FOR_ALL_BB_FN (bb, fn) + if (opt_for_fn (node->decl, flag_guess_branch_prob)) { - bb->count = profile_count::uninitialized (); + bool clear_zeros + = !ENTRY_BLOCK_PTR_FOR_FN (fn)->count.nonzero_p (); + FOR_ALL_BB_FN (bb, fn) + if (clear_zeros || !(bb->count == profile_count::zero ())) + bb->count = bb->count.guessed_local (); + fn->cfg->count_max = fn->cfg->count_max.guessed_local (); + } + else + { + FOR_ALL_BB_FN (bb, fn) + bb->count = profile_count::uninitialized (); + fn->cfg->count_max = profile_count::uninitialized (); } struct cgraph_edge *e; - for (e = node->callees; e; e = e->next_caller) - { - e->frequency = compute_call_stmt_bb_frequency (e->caller->decl, - gimple_bb (e->call_stmt)); - } + for (e = node->callees; e; e = e->next_callee) + e->count = gimple_bb (e->call_stmt)->count; + for (e = node->indirect_calls; e; e = e->next_callee) + e->count = gimple_bb (e->call_stmt)->count; + node->count = ENTRY_BLOCK_PTR_FOR_FN (fn)->count; profile_status_for_fn (fn) = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT); @@ -3243,12 +3505,12 @@ gcov_type max_tp_first_run = 0; struct function *fn = DECL_STRUCT_FUNCTION (node->decl); - if (!(node->count == profile_count::zero ())) + if (node->count.ipa ().nonzero_p ()) continue; for (e = node->callers; e; e = e->next_caller) - if (e->count.initialized_p () && e->count > 0) + if (e->count.ipa ().initialized_p () && e->count.ipa () > 0) { - call_count = call_count + e->count; + call_count = call_count + e->count.ipa (); if (e->caller->tp_first_run > max_tp_first_run) max_tp_first_run = e->caller->tp_first_run; @@ -3281,7 +3543,8 @@ struct cgraph_node *callee = e->callee; struct function *fn = DECL_STRUCT_FUNCTION (callee->decl); - if (callee->count > 0) + if (!(e->count.ipa () == profile_count::zero ()) + && callee->count.ipa ().nonzero_p ()) continue; if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl)) && fn && fn->cfg @@ -3298,35 +3561,17 @@ Return nonzero iff there was any nonzero execution count. */ bool -counts_to_freqs (void) +update_max_bb_count (void) { - gcov_type count_max; - profile_count true_count_max = profile_count::zero (); + profile_count true_count_max = profile_count::uninitialized (); basic_block bb; - /* Don't overwrite the estimated frequencies when the profile for - the function is missing. We may drop this function PROFILE_GUESSED - later in drop_profile (). */ - if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () - || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()) - return false; - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - if (bb->count > true_count_max) - true_count_max = bb->count; - - /* If we have no counts to base frequencies on, keep those that are - already there. */ - if (!(true_count_max > 0)) - return false; - - count_max = true_count_max.to_gcov_type (); - - FOR_ALL_BB_FN (bb, cfun) - if (bb->count.initialized_p ()) - bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max); - - return true; + true_count_max = true_count_max.max (bb->count); + + cfun->cfg->count_max = true_count_max; + + return true_count_max.ipa ().nonzero_p (); } /* Return true if function is likely to be expensive, so there is no point to @@ -3337,30 +3582,32 @@ bool expensive_function_p (int threshold) { - unsigned int sum = 0; basic_block bb; - unsigned int limit; - - /* We can not compute accurately for large thresholds due to scaled - frequencies. */ - gcc_assert (threshold <= BB_FREQ_MAX); - - /* Frequencies are out of range. This either means that function contains - internal loop executing more than BB_FREQ_MAX times or profile feedback - is available and function has not been executed at all. */ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0) + + /* If profile was scaled in a way entry block has count 0, then the function + is deifnitly taking a lot of time. */ + if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.nonzero_p ()) return true; - /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ - limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold; + profile_count limit = ENTRY_BLOCK_PTR_FOR_FN + (cfun)->count.apply_scale (threshold, 1); + profile_count sum = profile_count::zero (); FOR_EACH_BB_FN (bb, cfun) { rtx_insn *insn; + if (!bb->count.initialized_p ()) + { + if (dump_file) + fprintf (dump_file, "Function is considered expensive because" + " count of bb %i is not initialized\n", bb->index); + return true; + } + FOR_BB_INSNS (bb, insn) if (active_insn_p (insn)) { - sum += bb->frequency; + sum += bb->count; if (sum > limit) return true; } @@ -3409,7 +3656,6 @@ "Basic block %i is marked unlikely by forward prop\n", bb->index); bb->count = profile_count::zero (); - bb->frequency = 0; } else bb->aux = NULL; @@ -3440,9 +3686,6 @@ bb->count = profile_count::zero (); } - if (bb->count == profile_count::zero ()) - bb->frequency = 0; - FOR_EACH_EDGE (e, ei, bb->succs) if (!(e->probability == profile_probability::never ()) && unlikely_executed_edge_p (e)) @@ -3455,6 +3698,7 @@ gcc_checking_assert (!bb->aux); } + propagate_unlikely_bbs_forward (); auto_vec<int, 64> nsuccs; nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun)); @@ -3473,6 +3717,8 @@ while (worklist.length () > 0) { bb = worklist.pop (); + if (bb->count == profile_count::zero ()) + continue; if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)) { bool found = false; @@ -3491,25 +3737,38 @@ if (found) continue; } - if (!(bb->count == profile_count::zero ()) - && (dump_file && (dump_flags & TDF_DETAILS))) + if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Basic block %i is marked unlikely by backward prop\n", bb->index); bb->count = profile_count::zero (); - bb->frequency = 0; FOR_EACH_EDGE (e, ei, bb->preds) if (!(e->probability == profile_probability::never ())) { - e->probability = profile_probability::never (); if (!(e->src->count == profile_count::zero ())) { + gcc_checking_assert (nsuccs[e->src->index] > 0); nsuccs[e->src->index]--; if (!nsuccs[e->src->index]) worklist.safe_push (e->src); } } } + /* Finally all edges from non-0 regions to 0 are unlikely. */ + FOR_ALL_BB_FN (bb, cfun) + if (!(bb->count == profile_count::zero ())) + FOR_EACH_EDGE (e, ei, bb->succs) + if (!(e->probability == profile_probability::never ()) + && e->dest->count == profile_count::zero ()) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Edge %i->%i is unlikely because " + "it enters unlikely block\n", + bb->index, e->dest->index); + e->probability = profile_probability::never (); + } + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()) + cgraph_node::get (current_function_decl)->count = profile_count::zero (); } /* Estimate and propagate basic block frequencies using the given branch @@ -3525,7 +3784,7 @@ determine_unlikely_bbs (); if (force || profile_status_for_fn (cfun) != PROFILE_READ - || !counts_to_freqs ()) + || !update_max_bb_count ()) { static int real_values_initialized = 0; @@ -3533,7 +3792,11 @@ { real_values_initialized = 1; real_br_prob_base = REG_BR_PROB_BASE; - real_bb_freq_max = BB_FREQ_MAX; + /* Scaling frequencies up to maximal profile count may result in + frequent overflows especially when inlining loops. + Small scalling results in unnecesary precision loss. Stay in + the half of the (exponential) range. */ + real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2); real_one_half = sreal (1, -1); real_inv_br_prob_base = sreal (1) / real_br_prob_base; real_almost_one = sreal (1) - real_inv_br_prob_base; @@ -3554,8 +3817,13 @@ FOR_EACH_EDGE (e, ei, bb->succs) { - EDGE_INFO (e)->back_edge_prob - = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + if (e->probability.initialized_p ()) + EDGE_INFO (e)->back_edge_prob + = e->probability.to_reg_br_prob_base (); + else + EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2; EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; } } @@ -3570,10 +3838,20 @@ freq_max = BLOCK_INFO (bb)->frequency; freq_max = real_bb_freq_max / freq_max; + if (freq_max < 16) + freq_max = 16; + profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); + cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; - bb->frequency = tmp.to_int (); + profile_count count = profile_count::from_gcov_type (tmp.to_int ()); + + /* If we have profile feedback in which this function was never + executed, then preserve this info. */ + if (!(bb->count == profile_count::zero ())) + bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count); + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); } free_aux_for_blocks (); @@ -3598,7 +3876,8 @@ if (profile_status_for_fn (cfun) != PROFILE_READ) { int flags = flags_from_decl_or_type (current_function_decl); - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero () + if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p () + && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ()) || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) != NULL) { @@ -3618,15 +3897,10 @@ return; } - /* Only first time try to drop function into unlikely executed. - After inlining the roundoff errors may confuse us. - Ipa-profile pass will drop functions only called from unlikely - functions to unlikely and that is most of what we care about. */ - if (!cfun->after_inlining) - { - node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; - warn_function_cold (current_function_decl); - } + node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; + warn_function_cold (current_function_decl); + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ()) + return; FOR_EACH_BB_FN (bb, cfun) { if (maybe_hot_bb_p (cfun, bb)) @@ -3717,7 +3991,7 @@ { struct loop *loop; FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) - if (loop->header->frequency) + if (loop->header->count.initialized_p ()) fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n", loop->num, (int)expected_loop_iterations_unbounded (loop)); @@ -3733,6 +4007,87 @@ return new pass_profile (ctxt); } +/* Return true when PRED predictor should be removed after early + tree passes. Most of the predictors are beneficial to survive + as early inlining can also distribute then into caller's bodies. */ + +static bool +strip_predictor_early (enum br_predictor pred) +{ + switch (pred) + { + case PRED_TREE_EARLY_RETURN: + return true; + default: + return false; + } +} + +/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements + we no longer need. EARLY is set to true when called from early + optimizations. */ + +unsigned int +strip_predict_hints (function *fun, bool early) +{ + basic_block bb; + gimple *ass_stmt; + tree var; + bool changed = false; + + FOR_EACH_BB_FN (bb, fun) + { + gimple_stmt_iterator bi; + for (bi = gsi_start_bb (bb); !gsi_end_p (bi);) + { + gimple *stmt = gsi_stmt (bi); + + if (gimple_code (stmt) == GIMPLE_PREDICT) + { + if (!early + || strip_predictor_early (gimple_predict_predictor (stmt))) + { + gsi_remove (&bi, true); + changed = true; + continue; + } + } + else if (is_gimple_call (stmt)) + { + tree fndecl = gimple_call_fndecl (stmt); + + if (!early + && ((fndecl != NULL_TREE + && fndecl_built_in_p (fndecl, BUILT_IN_EXPECT) + && gimple_call_num_args (stmt) == 2) + || (fndecl != NULL_TREE + && fndecl_built_in_p (fndecl, + BUILT_IN_EXPECT_WITH_PROBABILITY) + && gimple_call_num_args (stmt) == 3) + || (gimple_call_internal_p (stmt) + && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))) + { + var = gimple_call_lhs (stmt); + changed = true; + if (var) + { + ass_stmt + = gimple_build_assign (var, gimple_call_arg (stmt, 0)); + gsi_replace (&bi, ass_stmt, true); + } + else + { + gsi_remove (&bi, true); + continue; + } + } + } + gsi_next (&bi); + } + } + return changed ? TODO_cleanup_cfg : 0; +} + namespace { const pass_data pass_data_strip_predict_hints = @@ -3757,63 +4112,23 @@ /* opt_pass methods: */ opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); } + void set_pass_param (unsigned int n, bool param) + { + gcc_assert (n == 0); + early_p = param; + } + virtual unsigned int execute (function *); +private: + bool early_p; + }; // class pass_strip_predict_hints -/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements - we no longer need. */ unsigned int pass_strip_predict_hints::execute (function *fun) { - basic_block bb; - gimple *ass_stmt; - tree var; - bool changed = false; - - FOR_EACH_BB_FN (bb, fun) - { - gimple_stmt_iterator bi; - for (bi = gsi_start_bb (bb); !gsi_end_p (bi);) - { - gimple *stmt = gsi_stmt (bi); - - if (gimple_code (stmt) == GIMPLE_PREDICT) - { - gsi_remove (&bi, true); - changed = true; - continue; - } - else if (is_gimple_call (stmt)) - { - tree fndecl = gimple_call_fndecl (stmt); - - if ((fndecl - && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL - && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT - && gimple_call_num_args (stmt) == 2) - || (gimple_call_internal_p (stmt) - && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT)) - { - var = gimple_call_lhs (stmt); - changed = true; - if (var) - { - ass_stmt - = gimple_build_assign (var, gimple_call_arg (stmt, 0)); - gsi_replace (&bi, ass_stmt, true); - } - else - { - gsi_remove (&bi, true); - continue; - } - } - } - gsi_next (&bi); - } - } - return changed ? TODO_cleanup_cfg : 0; + return strip_predict_hints (fun, early_p); } } // anon namespace @@ -3843,15 +4158,12 @@ which may also lead to frequencies incorrectly reduced to 0. There is less precision in the probabilities, so we only do this for small max counts. */ - profile_count count_max = profile_count::zero (); + cfun->cfg->count_max = profile_count::uninitialized (); basic_block bb; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - if (bb->count > count_max) - count_max = bb->count; - - if (profile_status_for_fn (cfun) == PROFILE_GUESSED - || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ - && count_max < REG_BR_PROB_BASE / 10)) + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + + if (profile_status_for_fn (cfun) == PROFILE_GUESSED) { loop_optimizer_init (0); add_noreturn_fake_exit_edges (); @@ -3862,7 +4174,10 @@ loop_optimizer_finalize (); } else if (profile_status_for_fn (cfun) == PROFILE_READ) - counts_to_freqs (); + update_max_bb_count (); + else if (profile_status_for_fn (cfun) == PROFILE_ABSENT + && !flag_guess_branch_prob) + ; else gcc_unreachable (); timevar_pop (TV_REBUILD_FREQUENCIES); @@ -3918,6 +4233,10 @@ edge e2; bool uninitialized_exit = false; + /* When branch probability guesses are not known, then do nothing. */ + if (!impossible && !e->count ().initialized_p ()) + return; + profile_probability goal = (impossible ? profile_probability::never () : profile_probability::very_unlikely ()); @@ -3928,17 +4247,23 @@ FOR_EACH_EDGE (e2, ei, e->src->succs) if (e2 != e) { + if (e->flags & EDGE_FAKE) + continue; if (e2->count ().initialized_p ()) count_sum += e2->count (); - else - uninitialized_exit = true; if (e2->probability.initialized_p ()) prob_sum += e2->probability; + else + uninitialized_exit = true; } + /* If we are not guessing profiles but have some other edges out, + just assume the control flow goes elsewhere. */ + if (uninitialized_exit) + e->probability = goal; /* If there are other edges out of e->src, redistribute probabilitity there. */ - if (prob_sum > profile_probability::never ()) + else if (prob_sum > profile_probability::never ()) { if (!(e->probability < goal)) e->probability = goal; @@ -3978,8 +4303,7 @@ update_br_prob_note (e->src); if (e->src->count == profile_count::zero ()) return; - if (count_sum == profile_count::zero () && !uninitialized_exit - && impossible) + if (count_sum == profile_count::zero () && impossible) { bool found = false; if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) @@ -4017,17 +4341,19 @@ after loop transforms. */ if (!(prob_sum > profile_probability::never ()) && count_sum == profile_count::zero () - && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1)) + && single_pred_p (e->src) && e->src->count.to_frequency (cfun) + > (impossible ? 0 : 1)) { - int old_frequency = e->src->frequency; + int old_frequency = e->src->count.to_frequency (cfun); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Making bb %i %s.\n", e->src->index, impossible ? "impossible" : "cold"); - e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1); + int new_frequency = MIN (e->src->count.to_frequency (cfun), + impossible ? 0 : 1); if (impossible) e->src->count = profile_count::zero (); else - e->src->count = e->count ().apply_scale (e->src->frequency, + e->src->count = e->count ().apply_scale (new_frequency, old_frequency); force_edge_cold (single_pred_edge (e->src), impossible); } @@ -4048,7 +4374,7 @@ struct branch_predictor { const char *name; - unsigned probability; + int probability; }; #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE }, @@ -4058,13 +4384,16 @@ { branch_predictor predictors[] = { #include "predict.def" - {NULL, -1U} + { NULL, PROB_UNINITIALIZED } }; for (unsigned i = 0; predictors[i].name != NULL; i++) { + if (predictors[i].probability == PROB_UNINITIALIZED) + continue; + unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE; - ASSERT_TRUE (p > 50 && p <= 100); + ASSERT_TRUE (p >= 50 && p <= 100); } }