Mercurial > hg > CbC > CbC_gcc
diff gcc/cfgloopmanip.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
line wrap: on
line diff
--- a/gcc/cfgloopmanip.c Fri Oct 27 22:46:09 2017 +0900 +++ b/gcc/cfgloopmanip.c Thu Oct 25 07:37:49 2018 +0900 @@ -1,5 +1,5 @@ /* Loop manipulation code for GNU compiler. - Copyright (C) 2002-2017 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of GCC. @@ -502,14 +502,17 @@ /* Scale profile in LOOP by P. If ITERATION_BOUND is non-zero, scale even further if loop is predicted - to iterate too many times. */ + to iterate too many times. + Before caling this function, preheader block profile should be already + scaled to final count. This is necessary because loop iterations are + determined by comparing header edge count to latch ege count and thus + they need to be scaled synchronously. */ void scale_loop_profile (struct loop *loop, profile_probability p, gcov_type iteration_bound) { - gcov_type iterations = expected_loop_iterations_unbounded (loop); - edge e; + edge e, preheader_e; edge_iterator ei; if (dump_file && (dump_flags & TDF_DETAILS)) @@ -517,101 +520,100 @@ fprintf (dump_file, ";; Scaling loop %i with scale ", loop->num); p.dump (dump_file); - fprintf (dump_file, " bounding iterations to %i from guessed %i\n", - (int)iteration_bound, (int)iterations); + fprintf (dump_file, " bounding iterations to %i\n", + (int)iteration_bound); + } + + /* Scale the probabilities. */ + scale_loop_frequencies (loop, p); + + if (iteration_bound == 0) + return; + + gcov_type iterations = expected_loop_iterations_unbounded (loop, NULL, true); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, ";; guessed iterations after scaling %i\n", + (int)iterations); } /* See if loop is predicted to iterate too many times. */ - if (iteration_bound && iterations > 0 - && p.apply (iterations) > iteration_bound) + if (iterations <= iteration_bound) + return; + + preheader_e = loop_preheader_edge (loop); + + /* We could handle also loops without preheaders, but bounding is + currently used only by optimizers that have preheaders constructed. */ + gcc_checking_assert (preheader_e); + profile_count count_in = preheader_e->count (); + + if (count_in > profile_count::zero () + && loop->header->count.initialized_p ()) { - /* Fixing loop profile for different trip count is not trivial; the exit - probabilities has to be updated to match and frequencies propagated down - to the loop body. + profile_count count_delta = profile_count::zero (); - We fully update only the simple case of loop with single exit that is - either from the latch or BB just before latch and leads from BB with - simple conditional jump. This is OK for use in vectorizer. */ e = single_exit (loop); if (e) { edge other_e; - int freq_delta; - profile_count count_delta; - - FOR_EACH_EDGE (other_e, ei, e->src->succs) + FOR_EACH_EDGE (other_e, ei, e->src->succs) if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE)) && e != other_e) break; /* Probability of exit must be 1/iterations. */ - freq_delta = EDGE_FREQUENCY (e); count_delta = e->count (); e->probability = profile_probability::always () - .apply_scale (1, iteration_bound); + .apply_scale (1, iteration_bound); other_e->probability = e->probability.invert (); - freq_delta -= EDGE_FREQUENCY (e); - count_delta -= e->count (); - /* If latch exists, change its frequency and count, since we changed - probability of exit. Theoretically we should update everything from - source of exit edge to latch, but for vectorizer this is enough. */ - if (loop->latch - && loop->latch != e->src) + /* In code below we only handle the following two updates. */ + if (other_e->dest != loop->header + && other_e->dest != loop->latch + && (dump_file && (dump_flags & TDF_DETAILS))) { - loop->latch->frequency += freq_delta; - if (loop->latch->frequency < 0) - loop->latch->frequency = 0; - loop->latch->count += count_delta; + fprintf (dump_file, ";; giving up on update of paths from " + "exit condition to latch\n"); } } + else + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Loop has multiple exit edges; " + "giving up on exit condition update\n"); /* Roughly speaking we want to reduce the loop body profile by the difference of loop iterations. We however can do better if we look at the actual profile, if it is available. */ - p = p.apply_scale (iteration_bound, iterations); - - bool determined = false; - if (loop->header->count.initialized_p ()) - { - profile_count count_in = profile_count::zero (); - - FOR_EACH_EDGE (e, ei, loop->header->preds) - if (e->src != loop->latch) - count_in += e->count (); + p = profile_probability::always (); - if (count_in > profile_count::zero () ) - { - p = count_in.probability_in (loop->header->count.apply_scale - (iteration_bound, 1)); - determined = true; - } - } - if (!determined && loop->header->frequency) - { - int freq_in = 0; - - FOR_EACH_EDGE (e, ei, loop->header->preds) - if (e->src != loop->latch) - freq_in += EDGE_FREQUENCY (e); - - if (freq_in != 0) - p = profile_probability::probability_in_gcov_type - (freq_in * iteration_bound, loop->header->frequency); - } + count_in = count_in.apply_scale (iteration_bound, 1); + p = count_in.probability_in (loop->header->count); if (!(p > profile_probability::never ())) p = profile_probability::very_unlikely (); - } + + if (p == profile_probability::always () + || !p.initialized_p ()) + return; - if (p >= profile_probability::always () - || !p.initialized_p ()) - return; + /* If latch exists, change its count, since we changed + probability of exit. Theoretically we should update everything from + source of exit edge to latch, but for vectorizer this is enough. */ + if (loop->latch && loop->latch != e->src) + loop->latch->count += count_delta; - /* Scale the actual probabilities. */ - scale_loop_frequencies (loop, p); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, ";; guessed iterations are now %i\n", - (int)expected_loop_iterations_unbounded (loop)); + /* Scale the probabilities. */ + scale_loop_frequencies (loop, p); + + /* Change latch's count back. */ + if (loop->latch && loop->latch != e->src) + loop->latch->count -= count_delta; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; guessed iterations are now %i\n", + (int)expected_loop_iterations_unbounded (loop, NULL, true)); + } } /* Recompute dominance information for basic blocks outside LOOP. */ @@ -800,7 +802,7 @@ loop->latch = loop_latch; add_loop (loop, outer); - /* TODO: Fix frequencies and counts. */ + /* TODO: Fix counts. */ scale_loop_frequencies (loop, profile_probability::even ()); /* Update dominators. */ @@ -866,13 +868,11 @@ basic_block pred_bb = header_edge->src; struct loop *loop = alloc_loop (); struct loop *outer = loop_outer (succ_bb->loop_father); - int freq; profile_count cnt; loop->header = header_edge->dest; loop->latch = latch_edge->src; - freq = EDGE_FREQUENCY (header_edge); cnt = header_edge->count (); /* Redirect edges. */ @@ -901,10 +901,9 @@ remove_bb_from_loops (switch_bb); add_bb_to_loop (switch_bb, outer); - /* Fix frequencies. */ + /* Fix counts. */ if (redirect_all_edges) { - switch_bb->frequency = freq; switch_bb->count = cnt; } scale_loop_frequencies (loop, false_scale); @@ -1023,9 +1022,11 @@ } /* Copies copy of LOOP as subloop of TARGET loop, placing newly - created loop into loops structure. */ + created loop into loops structure. If AFTER is non-null + the new loop is added at AFTER->next, otherwise in front of TARGETs + sibling list. */ struct loop * -duplicate_loop (struct loop *loop, struct loop *target) +duplicate_loop (struct loop *loop, struct loop *target, struct loop *after) { struct loop *cloop; cloop = alloc_loop (); @@ -1037,36 +1038,46 @@ set_loop_copy (loop, cloop); /* Add it to target. */ - flow_loop_tree_node_add (target, cloop); + flow_loop_tree_node_add (target, cloop, after); return cloop; } /* Copies structure of subloops of LOOP into TARGET loop, placing - newly created loops into loop tree. */ + newly created loops into loop tree at the end of TARGETs sibling + list in the original order. */ void duplicate_subloops (struct loop *loop, struct loop *target) { - struct loop *aloop, *cloop; + struct loop *aloop, *cloop, *tail; + for (tail = target->inner; tail && tail->next; tail = tail->next) + ; for (aloop = loop->inner; aloop; aloop = aloop->next) { - cloop = duplicate_loop (aloop, target); + cloop = duplicate_loop (aloop, target, tail); + tail = cloop; + gcc_assert(!tail->next); duplicate_subloops (aloop, cloop); } } /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS, - into TARGET loop, placing newly created loops into loop tree. */ + into TARGET loop, placing newly created loops into loop tree adding + them to TARGETs sibling list at the end in order. */ static void copy_loops_to (struct loop **copied_loops, int n, struct loop *target) { - struct loop *aloop; + struct loop *aloop, *tail; int i; + for (tail = target->inner; tail && tail->next; tail = tail->next) + ; for (i = 0; i < n; i++) { - aloop = duplicate_loop (copied_loops[i], target); + aloop = duplicate_loop (copied_loops[i], target, tail); + tail = aloop; + gcc_assert(!tail->next); duplicate_subloops (copied_loops[i], aloop); } } @@ -1095,14 +1106,15 @@ } /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating - loop structure and dominators. E's destination must be LOOP header for - this to work, i.e. it must be entry or latch edge of this loop; these are - unique, as the loops must have preheaders for this function to work - correctly (in case E is latch, the function unrolls the loop, if E is entry - edge, it peels the loop). Store edges created by copying ORIG edge from - copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to - original LOOP body, the other copies are numbered in order given by control - flow through them) into TO_REMOVE array. Returns false if duplication is + loop structure and dominators (order of inner subloops is retained). + E's destination must be LOOP header for this to work, i.e. it must be entry + or latch edge of this loop; these are unique, as the loops must have + preheaders for this function to work correctly (in case E is latch, the + function unrolls the loop, if E is entry edge, it peels the loop). Store + edges created by copying ORIG edge from copies corresponding to set bits in + WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies + are numbered in order given by control flow through them) into TO_REMOVE + array. Returns false if duplication is impossible. */ bool @@ -1119,14 +1131,16 @@ basic_block new_bb, bb, first_active_latch = NULL; edge ae, latch_edge; edge spec_edges[2], new_spec_edges[2]; -#define SE_LATCH 0 -#define SE_ORIG 1 + const int SE_LATCH = 0; + const int SE_ORIG = 1; unsigned i, j, n; int is_latch = (latch == e->src); - int scale_act = 0, *scale_step = NULL, scale_main = 0; - int scale_after_exit = 0; - int p, freq_in, freq_le, freq_out_orig; - int prob_pass_thru, prob_pass_wont_exit, prob_pass_main; + profile_probability *scale_step = NULL; + profile_probability scale_main = profile_probability::always (); + profile_probability scale_act = profile_probability::always (); + profile_count after_exit_num = profile_count::zero (), + after_exit_den = profile_count::zero (); + bool scale_after_exit = false; int add_irreducible_flag; basic_block place_after; bitmap bbs_to_scale = NULL; @@ -1165,33 +1179,26 @@ if (flags & DLTHE_FLAG_UPDATE_FREQ) { - /* Calculate coefficients by that we have to scale frequencies + /* Calculate coefficients by that we have to scale counts of duplicated loop bodies. */ - freq_in = header->frequency; - freq_le = EDGE_FREQUENCY (latch_edge); - if (freq_in == 0) - freq_in = 1; - if (freq_in < freq_le) - freq_in = freq_le; - freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le; - if (freq_out_orig > freq_in - freq_le) - freq_out_orig = freq_in - freq_le; - prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in); - prob_pass_wont_exit = - RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in); + profile_count count_in = header->count; + profile_count count_le = latch_edge->count (); + profile_count count_out_orig = orig ? orig->count () : count_in - count_le; + profile_probability prob_pass_thru = count_le.probability_in (count_in); + profile_probability prob_pass_wont_exit = + (count_le + count_out_orig).probability_in (count_in); if (orig && orig->probability.initialized_p () && !(orig->probability == profile_probability::always ())) { /* The blocks that are dominated by a removed exit edge ORIG have frequencies scaled by this. */ - if (orig->probability.initialized_p ()) - scale_after_exit - = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, - REG_BR_PROB_BASE - - orig->probability.to_reg_br_prob_base ()); - else - scale_after_exit = REG_BR_PROB_BASE; + if (orig->count ().initialized_p ()) + { + after_exit_num = orig->src->count; + after_exit_den = after_exit_num - orig->count (); + scale_after_exit = true; + } bbs_to_scale = BITMAP_ALLOC (NULL); for (i = 0; i < n; i++) { @@ -1201,7 +1208,7 @@ } } - scale_step = XNEWVEC (int, ndupl); + scale_step = XNEWVEC (profile_probability, ndupl); for (i = 1; i <= ndupl; i++) scale_step[i - 1] = bitmap_bit_p (wont_exit, i) @@ -1212,52 +1219,48 @@ copy becomes 1. */ if (flags & DLTHE_FLAG_COMPLETTE_PEEL) { - int wanted_freq = EDGE_FREQUENCY (e); - - if (wanted_freq > freq_in) - wanted_freq = freq_in; + profile_count wanted_count = e->count (); gcc_assert (!is_latch); - /* First copy has frequency of incoming edge. Each subsequent - frequency should be reduced by prob_pass_wont_exit. Caller + /* First copy has count of incoming edge. Each subsequent + count should be reduced by prob_pass_wont_exit. Caller should've managed the flags so all except for original loop has won't exist set. */ - scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in); + scale_act = wanted_count.probability_in (count_in); /* Now simulate the duplication adjustments and compute header frequency of the last copy. */ for (i = 0; i < ndupl; i++) - wanted_freq = combine_probabilities (wanted_freq, scale_step[i]); - scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in); + wanted_count = wanted_count.apply_probability (scale_step [i]); + scale_main = wanted_count.probability_in (count_in); } + /* Here we insert loop bodies inside the loop itself (for loop unrolling). + First iteration will be original loop followed by duplicated bodies. + It is necessary to scale down the original so we get right overall + number of iterations. */ else if (is_latch) { - prob_pass_main = bitmap_bit_p (wont_exit, 0) - ? prob_pass_wont_exit - : prob_pass_thru; - p = prob_pass_main; - scale_main = REG_BR_PROB_BASE; + profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0) + ? prob_pass_wont_exit + : prob_pass_thru; + profile_probability p = prob_pass_main; + profile_count scale_main_den = count_in; for (i = 0; i < ndupl; i++) { - scale_main += p; - p = combine_probabilities (p, scale_step[i]); + scale_main_den += count_in.apply_probability (p); + p = p * scale_step[i]; } - scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main); - scale_act = combine_probabilities (scale_main, prob_pass_main); + /* If original loop is executed COUNT_IN times, the unrolled + loop will account SCALE_MAIN_DEN times. */ + scale_main = count_in.probability_in (scale_main_den); + scale_act = scale_main * prob_pass_main; } else { - int preheader_freq = EDGE_FREQUENCY (e); - scale_main = REG_BR_PROB_BASE; + profile_count preheader_count = e->count (); for (i = 0; i < ndupl; i++) - scale_main = combine_probabilities (scale_main, scale_step[i]); - if (preheader_freq > freq_in) - preheader_freq = freq_in; - scale_act = GCOV_COMPUTE_SCALE (preheader_freq, freq_in); + scale_main = scale_main * scale_step[i]; + scale_act = preheader_count.probability_in (count_in); } - for (i = 0; i < ndupl; i++) - gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE); - gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE - && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE); } /* Loop the new bbs will belong to. */ @@ -1350,13 +1353,11 @@ force_edge_cold (new_spec_edges[SE_ORIG], true); /* Scale the frequencies of the blocks dominated by the exit. */ - if (bbs_to_scale) + if (bbs_to_scale && scale_after_exit) { EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) - { - scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit, - REG_BR_PROB_BASE); - } + scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num, + after_exit_den); } } @@ -1371,8 +1372,8 @@ /* Set counts and frequencies. */ if (flags & DLTHE_FLAG_UPDATE_FREQ) { - scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE); - scale_act = combine_probabilities (scale_act, scale_step[j]); + scale_bbs_frequencies (new_bbs, n, scale_act); + scale_act = scale_act * scale_step[j]; } } free (new_bbs); @@ -1386,13 +1387,11 @@ force_edge_cold (orig, true); /* Scale the frequencies of the blocks dominated by the exit. */ - if (bbs_to_scale) + if (bbs_to_scale && scale_after_exit) { EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) - { - scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit, - REG_BR_PROB_BASE); - } + scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num, + after_exit_den); } } @@ -1401,7 +1400,7 @@ set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); if (flags & DLTHE_FLAG_UPDATE_FREQ) { - scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE); + scale_bbs_frequencies (bbs, n, scale_main); free (scale_step); } @@ -1517,7 +1516,9 @@ mfb_kj_edge = loop_latch_edge (loop); latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0; - if (nentry == 1) + if (nentry == 1 + && ((flags & CP_FALLTHRU_PREHEADERS) == 0 + || (single_entry->flags & EDGE_CROSSING) == 0)) dummy = split_edge (single_entry); else {