Mercurial > hg > CbC > CbC_gcc
diff gcc/tree-ssa-loop-ivcanon.c @ 132:d34655255c78
update gcc-8.2
author:   mir3636
date:     Thu, 25 Oct 2018 10:21:07 +0900
parents:  84e7813d76e9
children: 1830386684a0
line wrap: on
line diff
--- a/gcc/tree-ssa-loop-ivcanon.c Thu Oct 25 08:08:40 2018 +0900 +++ b/gcc/tree-ssa-loop-ivcanon.c Thu Oct 25 10:21:07 2018 +0900 @@ -1,5 +1,5 @@ /* Induction variable canonicalization and loop peeling. - Copyright (C) 2004-2017 Free Software Foundation, Inc. + Copyright (C) 2004-2018 Free Software Foundation, Inc. This file is part of GCC. @@ -63,6 +63,7 @@ #include "tree-inline.h" #include "tree-cfgcleanup.h" #include "builtins.h" +#include "tree-ssa-sccvn.h" /* Specifies types of loops that may be unrolled. */ @@ -76,10 +77,13 @@ }; /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT - is the exit edge whose condition is replaced. */ + is the exit edge whose condition is replaced. The ssa versions of the new + IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER + if they are not NULL. */ -static void -create_canonical_iv (struct loop *loop, edge exit, tree niter) +void +create_canonical_iv (struct loop *loop, edge exit, tree niter, + tree *var_before = NULL, tree *var_after = NULL) { edge in; tree type, var; @@ -112,7 +116,9 @@ create_iv (niter, build_int_cst (type, -1), NULL_TREE, loop, - &incr_at, false, NULL, &var); + &incr_at, false, var_before, &var); + if (var_after) + *var_after = var; cmp = (exit->flags & EDGE_TRUE_VALUE) ? 
EQ_EXPR : NE_EXPR; gimple_cond_set_code (cond, cmp); @@ -362,8 +368,8 @@ size->non_call_stmts_on_hot_path++; if (((gimple_code (stmt) == GIMPLE_COND && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) - || constant_after_peeling (gimple_cond_rhs (stmt), stmt, - loop))) + || !constant_after_peeling (gimple_cond_rhs (stmt), stmt, + loop))) || (gimple_code (stmt) == GIMPLE_SWITCH && !constant_after_peeling (gimple_switch_index ( as_a <gswitch *> (stmt)), @@ -647,7 +653,6 @@ add_bb_to_loop (latch_edge->dest, current_loops->tree_root); latch_edge->dest->count = profile_count::zero (); - latch_edge->dest->frequency = 0; set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); gsi = gsi_start_bb (latch_edge->dest); @@ -656,14 +661,21 @@ loops_to_unloop.release (); loops_to_unloop_nunroll.release (); - /* Remove edges in peeled copies. */ + /* Remove edges in peeled copies. Given remove_path removes dominated + regions we need to cope with removal of already removed paths. 
*/ unsigned i; edge e; + auto_vec<int, 20> src_bbs; + src_bbs.reserve_exact (edges_to_remove.length ()); FOR_EACH_VEC_ELT (edges_to_remove, i, e) - { - bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated); - gcc_assert (ok); - } + src_bbs.quick_push (e->src->index); + FOR_EACH_VEC_ELT (edges_to_remove, i, e) + if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i])) + { + bool ok = remove_path (e, irred_invalidated, + loop_closed_ssa_invalidated); + gcc_assert (ok); + } edges_to_remove.release (); } @@ -677,16 +689,14 @@ static bool try_unroll_loop_completely (struct loop *loop, - edge exit, tree niter, + edge exit, tree niter, bool may_be_zero, enum unroll_level ul, HOST_WIDE_INT maxiter, - location_t locus) + dump_user_location_t locus, bool allow_peel) { - unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns; - struct loop_size size; + unsigned HOST_WIDE_INT n_unroll = 0; bool n_unroll_found = false; edge edge_to_cancel = NULL; - dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS; /* See if we proved number of iterations to be low constant. @@ -714,7 +724,8 @@ exit = NULL; /* See if we can improve our estimate by using recorded loop bounds. */ - if (maxiter >= 0 + if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH) + && maxiter >= 0 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) { n_unroll = maxiter; @@ -727,7 +738,8 @@ if (!n_unroll_found) return false; - if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) + if (!loop->unroll + && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Not unrolling loop %d " @@ -741,121 +753,137 @@ if (n_unroll) { - bool large; if (ul == UL_SINGLE_ITER) return false; - /* EXIT can be removed only if we are sure it passes first N_UNROLL - iterations. 
*/ - bool remove_exit = (exit && niter - && TREE_CODE (niter) == INTEGER_CST - && wi::leu_p (n_unroll, wi::to_widest (niter))); - - large = tree_estimate_loop_size - (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, - PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); - ninsns = size.overall; - if (large) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", - loop->num); - return false; - } - - unr_insns = estimated_unrolled_size (&size, n_unroll); - if (dump_file && (dump_flags & TDF_DETAILS)) + if (loop->unroll) { - fprintf (dump_file, " Loop size: %d\n", (int) ninsns); - fprintf (dump_file, " Estimated size after unrolling: %d\n", - (int) unr_insns); - } - - /* If the code is going to shrink, we don't need to be extra cautious - on guessing if the unrolling is going to be profitable. */ - if (unr_insns - /* If there is IV variable that will become constant, we save - one instruction in the loop prologue we do not account - otherwise. */ - <= ninsns + (size.constant_iv != false)) - ; - /* We unroll only inner loops, because we do not consider it profitable - otheriwse. We still can cancel loopback edge of not rolling loop; - this is always a good idea. */ - else if (ul == UL_NO_GROWTH) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", - loop->num); - return false; - } - /* Outer loops tend to be less interesting candidates for complete - unrolling unless we can do a lot of propagation into the inner loop - body. For now we disable outer loop unrolling when the code would - grow. */ - else if (loop->inner) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "it is not innermost and code would grow.\n", - loop->num); - return false; + /* If the unrolling factor is too large, bail out. 
*/ + if (n_unroll > (unsigned)loop->unroll) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Not unrolling loop %d: " + "user didn't want it unrolled completely.\n", + loop->num); + return false; + } } - /* If there is call on a hot path through the loop, then - there is most probably not much to optimize. */ - else if (size.num_non_pure_calls_on_hot_path) + else { + struct loop_size size; + /* EXIT can be removed only if we are sure it passes first N_UNROLL + iterations. */ + bool remove_exit = (exit && niter + && TREE_CODE (niter) == INTEGER_CST + && wi::leu_p (n_unroll, wi::to_widest (niter))); + bool large + = tree_estimate_loop_size + (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, + PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); + if (large) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", + loop->num); + return false; + } + + unsigned HOST_WIDE_INT ninsns = size.overall; + unsigned HOST_WIDE_INT unr_insns + = estimated_unrolled_size (&size, n_unroll); if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "contains call and code would grow.\n", - loop->num); - return false; - } - /* If there is pure/const call in the function, then we - can still optimize the unrolled loop body if it contains - some other interesting code than the calls and code - storing or cumulating the return value. */ - else if (size.num_pure_calls_on_hot_path - /* One IV increment, one test, one ivtmp store - and one useful stmt. That is about minimal loop - doing pure call. 
*/ - && (size.non_call_stmts_on_hot_path - <= 3 + size.num_pure_calls_on_hot_path)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "contains just pure calls and code would grow.\n", - loop->num); - return false; + { + fprintf (dump_file, " Loop size: %d\n", (int) ninsns); + fprintf (dump_file, " Estimated size after unrolling: %d\n", + (int) unr_insns); + } + + /* If the code is going to shrink, we don't need to be extra + cautious on guessing if the unrolling is going to be + profitable. */ + if (unr_insns + /* If there is IV variable that will become constant, we + save one instruction in the loop prologue we do not + account otherwise. */ + <= ninsns + (size.constant_iv != false)) + ; + /* We unroll only inner loops, because we do not consider it + profitable otheriwse. We still can cancel loopback edge + of not rolling loop; this is always a good idea. */ + else if (ul == UL_NO_GROWTH) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", + loop->num); + return false; + } + /* Outer loops tend to be less interesting candidates for + complete unrolling unless we can do a lot of propagation + into the inner loop body. For now we disable outer loop + unrolling when the code would grow. */ + else if (loop->inner) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "it is not innermost and code would grow.\n", + loop->num); + return false; + } + /* If there is call on a hot path through the loop, then + there is most probably not much to optimize. 
*/ + else if (size.num_non_pure_calls_on_hot_path) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "contains call and code would grow.\n", + loop->num); + return false; + } + /* If there is pure/const call in the function, then we can + still optimize the unrolled loop body if it contains some + other interesting code than the calls and code storing or + cumulating the return value. */ + else if (size.num_pure_calls_on_hot_path + /* One IV increment, one test, one ivtmp store and + one useful stmt. That is about minimal loop + doing pure call. */ + && (size.non_call_stmts_on_hot_path + <= 3 + size.num_pure_calls_on_hot_path)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "contains just pure calls and code would grow.\n", + loop->num); + return false; + } + /* Complete unrolling is major win when control flow is + removed and one big basic block is created. If the loop + contains control flow the optimization may still be a win + because of eliminating the loop overhead but it also may + blow the branch predictor tables. Limit number of + branches on the hot path through the peeled sequence. */ + else if (size.num_branches_on_hot_path * (int)n_unroll + > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "number of branches on hot path in the unrolled " + "sequence reaches --param max-peel-branches limit.\n", + loop->num); + return false; + } + else if (unr_insns + > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "number of insns in the unrolled sequence reaches " + "--param max-completely-peeled-insns limit.\n", + loop->num); + return false; + } } - /* Complete unrolling is a major win when control flow is removed and - one big basic block is created. 
If the loop contains control flow - the optimization may still be a win because of eliminating the loop - overhead but it also may blow the branch predictor tables. - Limit number of branches on the hot path through the peeled - sequence. */ - else if (size.num_branches_on_hot_path * (int)n_unroll - > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - " number of branches on hot path in the unrolled sequence" - " reach --param max-peel-branches limit.\n", - loop->num); - return false; - } - else if (unr_insns - > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "(--param max-completely-peeled-insns limit reached).\n", - loop->num); - return false; - } - if (!n_unroll) - dump_printf_loc (report_flags, locus, - "loop turned into non-loop; it never loops.\n"); initialize_original_copy_tables (); auto_sbitmap wont_exit (n_unroll + 1); @@ -873,6 +901,8 @@ exit = NULL; bitmap_clear (wont_exit); } + if (may_be_zero) + bitmap_clear_bit (wont_exit, 1); if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), n_unroll, wont_exit, @@ -899,8 +929,8 @@ else gimple_cond_make_true (cond); update_stmt (cond); - /* Do not remove the path. Doing so may remove outer loop - and confuse bookkeeping code in tree_unroll_loops_completelly. */ + /* Do not remove the path, as doing so may remove outer loop and + confuse bookkeeping code in tree_unroll_loops_completely. */ } /* Store the loop for later unlooping and exit removal. 
*/ @@ -916,7 +946,7 @@ { dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, "loop with %d iterations completely unrolled", - (int) (n_unroll + 1)); + (int) n_unroll); if (loop->header->count.initialized_p ()) dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, " (header execution count %d)", @@ -957,14 +987,15 @@ static bool try_peel_loop (struct loop *loop, - edge exit, tree niter, + edge exit, tree niter, bool may_be_zero, HOST_WIDE_INT maxiter) { HOST_WIDE_INT npeel; struct loop_size size; int peeled_size; - if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 + if (!flag_peel_loops + || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 || !peeled_loops) return false; @@ -975,20 +1006,29 @@ return false; } + /* We don't peel loops that will be unrolled as this can duplicate a + loop more times than the user requested. */ + if (loop->unroll) + { + if (dump_file) + fprintf (dump_file, "Not peeling: user didn't want it peeled.\n"); + return false; + } + /* Peel only innermost loops. While the code is perfectly capable of peeling non-innermost loops, the heuristics would probably need some improvements. 
*/ if (loop->inner) { if (dump_file) - fprintf (dump_file, "Not peeling: outer loop\n"); + fprintf (dump_file, "Not peeling: outer loop\n"); return false; } if (!optimize_loop_for_speed_p (loop)) { if (dump_file) - fprintf (dump_file, "Not peeling: cold loop\n"); + fprintf (dump_file, "Not peeling: cold loop\n"); return false; } @@ -1006,7 +1046,7 @@ if (maxiter >= 0 && maxiter <= npeel) { if (dump_file) - fprintf (dump_file, "Not peeling: upper bound is known so can " + fprintf (dump_file, "Not peeling: upper bound is known so can " "unroll completely\n"); return false; } @@ -1017,7 +1057,7 @@ if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) { if (dump_file) - fprintf (dump_file, "Not peeling: rolls too much " + fprintf (dump_file, "Not peeling: rolls too much " "(%i + 1 > --param max-peel-times)\n", (int) npeel); return false; } @@ -1030,7 +1070,7 @@ > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) { if (dump_file) - fprintf (dump_file, "Not peeling: peeled sequence size is too large " + fprintf (dump_file, "Not peeling: peeled sequence size is too large " "(%i insns > --param max-peel-insns)", peeled_size); return false; } @@ -1050,6 +1090,8 @@ exit = NULL; bitmap_clear (wont_exit); } + if (may_be_zero) + bitmap_clear_bit (wont_exit, 1); if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), npeel, wont_exit, exit, &edges_to_remove, @@ -1090,7 +1132,6 @@ } } profile_count entry_count = profile_count::zero (); - int entry_freq = 0; edge e; edge_iterator ei; @@ -1099,15 +1140,10 @@ { if (e->src->count.initialized_p ()) entry_count = e->src->count + e->src->count; - entry_freq += e->src->frequency; gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); } profile_probability p = profile_probability::very_unlikely (); - if (loop->header->count > 0) - p = entry_count.probability_in (loop->header->count); - else if (loop->header->frequency) - p = profile_probability::probability_in_gcov_type - (entry_freq, loop->header->frequency); + p = 
entry_count.probability_in (loop->header->count); scale_loop_profile (loop, p, 0); bitmap_set_bit (peeled_loops, loop->num); return true; @@ -1121,20 +1157,42 @@ static bool canonicalize_loop_induction_variables (struct loop *loop, bool create_iv, enum unroll_level ul, - bool try_eval) + bool try_eval, bool allow_peel) { edge exit = NULL; tree niter; HOST_WIDE_INT maxiter; bool modified = false; - location_t locus = UNKNOWN_LOCATION; + dump_user_location_t locus; + struct tree_niter_desc niter_desc; + bool may_be_zero = false; - niter = number_of_latch_executions (loop); + /* For unrolling allow conditional constant or zero iterations, thus + perform loop-header copying on-the-fly. */ exit = single_exit (loop); + niter = chrec_dont_know; + if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false)) + { + niter = niter_desc.niter; + may_be_zero + = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero); + } if (TREE_CODE (niter) == INTEGER_CST) - locus = gimple_location (last_stmt (exit->src)); + locus = last_stmt (exit->src); else { + /* For non-constant niter fold may_be_zero into niter again. */ + if (may_be_zero) + { + if (COMPARISON_CLASS_P (niter_desc.may_be_zero)) + niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), + niter_desc.may_be_zero, + build_int_cst (TREE_TYPE (niter), 0), niter); + else + niter = chrec_dont_know; + may_be_zero = false; + } + /* If the loop has more than one exit, try checking all of them for # of iterations determinable through scev. */ if (!exit) @@ -1147,7 +1205,7 @@ niter = find_loop_niter_by_eval (loop, &exit); if (exit) - locus = gimple_location (last_stmt (exit->src)); + locus = last_stmt (exit->src); if (TREE_CODE (niter) != INTEGER_CST) exit = NULL; @@ -1189,16 +1247,31 @@ populates the loop bounds. 
*/ modified |= remove_redundant_iv_tests (loop); - if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus)) + if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul, + maxiter, locus, allow_peel)) return true; if (create_iv && niter && !chrec_contains_undetermined (niter) && exit && just_once_each_iteration_p (loop, exit->src)) - create_canonical_iv (loop, exit, niter); + { + tree iv_niter = niter; + if (may_be_zero) + { + if (COMPARISON_CLASS_P (niter_desc.may_be_zero)) + iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter), + niter_desc.may_be_zero, + build_int_cst (TREE_TYPE (iv_niter), 0), + iv_niter); + else + iv_niter = NULL_TREE; + } + if (iv_niter) + create_canonical_iv (loop, exit, iv_niter); + } if (ul == UL_ALL) - modified |= try_peel_loop (loop, exit, niter, maxiter); + modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter); return modified; } @@ -1220,7 +1293,7 @@ { changed |= canonicalize_loop_induction_variables (loop, true, UL_SINGLE_ITER, - true); + true, false); } gcc_assert (!need_ssa_update_p (cfun)); @@ -1246,50 +1319,6 @@ return 0; } -/* Propagate constant SSA_NAMEs defined in basic block BB. */ - -static void -propagate_constants_for_unrolling (basic_block bb) -{ - /* Look for degenerate PHI nodes with constant argument. */ - for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); ) - { - gphi *phi = gsi.phi (); - tree result = gimple_phi_result (phi); - tree arg = gimple_phi_arg_def (phi, 0); - - if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result) - && gimple_phi_num_args (phi) == 1 - && CONSTANT_CLASS_P (arg)) - { - replace_uses_by (result, arg); - gsi_remove (&gsi, true); - release_ssa_name (result); - } - else - gsi_next (&gsi); - } - - /* Look for assignments to SSA names with constant RHS. 
*/ - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ) - { - gimple *stmt = gsi_stmt (gsi); - tree lhs; - - if (is_gimple_assign (stmt) - && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant - && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME) - && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) - { - replace_uses_by (lhs, gimple_assign_rhs1 (stmt)); - gsi_remove (&gsi, true); - release_ssa_name (lhs); - } - else - gsi_next (&gsi); - } -} - /* Process loops from innermost to outer, stopping at the innermost loop we unrolled. */ @@ -1301,18 +1330,42 @@ bool changed = false; struct loop *inner; enum unroll_level ul; + unsigned num = number_of_loops (cfun); - /* Process inner loops first. */ + /* Process inner loops first. Don't walk loops added by the recursive + calls because SSA form is not up-to-date. They can be handled in the + next iteration. */ + bitmap child_father_bbs = NULL; for (inner = loop->inner; inner != NULL; inner = inner->next) - changed |= tree_unroll_loops_completely_1 (may_increase_size, - unroll_outer, father_bbs, - inner); - + if ((unsigned) inner->num < num) + { + if (!child_father_bbs) + child_father_bbs = BITMAP_ALLOC (NULL); + if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer, + child_father_bbs, inner)) + { + bitmap_ior_into (father_bbs, child_father_bbs); + bitmap_clear (child_father_bbs); + changed = true; + } + } + if (child_father_bbs) + BITMAP_FREE (child_father_bbs); + /* If we changed an inner loop we cannot process outer loops in this iteration because SSA form is not up-to-date. Continue with siblings of outer loops instead. */ if (changed) - return true; + { + /* If we are recorded as father clear all other fathers that + are necessarily covered already to avoid redundant work. 
*/ + if (bitmap_bit_p (father_bbs, loop->header->index)) + { + bitmap_clear (father_bbs); + bitmap_set_bit (father_bbs, loop->header->index); + } + return true; + } /* Don't unroll #pragma omp simd loops until the vectorizer attempts to vectorize those. */ @@ -1324,7 +1377,9 @@ if (!loop_father) return false; - if (may_increase_size && optimize_loop_nest_for_speed_p (loop) + if (loop->unroll > 1) + ul = UL_ALL; + else if (may_increase_size && optimize_loop_nest_for_speed_p (loop) /* Unroll outermost loops only if asked to do so or they do not cause code growth. */ && (unroll_outer || loop_outer (loop_father))) @@ -1333,14 +1388,20 @@ ul = UL_NO_GROWTH; if (canonicalize_loop_induction_variables - (loop, false, ul, !flag_tree_loop_ivcanon)) + (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer)) { /* If we'll continue unrolling, we need to propagate constants within the new basic blocks to fold away induction variable computations; otherwise, the size might blow up before the iteration is complete and the IR eventually cleaned up. */ if (loop_outer (loop_father)) - bitmap_set_bit (father_bbs, loop_father->header->index); + { + /* Once we process our father we will have processed + the fathers of our children as well, so avoid doing + redundant work and clear fathers we've gathered sofar. */ + bitmap_clear (father_bbs); + bitmap_set_bit (father_bbs, loop_father->header->index); + } return true; } @@ -1352,7 +1413,7 @@ MAY_INCREASE_SIZE is true, perform the unrolling only if the size of the code does not increase. 
*/ -unsigned int +static unsigned int tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) { bitmap father_bbs = BITMAP_ALLOC (NULL); @@ -1408,10 +1469,14 @@ EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi) { loop_p father = get_loop (cfun, i); - basic_block *body = get_loop_body_in_dom_order (father); - for (unsigned j = 0; j < father->num_nodes; j++) - propagate_constants_for_unrolling (body[j]); - free (body); + bitmap exit_bbs = BITMAP_ALLOC (NULL); + loop_exit *exit = father->exits->next; + while (exit->e) + { + bitmap_set_bit (exit_bbs, exit->e->dest->index); + exit = exit->next; + } + do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs); } BITMAP_FREE (fathers); @@ -1529,9 +1594,9 @@ re-peeling the same loop multiple times. */ if (flag_peel_loops) peeled_loops = BITMAP_ALLOC (NULL); - int val = tree_unroll_loops_completely (flag_unroll_loops - || flag_peel_loops - || optimize >= 3, true); + unsigned int val = tree_unroll_loops_completely (flag_unroll_loops + || flag_peel_loops + || optimize >= 3, true); if (peeled_loops) { BITMAP_FREE (peeled_loops); @@ -1583,8 +1648,7 @@ { unsigned ret = 0; - loop_optimizer_init (LOOPS_NORMAL - | LOOPS_HAVE_RECORDED_EXITS); + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); if (number_of_loops (fun) > 1) { scev_initialize ();