Mercurial > hg > CbC > CbC_gcc
diff gcc/tree-ssa-reassoc.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
line wrap: on
line diff
--- a/gcc/tree-ssa-reassoc.c Thu Oct 25 07:37:49 2018 +0900 +++ b/gcc/tree-ssa-reassoc.c Thu Feb 13 11:34:05 2020 +0900 @@ -1,5 +1,5 @@ /* Reassociation for trees. - Copyright (C) 2005-2018 Free Software Foundation, Inc. + Copyright (C) 2005-2020 Free Software Foundation, Inc. Contributed by Daniel Berlin <dan@dberlin.org> This file is part of GCC. @@ -48,7 +48,6 @@ #include "tree-ssa.h" #include "langhooks.h" #include "cfgloop.h" -#include "params.h" #include "builtins.h" #include "gimplify.h" #include "case-cfn-macros.h" @@ -270,7 +269,7 @@ phi_rank (gimple *stmt) { basic_block bb = gimple_bb (stmt); - struct loop *father = bb->loop_father; + class loop *father = bb->loop_father; tree res; unsigned i; use_operand_p use; @@ -603,7 +602,7 @@ operation with tree code CODE, and is inside LOOP. */ static bool -is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop) +is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop) { basic_block bb = gimple_bb (stmt); @@ -1015,7 +1014,7 @@ fprintf (dump_file, "Found * 0, removing all other ops\n"); reassociate_stats.ops_eliminated += ops->length () - 1; - ops->truncate (1); + ops->truncate (0); ops->quick_push (oelast); return; } @@ -1560,7 +1559,7 @@ static bool undistribute_ops_list (enum tree_code opcode, - vec<operand_entry *> *ops, struct loop *loop) + vec<operand_entry *> *ops, class loop *loop) { unsigned int length = ops->length (); operand_entry *oe1; @@ -1772,6 +1771,350 @@ return changed; } +/* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME: + first: element index for each relevant BIT_FIELD_REF. + second: the index of vec ops* for each relevant BIT_FIELD_REF. */ +typedef std::pair<unsigned, unsigned> v_info_elem; +struct v_info { + tree vec_type; + auto_vec<v_info_elem, 32> vec; +}; +typedef v_info *v_info_ptr; + +/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */ +static int +sort_by_mach_mode (const void *p_i, const void *p_j) +{ + const tree tr1 = *((const tree *) p_i); + const tree tr2 = *((const tree *) p_j); + unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1)); + unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2)); + if (mode1 > mode2) + return 1; + else if (mode1 < mode2) + return -1; + else + return 0; +} + +/* Cleanup hash map for VECTOR information. */ +static void +cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map) +{ + for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin (); + it != info_map.end (); ++it) + { + v_info_ptr info = (*it).second; + delete info; + (*it).second = NULL; + } +} + +/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE. + V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k] + is transformed to + Vs = (V1 + V2 + ... + Vn) + Vs[0] + Vs[1] + ... + Vs[k] + + The basic steps are listed below: + + 1) Check the addition chain *OPS by looking those summands coming from + VECTOR bit_field_ref on VECTOR type. Put the information into + v_info_map for each satisfied summand, using VECTOR SSA_NAME as key. + + 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are + continuous, they can cover the whole VECTOR perfectly without any holes. + Obtain one VECTOR list which contain candidates to be transformed. + + 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of + candidates with same mode, build the addition statements for them and + generate BIT_FIELD_REFs accordingly. + + TODO: + The current implementation requires the whole VECTORs should be fully + covered, but it can be extended to support partial, checking adjacent + but not fill the whole, it may need some cost model to define the + boundary to do or not. +*/ +static bool +undistribute_bitref_for_vector (enum tree_code opcode, + vec<operand_entry *> *ops, struct loop *loop) +{ + if (ops->length () <= 1) + return false; + + if (opcode != PLUS_EXPR + && opcode != MULT_EXPR + && opcode != BIT_XOR_EXPR + && opcode != BIT_IOR_EXPR + && opcode != BIT_AND_EXPR) + return false; + + hash_map<tree, v_info_ptr> v_info_map; + operand_entry *oe1; + unsigned i; + + /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the + information into map. */ + FOR_EACH_VEC_ELT (*ops, i, oe1) + { + enum tree_code dcode; + gimple *oe1def; + + if (TREE_CODE (oe1->op) != SSA_NAME) + continue; + oe1def = SSA_NAME_DEF_STMT (oe1->op); + if (!is_gimple_assign (oe1def)) + continue; + dcode = gimple_assign_rhs_code (oe1def); + if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop)) + continue; + + tree rhs = gimple_assign_rhs1 (oe1def); + tree vec = TREE_OPERAND (rhs, 0); + tree vec_type = TREE_TYPE (vec); + + if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type)) + continue; + + /* Ignore it if target machine can't support this VECTOR type. */ + if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) + continue; + + /* Check const vector type, constrain BIT_FIELD_REF offset and size. */ + if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) + continue; + + if (VECTOR_TYPE_P (TREE_TYPE (rhs)) + || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs)))) + continue; + + /* The type of BIT_FIELD_REF might not be equal to the element type of + the vector. We want to use a vector type with element type the + same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */ + if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type))) + { + machine_mode simd_mode; + unsigned HOST_WIDE_INT size, nunits; + unsigned HOST_WIDE_INT elem_size + = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs))); + if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size)) + continue; + if (size <= elem_size || (size % elem_size) != 0) + continue; + nunits = size / elem_size; + if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)), + nunits).exists (&simd_mode)) + continue; + vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode); + + /* Ignore it if target machine can't support this VECTOR type. */ + if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) + continue; + + /* Check const vector type, constrain BIT_FIELD_REF offset and + size. */ + if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) + continue; + + if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)), + GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec))))) + continue; + } + + tree elem_type = TREE_TYPE (vec_type); + unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type)); + if (maybe_ne (bit_field_size (rhs), elem_size)) + continue; + + unsigned idx; + if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx)) + continue; + + /* Ignore it if target machine can't support this type of VECTOR + operation. */ + optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector); + if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing) + continue; + + bool existed; + v_info_ptr &info = v_info_map.get_or_insert (vec, &existed); + if (!existed) + { + info = new v_info; + info->vec_type = vec_type; + } + else if (!types_compatible_p (vec_type, info->vec_type)) + continue; + info->vec.safe_push (std::make_pair (idx, i)); + } + + /* At least two VECTOR to combine. */ + if (v_info_map.elements () <= 1) + { + cleanup_vinfo_map (v_info_map); + return false; + } + + /* Verify all VECTOR candidates by checking two conditions: + 1) sorted offsets are adjacent, no holes. + 2) can fill the whole VECTOR perfectly. + And add the valid candidates to a vector for further handling. */ + auto_vec<tree> valid_vecs (v_info_map.elements ()); + for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin (); + it != v_info_map.end (); ++it) + { + tree cand_vec = (*it).first; + v_info_ptr cand_info = (*it).second; + unsigned int num_elems + = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant (); + if (cand_info->vec.length () != num_elems) + continue; + sbitmap holes = sbitmap_alloc (num_elems); + bitmap_ones (holes); + bool valid = true; + v_info_elem *curr; + FOR_EACH_VEC_ELT (cand_info->vec, i, curr) + { + if (!bitmap_bit_p (holes, curr->first)) + { + valid = false; + break; + } + else + bitmap_clear_bit (holes, curr->first); + } + if (valid && bitmap_empty_p (holes)) + valid_vecs.quick_push (cand_vec); + sbitmap_free (holes); + } + + /* At least two VECTOR to combine. */ + if (valid_vecs.length () <= 1) + { + cleanup_vinfo_map (v_info_map); + return false; + } + + valid_vecs.qsort (sort_by_mach_mode); + /* Go through all candidates by machine mode order, query the mode_to_total + to get the total number for each mode and skip the single one. */ + for (unsigned i = 0; i < valid_vecs.length () - 1; ++i) + { + tree tvec = valid_vecs[i]; + enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec)); + + /* Skip modes with only a single candidate. */ + if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode) + continue; + + unsigned int idx, j; + gimple *sum = NULL; + tree sum_vec = tvec; + v_info_ptr info_ptr = *(v_info_map.get (tvec)); + v_info_elem *elem; + tree vec_type = info_ptr->vec_type; + + /* Build the sum for all candidates with same mode. */ + do + { + sum = build_and_add_sum (vec_type, sum_vec, + valid_vecs[i + 1], opcode); + if (!useless_type_conversion_p (vec_type, + TREE_TYPE (valid_vecs[i + 1]))) + { + /* Update the operands only after build_and_add_sum, + so that we don't have to repeat the placement algorithm + of build_and_add_sum. */ + gimple_stmt_iterator gsi = gsi_for_stmt (sum); + tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, + valid_vecs[i + 1]); + tree lhs = make_ssa_name (vec_type); + gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); + gimple_set_uid (g, gimple_uid (sum)); + gsi_insert_before (&gsi, g, GSI_NEW_STMT); + gimple_assign_set_rhs2 (sum, lhs); + if (sum_vec == tvec) + { + vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec); + lhs = make_ssa_name (vec_type); + g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); + gimple_set_uid (g, gimple_uid (sum)); + gsi_insert_before (&gsi, g, GSI_NEW_STMT); + gimple_assign_set_rhs1 (sum, lhs); + } + update_stmt (sum); + } + sum_vec = gimple_get_lhs (sum); + info_ptr = *(v_info_map.get (valid_vecs[i + 1])); + gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type)); + /* Update those related ops of current candidate VECTOR. */ + FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) + { + idx = elem->second; + gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); + /* Set this then op definition will get DCEd later. */ + gimple_set_visited (def, true); + if (opcode == PLUS_EXPR + || opcode == BIT_XOR_EXPR + || opcode == BIT_IOR_EXPR) + (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op)); + else if (opcode == MULT_EXPR) + (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op)); + else + { + gcc_assert (opcode == BIT_AND_EXPR); + (*ops)[idx]->op + = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op)); + } + (*ops)[idx]->rank = 0; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Generating addition -> "); + print_gimple_stmt (dump_file, sum, 0); + } + i++; + } + while ((i < valid_vecs.length () - 1) + && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode); + + /* Referring to first valid VECTOR with this mode, generate the + BIT_FIELD_REF statements accordingly. */ + info_ptr = *(v_info_map.get (tvec)); + gcc_assert (sum); + tree elem_type = TREE_TYPE (vec_type); + FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) + { + idx = elem->second; + tree dst = make_ssa_name (elem_type); + tree pos = bitsize_int (elem->first + * tree_to_uhwi (TYPE_SIZE (elem_type))); + tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec, + TYPE_SIZE (elem_type), pos); + gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr); + insert_stmt_after (gs, sum); + gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); + /* Set this then op definition will get DCEd later. */ + gimple_set_visited (def, true); + (*ops)[idx]->op = gimple_assign_lhs (gs); + (*ops)[idx]->rank = get_rank ((*ops)[idx]->op); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Generating bit_field_ref -> "); + print_gimple_stmt (dump_file, gs, 0); + } + } + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n"); + + cleanup_vinfo_map (v_info_map); + + return true; +} + /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison expression, examine the other OPS to see if any of them are comparisons of the same values, which we may be able to combine or eliminate. @@ -1820,12 +2163,15 @@ /* If we got here, we have a match. See if we can combine the two comparisons. */ + tree type = TREE_TYPE (gimple_assign_lhs (def1)); if (opcode == BIT_IOR_EXPR) - t = maybe_fold_or_comparisons (lcode, op1, op2, + t = maybe_fold_or_comparisons (type, + lcode, op1, op2, rcode, gimple_assign_rhs1 (def2), gimple_assign_rhs2 (def2)); else - t = maybe_fold_and_comparisons (lcode, op1, op2, + t = maybe_fold_and_comparisons (type, + lcode, op1, op2, rcode, gimple_assign_rhs1 (def2), gimple_assign_rhs2 (def2)); if (!t) @@ -2039,9 +2385,6 @@ i++; } - length = ops->length (); - oelast = ops->last (); - if (iterate) optimize_ops_list (opcode, ops); } @@ -2143,7 +2486,8 @@ exp_type = boolean_type_node; } - if (TREE_CODE (arg0) != SSA_NAME) + if (TREE_CODE (arg0) != SSA_NAME + || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)) break; loc = gimple_location (stmt); switch (code) @@ -2537,8 +2881,23 @@ if (!tree_int_cst_equal (lowxor, highxor)) return false; + exp = rangei->exp; + scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); + int prec = GET_MODE_PRECISION (mode); + if (TYPE_PRECISION (type) < prec + || (wi::to_wide (TYPE_MIN_VALUE (type)) + != wi::min_value (prec, TYPE_SIGN (type))) + || (wi::to_wide (TYPE_MAX_VALUE (type)) + != wi::max_value (prec, TYPE_SIGN (type)))) + { + type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type)); + exp = fold_convert (type, exp); + lowxor = fold_convert (type, lowxor); + lowi = fold_convert (type, lowi); + highi = fold_convert (type, highi); + } tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); - exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); + exp = fold_build2 (BIT_AND_EXPR, type, exp, tem); lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, @@ -2581,7 +2940,16 @@ if (!integer_pow2p (tem1)) return false; - type = unsigned_type_for (type); + scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); + int prec = GET_MODE_PRECISION (mode); + if (TYPE_PRECISION (type) < prec + || (wi::to_wide (TYPE_MIN_VALUE (type)) + != wi::min_value (prec, TYPE_SIGN (type))) + || (wi::to_wide (TYPE_MAX_VALUE (type)) + != wi::max_value (prec, TYPE_SIGN (type)))) + type = build_nonstandard_integer_type (prec, 1); + else + type = unsigned_type_for (type); tem1 = fold_convert (type, tem1); tem2 = fold_convert (type, tem2); lowi = fold_convert (type, lowi); @@ -3455,10 +3823,11 @@ /* A subroutine of optimize_vec_cond_expr to extract and canonicalize the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, - otherwise the comparison code. */ + otherwise the comparison code. TYPE is a return value that is set + to type of comparison. */ static tree_code -ovce_extract_ops (tree var, gassign **rets, bool *reti) +ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) { if (TREE_CODE (var) != SSA_NAME) return ERROR_MARK; @@ -3500,6 +3869,8 @@ *rets = stmt; if (reti) *reti = inv; + if (type) + *type = TREE_TYPE (cond); return cmp; } @@ -3521,7 +3892,8 @@ gassign *stmt0; bool invert; - tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert); + tree type; + tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type); if (cmp0 == ERROR_MARK) continue; @@ -3530,7 +3902,7 @@ tree &elt1 = (*ops)[j]->op; gassign *stmt1; - tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL); + tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL); if (cmp1 == ERROR_MARK) continue; @@ -3544,9 +3916,11 @@ tree comb; if (opcode == BIT_AND_EXPR) - comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1); + comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1, + y1); else if (opcode == BIT_IOR_EXPR) - comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1); + comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1, + y1); else gcc_unreachable (); if (comb == NULL) @@ -3839,7 +4213,7 @@ static bool get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, - struct loop *loop) + class loop *loop) { gimple *stmt = SSA_NAME_DEF_STMT (var); tree rhs[2]; @@ -3874,7 +4248,7 @@ static tree update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, - unsigned int *pidx, struct loop *loop) + unsigned int *pidx, class loop *loop) { gimple *stmt = SSA_NAME_DEF_STMT (var); tree rhs[4]; @@ -4646,7 +5020,7 @@ get_reassociation_width (int ops_num, enum tree_code opc, machine_mode mode) { - int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); + int param_width = param_tree_reassoc_width; int width; int width_min; int cycles_best; @@ -4787,6 +5161,7 @@ else { stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); + gimple_set_visited (stmts[i], true); } if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -4811,7 +5186,7 @@ gimple *oldbinrhs = binrhs; enum tree_code rhscode = gimple_assign_rhs_code (stmt); gimple *newbinrhs = NULL; - struct loop *loop = loop_containing_stmt (stmt); + class loop *loop = loop_containing_stmt (stmt); tree lhs = gimple_assign_lhs (stmt); gcc_assert (is_reassociable_op (binlhs, rhscode, loop) @@ -4945,7 +5320,7 @@ tree binlhs = gimple_assign_rhs1 (stmt); tree binrhs = gimple_assign_rhs2 (stmt); gimple *immusestmt; - struct loop *loop = loop_containing_stmt (stmt); + class loop *loop = loop_containing_stmt (stmt); if (TREE_CODE (binlhs) == SSA_NAME && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) @@ -5100,7 +5475,7 @@ bool binlhsisreassoc = false; bool binrhsisreassoc = false; enum tree_code rhscode = gimple_assign_rhs_code (stmt); - struct loop *loop = loop_containing_stmt (stmt); + class loop *loop = loop_containing_stmt (stmt); if (set_visited) gimple_set_visited (stmt, true); @@ -5856,11 +6231,6 @@ tree lhs, rhs1, rhs2; enum tree_code rhs_code = gimple_assign_rhs_code (stmt); - /* If this is not a gimple binary expression, there is - nothing for us to do with it. */ - if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) - continue; - /* If this was part of an already processed statement, we don't need to touch it again. */ if (gimple_visited_p (stmt)) @@ -5887,6 +6257,11 @@ continue; } + /* If this is not a gimple binary expression, there is + nothing for us to do with it. */ + if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) + continue; + lhs = gimple_assign_lhs (stmt); rhs1 = gimple_assign_rhs1 (stmt); rhs2 = gimple_assign_rhs2 (stmt); @@ -5925,7 +6300,12 @@ ops.qsort (sort_by_operand_rank); optimize_ops_list (rhs_code, &ops); } - + if (undistribute_bitref_for_vector (rhs_code, &ops, + loop_containing_stmt (stmt))) + { + ops.qsort (sort_by_operand_rank); + optimize_ops_list (rhs_code, &ops); + } if (rhs_code == PLUS_EXPR && transform_add_to_multiply (&ops)) ops.qsort (sort_by_operand_rank); @@ -5987,12 +6367,7 @@ { machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); int ops_num = ops.length (); - int width = get_reassociation_width (ops_num, rhs_code, mode); - - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, - "Width = %d was chosen for reassociation\n", width); - + int width; /* For binary bit operations, if there are at least 3 operands and the last last operand in OPS is a constant, @@ -6006,10 +6381,21 @@ && TREE_CODE (ops.last ()->op) == INTEGER_CST) std::swap (*ops[0], *ops[ops_num - 1]); - if (width > 1 - && ops.length () > 3) - rewrite_expr_tree_parallel (as_a <gassign *> (stmt), - width, ops); + /* Only rewrite the expression tree to parallel in the + last reassoc pass to avoid useless work back-and-forth + with initial linearization. */ + if (!reassoc_insert_powi_p + && ops.length () > 3 + && (width = get_reassociation_width (ops_num, rhs_code, + mode)) > 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Width = %d was chosen for reassociation\n", + width); + rewrite_expr_tree_parallel (as_a <gassign *> (stmt), + width, ops); + } else { /* When there are three operands left, we want