diff gcc/tree-vect-data-refs.c @ 145:1830386684a0
gcc-9.2.0
author:   anatofuz
date:     Thu, 13 Feb 2020 11:34:05 +0900
parents:  84e7813d76e9
children: (none)
--- a/gcc/tree-vect-data-refs.c	Thu Oct 25 07:37:49 2018 +0900
+++ b/gcc/tree-vect-data-refs.c	Thu Feb 13 11:34:05 2020 +0900
@@ -1,5 +1,5 @@
 /* Data References Analysis and Manipulation Utilities for Vectorization.
-   Copyright (C) 2003-2018 Free Software Foundation, Inc.
+   Copyright (C) 2003-2020 Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
 
@@ -49,7 +49,6 @@
 #include "tree-vectorizer.h"
 #include "expr.h"
 #include "builtins.h"
-#include "params.h"
 #include "tree-cfg.h"
 #include "tree-hash-traits.h"
 #include "vec-perm-indices.h"
@@ -145,6 +144,30 @@
       if (rhs < lhs)
         scalar_type = rhs_type;
     }
+  else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
+    {
+      unsigned int i = 0;
+      if (gimple_call_internal_p (call))
+        {
+          internal_fn ifn = gimple_call_internal_fn (call);
+          if (internal_load_fn_p (ifn) || internal_store_fn_p (ifn))
+            /* gimple_expr_type already picked the type of the loaded
+               or stored data.  */
+            i = ~0U;
+          else if (internal_fn_mask_index (ifn) == 0)
+            i = 1;
+        }
+      if (i < gimple_call_num_args (call))
+        {
+          tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
+          if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
+            {
+              rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
+              if (rhs < lhs)
+                scalar_type = rhs_type;
+            }
+        }
+    }
 
   *lhs_size_unit = lhs;
   *rhs_size_unit = rhs;
@@ -159,9 +182,9 @@
 static opt_result
 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
 {
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-
-  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+  if ((unsigned) param_vect_max_version_for_alias_checks == 0)
     return opt_result::failure_at (vect_location,
                                    "will not create alias checks, as"
                                    " --param vect-max-version-for-alias-checks"
@@ -210,26 +233,60 @@
     return true;
 
   /* STMT_A and STMT_B belong to overlapping groups.  All loads in a
-     group are emitted at the position of the last scalar load and all
-     stores in a group are emitted at the position of the last scalar store.
+     SLP group are emitted at the position of the last scalar load and
+     all loads in an interleaving group are emitted at the position
+     of the first scalar load.
+     Stores in a group are emitted at the position of the last scalar store.
      Compute that position and check whether the resulting order matches
-     the current one.  */
-  stmt_vec_info last_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
+     the current one.
+     We have not yet decided between SLP and interleaving so we have
+     to conservatively assume both.  */
+  stmt_vec_info il_a;
+  stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
   if (last_a)
-    for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
-         s = DR_GROUP_NEXT_ELEMENT (s))
-      last_a = get_later_stmt (last_a, s);
+    {
+      for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
+           s = DR_GROUP_NEXT_ELEMENT (s))
+        last_a = get_later_stmt (last_a, s);
+      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
+        {
+          for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
+               s = DR_GROUP_NEXT_ELEMENT (s))
+            if (get_later_stmt (il_a, s) == il_a)
+              il_a = s;
+        }
+      else
+        il_a = last_a;
+    }
   else
-    last_a = stmtinfo_a;
-  stmt_vec_info last_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
+    last_a = il_a = stmtinfo_a;
+  stmt_vec_info il_b;
+  stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
   if (last_b)
-    for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
-         s = DR_GROUP_NEXT_ELEMENT (s))
-      last_b = get_later_stmt (last_b, s);
+    {
+      for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
+           s = DR_GROUP_NEXT_ELEMENT (s))
+        last_b = get_later_stmt (last_b, s);
+      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
+        {
+          for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
+               s = DR_GROUP_NEXT_ELEMENT (s))
+            if (get_later_stmt (il_b, s) == il_b)
+              il_b = s;
+        }
+      else
+        il_b = last_b;
+    }
   else
-    last_b = stmtinfo_b;
-  return ((get_later_stmt (last_a, last_b) == last_a)
-          == (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a));
+    last_b = il_b = stmtinfo_b;
+  bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
+  return (/* SLP */
+          (get_later_stmt (last_a, last_b) == last_a) == a_after_b
+          /* Interleaving */
+          && (get_later_stmt (il_a, il_b) == il_a) == a_after_b
+          /* Mixed */
+          && (get_later_stmt (il_a, last_b) == il_a) == a_after_b
+          && (get_later_stmt (last_a, il_b) == last_a) == a_after_b);
 }
 
 /* A subroutine of vect_analyze_data_ref_dependence.  Handle
@@ -248,7 +305,7 @@
                                   loop_vec_info loop_vinfo,
                                   int loop_depth, unsigned int *max_vf)
 {
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   lambda_vector dist_v;
   unsigned int i;
   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
@@ -305,7 +362,7 @@
                            unsigned int *max_vf)
 {
   unsigned int i;
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   struct data_reference *dra = DDR_A (ddr);
   struct data_reference *drb = DDR_B (ddr);
   dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
@@ -473,8 +530,22 @@
              reversed (to make distance vector positive), and the actual
              distance is negative.  */
          if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+           dump_printf_loc (MSG_NOTE, vect_location,
                             "dependence distance negative.\n");
+         /* When doing outer loop vectorization, we need to check if there is
+            a backward dependence at the inner loop level if the dependence
+            at the outer loop is reversed.  See PR81740.  */
+         if (nested_in_vect_loop_p (loop, stmtinfo_a)
+             || nested_in_vect_loop_p (loop, stmtinfo_b))
+           {
+             unsigned inner_depth = index_in_loop_nest (loop->inner->num,
+                                                        DDR_LOOP_NEST (ddr));
+             if (dist_v[inner_depth] < 0)
+               return opt_result::failure_at (stmtinfo_a->stmt,
+                                              "not vectorized, dependence "
+                                              "between data-refs %T and %T\n",
+                                              DR_REF (dra), DR_REF (drb));
+           }
          /* Record a negative dependence distance to later limit the
             amount of stmt copying / unrolling we can perform.
             Only need to handle read-after-write dependence.  */
@@ -490,7 +561,7 @@
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
-         *max_vf = abs (dist);
+         *max_vf = abs_dist;
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "adjusting maximal vectorization factor to %i\n",
@@ -795,7 +866,7 @@
 vect_record_base_alignments (vec_info *vinfo)
 {
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
-  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
+  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
   data_reference *dr;
   unsigned int i;
   FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr)
@@ -819,7 +890,7 @@
 
 /* Return the target alignment for the vectorized form of DR_INFO.  */
 
-static unsigned int
+static poly_uint64
 vect_calculate_target_alignment (dr_vec_info *dr_info)
 {
   tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
@@ -842,7 +913,7 @@
   stmt_vec_info stmt_info = dr_info->stmt;
   vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = NULL;
+  class loop *loop = NULL;
   tree ref = DR_REF (dr_info->dr);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
@@ -862,10 +933,26 @@
   innermost_loop_behavior *drb = vect_dr_behavior (dr_info);
   bool step_preserves_misalignment_p;
 
-  unsigned HOST_WIDE_INT vector_alignment
-    = vect_calculate_target_alignment (dr_info) / BITS_PER_UNIT;
+  poly_uint64 vector_alignment
+    = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
   DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
 
+  /* If the main loop has peeled for alignment we have no way of knowing
+     whether the data accesses in the epilogues are aligned.  We can't at
+     compile time answer the question whether we have entered the main loop or
+     not.  Fixes PR 92351.  */
+  if (loop_vinfo)
+    {
+      loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+      if (orig_loop_vinfo
+          && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
+        return;
+    }
+
+  unsigned HOST_WIDE_INT vect_align_c;
+  if (!vector_alignment.is_constant (&vect_align_c))
+    return;
+
   /* No step for BB vectorization.  */
   if (!loop)
     {
@@ -882,7 +969,7 @@
   else if (nested_in_vect_loop_p (loop, stmt_info))
     {
       step_preserves_misalignment_p
-       = (DR_STEP_ALIGNMENT (dr_info->dr) % vector_alignment) == 0;
+       = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
 
       if (dump_enabled_p ())
        {
@@ -904,7 +991,7 @@
     {
       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
       step_preserves_misalignment_p
-       = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vector_alignment);
+       = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);
 
       if (!step_preserves_misalignment_p && dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -923,7 +1010,7 @@
       base_misalignment = (*entry)->base_misalignment;
     }
 
-  if (drb->offset_alignment < vector_alignment
+  if (drb->offset_alignment < vect_align_c
       || !step_preserves_misalignment_p
       /* We need to know whether the step wrt the vectorized loop is
          negative when computing the starting misalignment below.  */
@@ -935,13 +1022,13 @@
       return;
     }
 
-  if (base_alignment < vector_alignment)
+  if (base_alignment < vect_align_c)
     {
       unsigned int max_alignment;
       tree base = get_base_for_alignment (drb->base_address, &max_alignment);
-      if (max_alignment < vector_alignment
+      if (max_alignment < vect_align_c
          || !vect_can_force_dr_alignment_p (base,
-                                            vector_alignment * BITS_PER_UNIT))
+                                            vect_align_c * BITS_PER_UNIT))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
@@ -972,8 +1059,7 @@
                      * TREE_INT_CST_LOW (drb->step));
 
   unsigned int const_misalignment;
-  if (!known_misalignment (misalignment, vector_alignment,
-                           &const_misalignment))
+  if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1010,20 +1096,10 @@
   unsigned int i;
   vec<dr_p> same_aligned_drs;
   struct data_reference *current_dr;
-  int dr_size = vect_get_scalar_dr_size (dr_info);
-  int dr_peel_size = vect_get_scalar_dr_size (dr_peel_info);
-  stmt_vec_info stmt_info = dr_info->stmt;
   stmt_vec_info peel_stmt_info = dr_peel_info->stmt;
 
-  /* For interleaved data accesses the step in the loop must be multiplied by
-     the size of the interleaving group.  */
-  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
-    dr_size *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
-  if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
-    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
-
-  /* It can be assumed that the data refs with the same alignment as dr_peel
-     are aligned in the vector loop.  */
+  /* It can be assumed that if dr_info has the same alignment as dr_peel,
+     it is aligned in the vector loop.  */
   same_aligned_drs = STMT_VINFO_SAME_ALIGN_REFS (peel_stmt_info);
   FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr)
     {
@@ -1031,20 +1107,20 @@
        continue;
       gcc_assert (!known_alignment_for_access_p (dr_info)
                   || !known_alignment_for_access_p (dr_peel_info)
-                 || (DR_MISALIGNMENT (dr_info) / dr_size
-                     == DR_MISALIGNMENT (dr_peel_info) / dr_peel_size));
+                 || (DR_MISALIGNMENT (dr_info)
+                     == DR_MISALIGNMENT (dr_peel_info)));
       SET_DR_MISALIGNMENT (dr_info, 0);
       return;
     }
 
-  if (known_alignment_for_access_p (dr_info)
+  unsigned HOST_WIDE_INT alignment;
+  if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
+      && known_alignment_for_access_p (dr_info)
      && known_alignment_for_access_p (dr_peel_info))
    {
-      bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
-                                           size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr_info);
-      misal += negative ? -npeel * dr_size : npeel * dr_size;
-      misal &= DR_TARGET_ALIGNMENT (dr_info) - 1;
+      misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
+      misal &= alignment - 1;
      SET_DR_MISALIGNMENT (dr_info, misal);
      return;
    }
@@ -1594,7 +1670,7 @@
 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 {
   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   enum dr_alignment_support supportable_dr_alignment;
   dr_vec_info *first_store = NULL;
   dr_vec_info *dr0_info = NULL;
@@ -1688,7 +1764,12 @@
                                            size_zero_node) < 0;
              vectype = STMT_VINFO_VECTYPE (stmt_info);
-             unsigned int target_align = DR_TARGET_ALIGNMENT (dr_info);
+             /* If known_alignment_for_access_p then we have set
+                DR_MISALIGNMENT which is only done if we know it at compiler
+                time, so it is safe to assume target alignment is
+                constant.  */
+             unsigned int target_align =
+               DR_TARGET_ALIGNMENT (dr_info).to_constant ();
              unsigned int dr_size = vect_get_scalar_dr_size (dr_info);
              mis = (negative
                     ? DR_MISALIGNMENT (dr_info)
@@ -1752,7 +1833,7 @@
         computation will be invariant in the outermost loop.  */
      else if (same_align_drs_max == same_align_drs)
        {
-         struct loop *ivloop0, *ivloop;
+         class loop *ivloop0, *ivloop;
          ivloop0 = outermost_invariant_loop_for_expr
            (loop, DR_BASE_ADDRESS (dr0_info->dr));
          ivloop = outermost_invariant_loop_for_expr
@@ -1965,7 +2046,12 @@
          mis = (negative
                 ? DR_MISALIGNMENT (dr0_info)
                 : -DR_MISALIGNMENT (dr0_info));
-         unsigned int target_align = DR_TARGET_ALIGNMENT (dr0_info);
+         /* If known_alignment_for_access_p then we have set
+            DR_MISALIGNMENT which is only done if we know it at compiler
+            time, so it is safe to assume target alignment is
+            constant.  */
+         unsigned int target_align =
+           DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
          npeel = ((mis & (target_align - 1))
                   / vect_get_scalar_dr_size (dr0_info));
        }
@@ -1999,15 +2085,27 @@
   if (do_peeling)
     {
       unsigned max_allowed_peel
-       = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
+       = param_vect_max_peeling_for_alignment;
+      if (flag_vect_cost_model == VECT_COST_MODEL_CHEAP)
+       max_allowed_peel = 0;
      if (max_allowed_peel != (unsigned)-1)
        {
          unsigned max_peel = npeel;
          if (max_peel == 0)
            {
-             unsigned int target_align = DR_TARGET_ALIGNMENT (dr0_info);
-             max_peel = (target_align
-                         / vect_get_scalar_dr_size (dr0_info) - 1);
+             poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
+             unsigned HOST_WIDE_INT target_align_c;
+             if (target_align.is_constant (&target_align_c))
+               max_peel =
+                 target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
+             else
+               {
+                 do_peeling = false;
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_NOTE, vect_location,
+                                    "Disable peeling, max peels set and vector"
+                                    " alignment unknown\n");
+               }
            }
          if (max_peel > max_allowed_peel)
            {
@@ -2083,15 +2181,16 @@
   /* (2) Versioning to force alignment.  */
 
   /* Try versioning if:
-     1) optimize loop for speed
+     1) optimize loop for speed and the cost-model is not cheap
      2) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     3) all misaligned data refs with a known misalignment are supported, and
     4) the number of runtime alignment checks is within reason.  */
 
-  do_versioning =
-       optimize_loop_nest_for_speed_p (loop)
-       && (!loop->inner); /* FORNOW */
+  do_versioning
+    = (optimize_loop_nest_for_speed_p (loop)
+       && !loop->inner /* FORNOW */
+       && flag_vect_cost_model != VECT_COST_MODEL_CHEAP);
 
   if (do_versioning)
     {
@@ -2127,7 +2226,7 @@
          if (known_alignment_for_access_p (dr_info)
              || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
-                >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
+                >= (unsigned) param_vect_max_version_for_alignment_checks)
            {
              do_versioning = false;
              break;
            }
@@ -2147,19 +2246,35 @@
              break;
            }
 
+         /* Forcing alignment in the first iteration is no good if
+            we don't keep it across iterations.  For now, just disable
+            versioning in this case.
+            ?? We could actually unroll the loop to achieve the required
+            overall step alignment, and forcing the alignment could be
+            done by doing some iterations of the non-vectorized loop.  */
+         if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+                          * DR_STEP_ALIGNMENT (dr),
+                          DR_TARGET_ALIGNMENT (dr_info)))
+           {
+             do_versioning = false;
+             break;
+           }
+
          /* The rightmost bits of an aligned address must be zeros.
            Construct the mask needed for this test.  For example,
            GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the mask
            must be 15 = 0xf.  */
         mask = size - 1;
 
-         /* FORNOW: use the same mask to test all potentially unaligned
-            references in the loop.  The vectorizer currently supports
-            a single vector size, see the reference to
-            GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
-            vectorization factor is computed.  */
-         gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
-                     || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
+         /* FORNOW: use the same mask to test all potentially unaligned
+            references in the loop.  */
+         if (LOOP_VINFO_PTR_MASK (loop_vinfo)
+             && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
+           {
+             do_versioning = false;
+             break;
+           }
+
          LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
       }
@@ -2246,11 +2361,18 @@
   if (maybe_ne (diff, 0))
     {
       /* Get the wider of the two alignments.  */
-      unsigned int align_a = (vect_calculate_target_alignment (dr_info_a)
-                             / BITS_PER_UNIT);
-      unsigned int align_b = (vect_calculate_target_alignment (dr_info_b)
-                             / BITS_PER_UNIT);
-      unsigned int max_align = MAX (align_a, align_b);
+      poly_uint64 align_a =
+       exact_div (vect_calculate_target_alignment (dr_info_a),
+                  BITS_PER_UNIT);
+      poly_uint64 align_b =
+       exact_div (vect_calculate_target_alignment (dr_info_b),
+                  BITS_PER_UNIT);
+      unsigned HOST_WIDE_INT align_a_c, align_b_c;
+      if (!align_a.is_constant (&align_a_c)
+         || !align_b.is_constant (&align_b_c))
+       return;
+
+      unsigned HOST_WIDE_INT max_align = MAX (align_a_c, align_b_c);
 
       /* Require the gap to be a multiple of the larger vector alignment.  */
       if (!multiple_p (diff, max_align))
@@ -2438,7 +2560,8 @@
       return true;
     }
 
-  dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
   STMT_VINFO_STRIDED_P (stmt_info) = true;
   return true;
 }
@@ -2450,40 +2573,15 @@
       struct data_reference *data_ref = dr;
       unsigned int count = 1;
       tree prev_init = DR_INIT (data_ref);
-      stmt_vec_info prev = stmt_info;
      HOST_WIDE_INT diff, gaps = 0;
 
      /* By construction, all group members have INTEGER_CST DR_INITs.  */
      while (next)
        {
-         /* Skip same data-refs.  In case that two or more stmts share
-            data-ref (supported only for loads), we vectorize only the first
-            stmt, and the rest get their vectorized loads from the first
-            one.  */
-         if (!tree_int_cst_compare (DR_INIT (data_ref),
-                                    DR_INIT (STMT_VINFO_DATA_REF (next))))
-           {
-             if (DR_IS_WRITE (data_ref))
-               {
-                 if (dump_enabled_p ())
-                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                    "Two store stmts share the same dr.\n");
-                 return false;
-               }
-
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_NOTE, vect_location,
-                                "Two or more load stmts share the same dr.\n");
-
-             /* For load use the same data-ref load.  */
-             DR_GROUP_SAME_DR_STMT (next) = prev;
-
-             prev = next;
-             next = DR_GROUP_NEXT_ELEMENT (next);
-             continue;
-           }
-
-         prev = next;
+         /* We never have the same DR multiple times.  */
+         gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
+                                           DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);
+
          data_ref = STMT_VINFO_DATA_REF (next);
 
         /* All group members have the same STEP by construction.  */
@@ -2559,11 +2657,22 @@
            dump_printf (MSG_NOTE, "strided store ");
          else
            dump_printf (MSG_NOTE, "store ");
-         dump_printf (MSG_NOTE, "of size %u starting with %G",
-                      (unsigned)groupsize, stmt_info->stmt);
+         dump_printf (MSG_NOTE, "of size %u\n",
+                      (unsigned)groupsize);
+         dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
+         next = DR_GROUP_NEXT_ELEMENT (stmt_info);
+         while (next)
+           {
+             if (DR_GROUP_GAP (next) != 1)
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "\t<gap of %d elements>\n",
+                                DR_GROUP_GAP (next) - 1);
+             dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
+             next = DR_GROUP_NEXT_ELEMENT (next);
+           }
         if (DR_GROUP_GAP (stmt_info) != 0)
           dump_printf_loc (MSG_NOTE, vect_location,
-                           "There is a gap of %u elements after the group\n",
+                           "\t<gap of %d elements>\n",
                            DR_GROUP_GAP (stmt_info));
       }
@@ -2617,7 +2726,7 @@
   tree scalar_type = TREE_TYPE (DR_REF (dr));
   stmt_vec_info stmt_info = dr_info->stmt;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = NULL;
+  class loop *loop = NULL;
 
   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     return true;
@@ -2770,10 +2879,12 @@
 }
 
 /* Return true if vectorizable_* routines can handle statements STMT1_INFO
-   and STMT2_INFO being in a single group.  */
+   and STMT2_INFO being in a single group.  When ALLOW_SLP_P, masked loads can
+   be grouped in SLP mode.  */
 
 static bool
-can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
+can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
+                  bool allow_slp_p)
 {
   if (gimple_assign_single_p (stmt1_info->stmt))
     return gimple_assign_single_p (stmt2_info->stmt);
@@ -2795,7 +2906,8 @@
         like those created by build_mask_conversion.  */
      tree mask1 = gimple_call_arg (call1, 2);
      tree mask2 = gimple_call_arg (call2, 2);
-      if (!operand_equal_p (mask1, mask2, 0))
+      if (!operand_equal_p (mask1, mask2, 0)
+         && (ifn == IFN_MASK_STORE || !allow_slp_p))
       {
         mask1 = strip_conversion (mask1);
         if (!mask1)
@@ -2881,7 +2993,7 @@
          || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
                                    DR_BASE_ADDRESS (drb)) != 0
          || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
-         || !can_group_stmts_p (stmtinfo_a, stmtinfo_b))
+         || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
        break;
 
      /* Check that the data-refs have the same constant size.  */
@@ -2907,6 +3019,13 @@
          || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
        break;
 
+      /* Different .GOMP_SIMD_LANE calls still give the same lane,
+        just hold extra information.  */
+      if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
+         && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
+         && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
+       break;
+
      /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
      HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
      HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
@@ -2966,6 +3085,13 @@
        DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
        lastinfo = stmtinfo_b;
 
+       STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
+         = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
+
+       if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "Load suitable for SLP vectorization only.\n");
+
       if (init_b == init_prev
           && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
           && dump_enabled_p ())
@@ -2988,8 +3114,8 @@
      stmt_vec_info next, g = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
        {
-         if ((DR_INIT (STMT_VINFO_DR_INFO (next)->dr)
-              == DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
+         if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
+                                 DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
             && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
           first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
         g = next;
@@ -3001,7 +3127,7 @@
        Note this is a heuristic but one with the property that *it
        is fixed up completely.  */
      g = grp;
-      stmt_vec_info newgroup = NULL, ng;
+      stmt_vec_info newgroup = NULL, ng = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
       {
         if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
@@ -3353,7 +3479,6 @@
   /* First, we collect all data ref pairs for aliasing checks.  */
   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
     {
-      int comp_res;
      poly_uint64 lower_bound;
      tree segment_length_a, segment_length_b;
      unsigned HOST_WIDE_INT access_size_a, access_size_b;
@@ -3385,10 +3510,13 @@
      dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
      stmt_vec_info stmt_info_b = dr_info_b->stmt;
 
+      bool preserves_scalar_order_p
+       = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
+
      /* Skip the pair if inter-iteration dependencies are irrelevant
        and intra-iteration dependencies are guaranteed to be honored.  */
      if (ignore_step_p
-         && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b)
+         && (preserves_scalar_order_p
             || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
                                                &lower_bound)))
       {
@@ -3469,14 +3597,11 @@
      align_a = vect_vfa_align (dr_info_a);
      align_b = vect_vfa_align (dr_info_b);
 
-      comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_info_a->dr),
-                                       DR_BASE_ADDRESS (dr_info_b->dr));
-      if (comp_res == 0)
-       comp_res = data_ref_compare_tree (DR_OFFSET (dr_info_a->dr),
-                                         DR_OFFSET (dr_info_b->dr));
-
      /* See whether the alias is known at compilation time.  */
-      if (comp_res == 0
+      if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
+                          DR_BASE_ADDRESS (dr_info_b->dr), 0)
+         && operand_equal_p (DR_OFFSET (dr_info_a->dr),
+                             DR_OFFSET (dr_info_b->dr), 0)
         && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
         && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
         && poly_int_tree_p (segment_length_a)
@@ -3509,15 +3634,21 @@
                            stmt_info_b->stmt);
       }
 
+      dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
+                           access_size_a, align_a);
+      dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
+                           access_size_b, align_b);
+      /* Canonicalize the order to be the one that's needed for accurate
+        RAW, WAR and WAW flags, in cases where the data references are
+        well-ordered.  The order doesn't really matter otherwise,
+        but we might as well be consistent.  */
+      if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
+       std::swap (dr_a, dr_b);
+
      dr_with_seg_len_pair_t dr_with_seg_len_pair
-       (dr_with_seg_len (dr_info_a->dr, segment_length_a,
-                         access_size_a, align_a),
-        dr_with_seg_len (dr_info_b->dr, segment_length_b,
-                         access_size_b, align_b));
-
-      /* Canonicalize pairs by sorting the two DR members.  */
-      if (comp_res > 0)
-       std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
+       (dr_a, dr_b, (preserves_scalar_order_p
+                     ? dr_with_seg_len_pair_t::WELL_ORDERED
+                     : dr_with_seg_len_pair_t::REORDERED));
 
      comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
    }
@@ -3527,16 +3658,18 @@
   unsigned int count = (comp_alias_ddrs.length ()
                        + check_unequal_addrs.length ());
 
-  dump_printf_loc (MSG_NOTE, vect_location,
-                  "improved number of alias checks from %d to %d\n",
-                  may_alias_ddrs.length (), count);
-  if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "improved number of alias checks from %d to %d\n",
+                    may_alias_ddrs.length (), count);
+  unsigned limit = param_vect_max_version_for_alias_checks;
+  if (flag_simd_cost_model == VECT_COST_MODEL_CHEAP)
+    limit = param_vect_max_version_for_alias_checks * 6 / 10;
+  if (count > limit)
    return opt_result::failure_at
      (vect_location,
-       "number of versioning for alias "
-       "run-time tests exceeds %d "
-       "(--param vect-max-version-for-alias-checks)\n",
-       PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
+       "number of versioning for alias run-time tests exceeds %d "
+       "(--param vect-max-version-for-alias-checks)\n", limit);
 
   return opt_result::success ();
 }
@@ -3544,28 +3677,22 @@
 /* Check whether we can use an internal function for a gather load
    or scatter store.  READ_P is true for loads and false for stores.
    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
-   the type of the memory elements being loaded or stored.  OFFSET_BITS
-   is the number of bits in each scalar offset and OFFSET_SIGN is the
-   sign of the offset.  SCALE is the amount by which the offset should
+   the type of the memory elements being loaded or stored.  OFFSET_TYPE
+   is the type of the offset that is being applied to the invariant
+   base address.  SCALE is the amount by which the offset should
    be multiplied *after* it has been converted to address width.
 
-   Return true if the function is supported, storing the function
-   id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT.  */
+   Return true if the function is supported, storing the function id in
+   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.  */
 
 bool
-vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
-                         tree memory_type, unsigned int offset_bits,
-                         signop offset_sign, int scale,
-                         internal_fn *ifn_out, tree *element_type_out)
+vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
+                         tree vectype, tree memory_type, tree offset_type,
+                         int scale, internal_fn *ifn_out,
+                         tree *offset_vectype_out)
 {
   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
-  if (offset_bits > element_bits)
-    /* Internal functions require the offset to be the same width as
-       the vector elements.  We can extend narrower offsets, but it isn't
-       safe to truncate wider offsets.  */
-    return false;
-
   if (element_bits != memory_bits)
    /* For now the vector elements must be the same width as the
       memory elements.  */
@@ -3578,14 +3705,28 @@
   else
     ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
 
-  /* Test whether the target supports this combination.  */
-  if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
-                                              offset_sign, scale))
-    return false;
-
-  *ifn_out = ifn;
-  *element_type_out = TREE_TYPE (vectype);
-  return true;
+  for (;;)
+    {
+      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
+      if (!offset_vectype)
+       return false;
+
+      /* Test whether the target supports this combination.  */
+      if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
+                                                 offset_vectype, scale))
+       {
+         *ifn_out = ifn;
+         *offset_vectype_out = offset_vectype;
+         return true;
+       }
+
+      if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
+         && TYPE_PRECISION (offset_type) >= element_bits)
+       return false;
+
+      offset_type = build_nonstandard_integer_type
+       (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
+    }
 }
 
 /* STMT_INFO is a call to an internal gather load or scatter store function.
@@ -3619,7 +3760,7 @@
 {
   HOST_WIDE_INT scale = 1;
   poly_int64 pbitpos, pbitsize;
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   tree offtype = NULL_TREE;
   tree decl = NULL_TREE, base, off;
@@ -3628,7 +3769,7 @@
   machine_mode pmode;
   int punsignedp, reversep, pvolatilep = 0;
   internal_fn ifn;
-  tree element_type;
+  tree offset_vectype;
   bool masked_p = false;
 
   /* See whether this is already a call to a gather/scatter internal function.
@@ -3789,13 +3930,18 @@
            {
              int new_scale = tree_to_shwi (op1);
              /* Only treat this as a scaling operation if the target
-                supports it.  */
+                supports it for at least some offset type.  */
              if (use_ifn_p
-                 && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
-                                               vectype, memory_type, 1,
-                                               TYPE_SIGN (TREE_TYPE (op0)),
+                 && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+                                               masked_p, vectype, memory_type,
+                                               signed_char_type_node,
                                               new_scale, &ifn,
-                                               &element_type))
+                                               &offset_vectype)
+                 && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+                                               masked_p, vectype, memory_type,
+                                               unsigned_char_type_node,
+                                               new_scale, &ifn,
+                                               &offset_vectype))
               break;
             scale = new_scale;
             off = op0;
@@ -3809,6 +3955,16 @@
            if (!POINTER_TYPE_P (TREE_TYPE (op0))
                && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
              break;
+
+           /* Don't include the conversion if the target is happy with
+              the current offset type.  */
+           if (use_ifn_p
+               && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+                                            masked_p, vectype, memory_type,
+                                            TREE_TYPE (off), scale, &ifn,
+                                            &offset_vectype))
+             break;
+
           if (TYPE_PRECISION (TREE_TYPE (op0))
               == TYPE_PRECISION (TREE_TYPE (off)))
             {
@@ -3816,14 +3972,6 @@
              continue;
            }
 
-           /* The internal functions need the offset to be the same width
-              as the elements of VECTYPE.  Don't include operations that
-              cast the offset from that width to a different width.  */
-           if (use_ifn_p
-               && (int_size_in_bytes (TREE_TYPE (vectype))
-                   == int_size_in_bytes (TREE_TYPE (off))))
-             break;
-
          if (TYPE_PRECISION (TREE_TYPE (op0))
              < TYPE_PRECISION (TREE_TYPE (off)))
            {
@@ -3850,10 +3998,9 @@
 
   if (use_ifn_p)
     {
-      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
-                                    memory_type, TYPE_PRECISION (offtype),
-                                    TYPE_SIGN (offtype), scale, &ifn,
-                                    &element_type))
+      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
+                                    vectype, memory_type, offtype, scale,
+                                    &ifn, &offset_vectype))
       return false;
    }
   else
@@ -3873,7 +4020,8 @@
       return false;
 
      ifn = IFN_LAST;
-      element_type = TREE_TYPE (vectype);
+      /* The offset vector type will be read from DECL when needed.  */
+      offset_vectype = NULL_TREE;
    }
 
   info->ifn = ifn;
@@ -3881,9 +4029,9 @@
   info->base = base;
   info->offset = off;
   info->offset_dt = vect_unknown_def_type;
-  info->offset_vectype = NULL_TREE;
+  info->offset_vectype = offset_vectype;
   info->scale = scale;
-  info->element_type = element_type;
+  info->element_type = TREE_TYPE (vectype);
   info->memory_type = memory_type;
   return true;
 }
@@ -3961,47 +4109,67 @@
         && DR_OFFSET (newdr)
         && DR_INIT (newdr)
         && DR_STEP (newdr)
+        && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
        && integer_zerop (DR_STEP (newdr)))
       {
+        tree base_address = DR_BASE_ADDRESS (newdr);
         tree off = DR_OFFSET (newdr);
+        tree step = ssize_int (1);
+        if (integer_zerop (off)
+            && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
+          {
+            off = TREE_OPERAND (base_address, 1);
+            base_address = TREE_OPERAND (base_address, 0);
+          }
        STRIP_NOPS (off);
-         if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
-             && TREE_CODE (off) == MULT_EXPR
+         if (TREE_CODE (off) == MULT_EXPR
            && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
          {
-             tree step = TREE_OPERAND (off, 1);
+             step = TREE_OPERAND (off, 1);
            off = TREE_OPERAND (off, 0);
            STRIP_NOPS (off);
-             if (CONVERT_EXPR_P (off)
-                 && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
-                     < TYPE_PRECISION (TREE_TYPE (off))))
-               off = TREE_OPERAND (off, 0);
-             if (TREE_CODE (off) == SSA_NAME)
+           }
+         if (CONVERT_EXPR_P (off)
+             && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
+                 < TYPE_PRECISION (TREE_TYPE (off))))
+           off = TREE_OPERAND (off, 0);
+         if (TREE_CODE (off) == SSA_NAME)
+           {
+             gimple *def = SSA_NAME_DEF_STMT (off);
+             /* Look through widening conversion.  */
+             if (is_gimple_assign (def)
+                 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
              {
-                 gimple *def = SSA_NAME_DEF_STMT (off);
+                 tree rhs1 = gimple_assign_rhs1 (def);
+                 if (TREE_CODE (rhs1) == SSA_NAME
+                     && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+                     && (TYPE_PRECISION (TREE_TYPE (off))
+                         > TYPE_PRECISION (TREE_TYPE (rhs1))))
+                   def = SSA_NAME_DEF_STMT (rhs1);
+               }
+             if (is_gimple_call (def)
+                 && gimple_call_internal_p (def)
+                 && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
+               {
+                 tree arg = gimple_call_arg (def, 0);
                 tree reft = TREE_TYPE (DR_REF (newdr));
-                 if (is_gimple_call (def)
-                     && gimple_call_internal_p (def)
-                     && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
+                 gcc_assert (TREE_CODE (arg) == SSA_NAME);
+                 arg = SSA_NAME_VAR (arg);
+                 if (arg == loop->simduid
+                     /* For now.  */
+                     && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
                  {
-                     tree arg = gimple_call_arg (def, 0);
-                     gcc_assert (TREE_CODE (arg) == SSA_NAME);
-                     arg = SSA_NAME_VAR (arg);
-                     if (arg == loop->simduid
-                         /* For now.  */
-                         && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
-                       {
-                         DR_OFFSET (newdr) = ssize_int (0);
-                         DR_STEP (newdr) = step;
-                         DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
-                         DR_STEP_ALIGNMENT (newdr)
-                           = highest_pow2_factor (step);
-                         /* Mark as simd-lane access.  */
-                         newdr->aux = (void *)-1;
-                         free_data_ref (dr);
-                         datarefs->safe_push (newdr);
-                         return opt_result::success ();
-                       }
+                     DR_BASE_ADDRESS (newdr) = base_address;
+                     DR_OFFSET (newdr) = ssize_int (0);
+                     DR_STEP (newdr) = step;
+                     DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
+                     DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
+                     /* Mark as simd-lane access.  */
+                     tree arg2 = gimple_call_arg (def, 1);
+                     newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
+                     free_data_ref (dr);
+                     datarefs->safe_push (newdr);
+                     return opt_result::success ();
                   }
               }
           }
@@ -4029,9 +4197,9 @@
 */
 
 opt_result
-vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
+vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
 {
-  struct loop *loop = NULL;
+  class loop *loop = NULL;
   unsigned int i;
   struct data_reference *dr;
   tree scalar_type;
@@ -4106,14 +4274,18 @@
       }
 
      /* See if this was detected as SIMD lane access.  */
-      if (dr->aux == (void *)-1)
+      if (dr->aux == (void *)-1
+         || dr->aux == (void *)-2
+         || dr->aux == (void *)-3
+         || dr->aux == (void *)-4)
       {
         if (nested_in_vect_loop_p (loop, stmt_info))
           return opt_result::failure_at (stmt_info->stmt,
                                          "not vectorized:"
                                          " data ref analysis failed: %G",
                                          stmt_info->stmt);
-         STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
+         STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
+           = -(uintptr_t) dr->aux;
       }
 
      tree base = get_base_address (DR_REF (dr));
@@ -4142,7 +4314,7 @@
       {
         if (nested_in_vect_loop_p (loop, stmt_info))
           return opt_result::failure_at (stmt_info->stmt,
-                                         "not vectorized:"
+                                         "not vectorized: "
                                          "not suitable for strided load %G",
                                          stmt_info->stmt);
        STMT_VINFO_STRIDED_P (stmt_info) = true;
      }
@@ -4203,9 +4375,8 @@
 
      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
-      STMT_VINFO_VECTYPE (stmt_info)
-       = get_vectype_for_scalar_type (scalar_type);
-      if (!STMT_VINFO_VECTYPE (stmt_info))
+      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+      if (!vectype)
       {
         if (dump_enabled_p ())
           {
@@ -4225,6 +4396,8 @@
             STMT_VINFO_VECTORIZABLE (stmt_info) = false;
             continue;
           }
+         if (fatal)
+           *fatal = false;
        return opt_result::failure_at (stmt_info->stmt,
                                       "not vectorized:"
                                       " no vectype for stmt: %G"
@@ -4236,27 +4409,37 @@
         if (dump_enabled_p ())
           dump_printf_loc (MSG_NOTE, vect_location,
                            "got vectype for stmt: %G%T\n",
-                            stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info));
+                            stmt_info->stmt, vectype);
       }
 
      /* Adjust the minimal vectorization factor according to the
        vector type.  */
-      vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+      vf = TYPE_VECTOR_SUBPARTS (vectype);
      *min_vf = upper_bound (*min_vf, vf);
 
+      /* Leave the BB vectorizer to pick the vector type later, based on
+        the final dataref group size and SLP node size.  */
+      if (is_a <loop_vec_info> (vinfo))
+       STMT_VINFO_VECTYPE (stmt_info) = vectype;
+
      if (gatherscatter != SG_NONE)
       {
        gather_scatter_info gs_info;
        if (!vect_check_gather_scatter (stmt_info,
                                        as_a <loop_vec_info> (vinfo),
                                        &gs_info)
-             || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
-           return opt_result::failure_at
-             (stmt_info->stmt,
-              (gatherscatter == GATHER) ?
-              "not vectorized: not suitable for gather load %G" :
-              "not vectorized: not suitable for scatter store %G",
-              stmt_info->stmt);
+           || !get_vectype_for_scalar_type (vinfo,
+                                            TREE_TYPE (gs_info.offset)))
+         {
+           if (fatal)
+             *fatal = false;
+           return opt_result::failure_at
+             (stmt_info->stmt,
+              (gatherscatter == GATHER)
+              ? "not vectorized: not suitable for gather load %G"
+              : "not vectorized: not suitable for scatter store %G",
+              stmt_info->stmt);
+         }
       STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
      }
    }
@@ -4358,7 +4541,8 @@
     mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
   else
     set_ptr_info_alignment (SSA_NAME_PTR_INFO (name),
-                           DR_TARGET_ALIGNMENT (dr_info), misalign);
+                           known_alignment (DR_TARGET_ALIGNMENT (dr_info)),
+                           misalign);
 }
 
 /* Function vect_create_addr_base_for_vector_ref.
@@ -4413,7 +4597,7 @@
   innermost_loop_behavior *drb = vect_dr_behavior (dr_info);
 
   tree data_ref_base = unshare_expr (drb->base_address);
-  tree base_offset = unshare_expr (drb->offset);
+  tree base_offset = unshare_expr (get_dr_vinfo_offset (dr_info, true));
   tree init = unshare_expr (drb->init);
 
   if (loop_vinfo)
@@ -4531,16 +4715,16 @@
 
 tree
 vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
-                         struct loop *at_loop, tree offset,
+                         class loop *at_loop, tree offset,
                         tree *initial_address, gimple_stmt_iterator *gsi,
                         gimple **ptr_incr, bool only_init,
                         tree byte_offset, tree iv_step)
 {
   const char *base_name;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = NULL;
+  class loop *loop = NULL;
   bool nested_in_vect_loop = false;
-  struct loop *containing_loop = NULL;
+  class loop *containing_loop = NULL;
   tree aggr_ptr_type;
   tree aggr_ptr;
   tree new_temp;
@@ -5285,13 +5469,13 @@
                             tree *realignment_token,
                             enum dr_alignment_support alignment_support_scheme,
                             tree init_addr,
-                            struct loop **at_loop)
+                            class loop **at_loop)
 {
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
   struct data_reference *dr = dr_info->dr;
-  struct loop *loop = NULL;
+  class loop *loop = NULL;
   edge pe = NULL;
   tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
   tree vec_dest;
@@ -5306,8 +5490,8 @@
   gimple_seq stmts = NULL;
   bool compute_in_loop = false;
   bool nested_in_vect_loop = false;
-  struct loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
-  struct loop *loop_for_initial_load = NULL;
+  class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
+  class loop *loop_for_initial_load = NULL;
 
   if (loop_vinfo)
     {
@@ -5402,10 +5586,13 @@
        new_temp = copy_ssa_name (ptr);
      else
        new_temp = make_ssa_name (TREE_TYPE (ptr));
-      unsigned int align = DR_TARGET_ALIGNMENT (dr_info);
+      poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
+      tree type = TREE_TYPE (ptr);
      new_stmt = gimple_build_assign
                  (new_temp, BIT_AND_EXPR, ptr,
-                  build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
+                  fold_build2 (MINUS_EXPR, type,
+                               build_int_cst (type, 0),
+                               build_int_cst (type, align)));
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      data_ref
@@ -6240,12 +6427,14 @@
         correspond to the gaps.  */
      if (next_stmt_info != first_stmt_info
         && gap_count < DR_GROUP_GAP (next_stmt_info))
-       {
-         gap_count++;
-         continue;
-       }
-
-      while (next_stmt_info)
+       {
+         gap_count++;
+         continue;
+       }
+
+      /* ??? The following needs cleanup after the removal of
+        DR_GROUP_SAME_DR_STMT.  */
+      if (next_stmt_info)
       {
        stmt_vec_info new_stmt_info = vinfo->lookup_def (tmp_data_ref);
        /* We assume that if VEC_STMT is not NULL, this is a case of multiple
          copies, and we put the new vector statement in the first available
          RELATED_STMT.  */
       if (!STMT_VINFO_VEC_STMT (next_stmt_info))
         STMT_VINFO_VEC_STMT (next_stmt_info) = new_stmt_info;
       else
         {
-           if (!DR_GROUP_SAME_DR_STMT (next_stmt_info))
-             {
-               stmt_vec_info prev_stmt_info
-                 = STMT_VINFO_VEC_STMT (next_stmt_info);
-               stmt_vec_info rel_stmt_info
-                 = STMT_VINFO_RELATED_STMT (prev_stmt_info);
-               while (rel_stmt_info)
-                 {
-                   prev_stmt_info = rel_stmt_info;
-                   rel_stmt_info = STMT_VINFO_RELATED_STMT (rel_stmt_info);
-                 }
-
-               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
-             }
+           stmt_vec_info prev_stmt_info
+             = STMT_VINFO_VEC_STMT (next_stmt_info);
+           stmt_vec_info rel_stmt_info
+             = STMT_VINFO_RELATED_STMT (prev_stmt_info);
+           while (rel_stmt_info)
+             {
+               prev_stmt_info = rel_stmt_info;
+               rel_stmt_info = STMT_VINFO_RELATED_STMT (rel_stmt_info);
+             }
+
+           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
         }
 
       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
       gap_count = 1;
-       /* If NEXT_STMT_INFO accesses the same DR as the previous statement,
-          put the same TMP_DATA_REF as its vectorized statement; otherwise
-          get the next data-ref from RESULT_CHAIN.  */
-       if (!next_stmt_info || !DR_GROUP_SAME_DR_STMT (next_stmt_info))
-         break;
      }
    }
 }
@@ -6288,7 +6469,7 @@
    on ALIGNMENT bit boundary.  */
 
 bool
-vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
+vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
 {
   if (!VAR_P (decl))
     return false;
@@ -6298,9 +6479,10 @@
     return false;
 
   if (TREE_STATIC (decl))
-    return (alignment <= MAX_OFILE_ALIGNMENT);
+    return (known_le (alignment,
+                     (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
   else
-    return (alignment <= MAX_STACK_ALIGNMENT);
+    return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
 }
 
 
@@ -6319,7 +6501,7 @@
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   machine_mode mode = TYPE_MODE (vectype);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *vect_loop = NULL;
+  class loop *vect_loop = NULL;
   bool nested_in_vect_loop = false;
 
   if (aligned_access_p (dr_info) && !check_aligned_accesses)
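
A note on the retry loop that this diff adds to vect_gather_scatter_fn_p: instead of rejecting scalar offsets narrower than the vector elements up front, the new code keeps doubling the offset type's precision until the target accepts the gather/scatter combination, and gives up once the offset is at least as wide as both the pointer and the vector element. The standalone C sketch below models only that search using plain integer widths; target_supports is a hypothetical stand-in for internal_gather_scatter_fn_supported_p and POINTER_SIZE is assumed to be 64 bits, so this is an illustration of the control flow, not GCC code.

#include <stdbool.h>
#include <stdio.h>

#define POINTER_SIZE 64   /* assumed address width, in bits */

/* Hypothetical stand-in for the target query: pretend the target only
   accepts 32-bit and 64-bit gather/scatter offsets.  */
static bool target_supports (unsigned offset_bits)
{
  return offset_bits == 32 || offset_bits == 64;
}

/* Model of the retry loop: start from the scalar offset precision and
   double it until the target accepts the combination, failing once the
   offset is at least as wide as both the pointer and the element.  */
static int find_offset_precision (unsigned offset_bits, unsigned element_bits)
{
  for (;;)
    {
      if (target_supports (offset_bits))
        return (int) offset_bits;
      if (offset_bits >= POINTER_SIZE && offset_bits >= element_bits)
        return -1;  /* no wider offset type left to try */
      offset_bits *= 2;
    }
}

int main (void)
{
  /* An 8-bit offset with 32-bit elements is widened 8 -> 16 -> 32.  */
  printf ("%d\n", find_offset_precision (8, 32));   /* prints 32 */
  /* A 128-bit offset the target rejects fails immediately.  */
  printf ("%d\n", find_offset_precision (128, 32)); /* prints -1 */
  return 0;
}

Widening rather than truncating mirrors the comment removed by the diff: extending a narrow offset is always safe, while truncating a wide one is not, so the search only ever moves toward wider types.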