comparison gcc/tree-vect-data-refs.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
comparison
131:84e7813d76e9 145:1830386684a0
1 /* Data References Analysis and Manipulation Utilities for Vectorization. 1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc. 2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com> 4 and Ira Rosen <irar@il.ibm.com>
5 5
6 This file is part of GCC. 6 This file is part of GCC.
7 7
47 #include "cfgloop.h" 47 #include "cfgloop.h"
48 #include "tree-scalar-evolution.h" 48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h" 49 #include "tree-vectorizer.h"
50 #include "expr.h" 50 #include "expr.h"
51 #include "builtins.h" 51 #include "builtins.h"
52 #include "params.h"
53 #include "tree-cfg.h" 52 #include "tree-cfg.h"
54 #include "tree-hash-traits.h" 53 #include "tree-hash-traits.h"
55 #include "vec-perm-indices.h" 54 #include "vec-perm-indices.h"
56 #include "internal-fn.h" 55 #include "internal-fn.h"
57 56
143 142
144 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); 143 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
145 if (rhs < lhs) 144 if (rhs < lhs)
146 scalar_type = rhs_type; 145 scalar_type = rhs_type;
147 } 146 }
147 else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
148 {
149 unsigned int i = 0;
150 if (gimple_call_internal_p (call))
151 {
152 internal_fn ifn = gimple_call_internal_fn (call);
153 if (internal_load_fn_p (ifn) || internal_store_fn_p (ifn))
154 /* gimple_expr_type already picked the type of the loaded
155 or stored data. */
156 i = ~0U;
157 else if (internal_fn_mask_index (ifn) == 0)
158 i = 1;
159 }
160 if (i < gimple_call_num_args (call))
161 {
162 tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
163 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
164 {
165 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
166 if (rhs < lhs)
167 scalar_type = rhs_type;
168 }
169 }
170 }
148 171
149 *lhs_size_unit = lhs; 172 *lhs_size_unit = lhs;
150 *rhs_size_unit = rhs; 173 *rhs_size_unit = rhs;
151 return scalar_type; 174 return scalar_type;
152 } 175 }
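
A minimal C sketch of the case the new call handling is for (function shape and names assumed, not taken from a testcase): once the guarded access below is if-converted into internal masked load/store calls, the smallest scalar type has to be read from the call's data argument rather than from a plain assignment, so the narrower store still drives the vectorization factor.

  void
  f (short *restrict out, const int *restrict in, int n)
  {
    for (int i = 0; i < n; i++)
      if (in[i] > 0)              /* becomes the mask argument     */
        out[i] = (short) in[i];   /* masked int load, short store  */
  }
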
157 Return false if versioning is not supported. */ 180 Return false if versioning is not supported. */
158 181
159 static opt_result 182 static opt_result
160 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) 183 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
161 { 184 {
162 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 185 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
163 186
164 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0) 187 if ((unsigned) param_vect_max_version_for_alias_checks == 0)
165 return opt_result::failure_at (vect_location, 188 return opt_result::failure_at (vect_location,
166 "will not create alias checks, as" 189 "will not create alias checks, as"
167 " --param vect-max-version-for-alias-checks" 190 " --param vect-max-version-for-alias-checks"
168 " == 0\n"); 191 " == 0\n");
169 192
208 if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a) 231 if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
209 && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)) 232 && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
210 return true; 233 return true;
211 234
212 /* STMT_A and STMT_B belong to overlapping groups. All loads in a 235 /* STMT_A and STMT_B belong to overlapping groups. All loads in a
213 group are emitted at the position of the last scalar load and all 236 SLP group are emitted at the position of the last scalar load and
214 stores in a group are emitted at the position of the last scalar store. 237 all loads in an interleaving group are emitted at the position
238 of the first scalar load.
239 Stores in a group are emitted at the position of the last scalar store.
215 Compute that position and check whether the resulting order matches 240 Compute that position and check whether the resulting order matches
216 the current one. */ 241 the current one.
217 stmt_vec_info last_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a); 242 We have not yet decided between SLP and interleaving so we have
243 to conservatively assume both. */
244 stmt_vec_info il_a;
245 stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
218 if (last_a) 246 if (last_a)
219 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s; 247 {
220 s = DR_GROUP_NEXT_ELEMENT (s)) 248 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
221 last_a = get_later_stmt (last_a, s); 249 s = DR_GROUP_NEXT_ELEMENT (s))
250 last_a = get_later_stmt (last_a, s);
251 if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
252 {
253 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
254 s = DR_GROUP_NEXT_ELEMENT (s))
255 if (get_later_stmt (il_a, s) == il_a)
256 il_a = s;
257 }
258 else
259 il_a = last_a;
260 }
222 else 261 else
223 last_a = stmtinfo_a; 262 last_a = il_a = stmtinfo_a;
224 stmt_vec_info last_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b); 263 stmt_vec_info il_b;
264 stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
225 if (last_b) 265 if (last_b)
226 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s; 266 {
227 s = DR_GROUP_NEXT_ELEMENT (s)) 267 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
228 last_b = get_later_stmt (last_b, s); 268 s = DR_GROUP_NEXT_ELEMENT (s))
269 last_b = get_later_stmt (last_b, s);
270 if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
271 {
272 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
273 s = DR_GROUP_NEXT_ELEMENT (s))
274 if (get_later_stmt (il_b, s) == il_b)
275 il_b = s;
276 }
277 else
278 il_b = last_b;
279 }
229 else 280 else
230 last_b = stmtinfo_b; 281 last_b = il_b = stmtinfo_b;
231 return ((get_later_stmt (last_a, last_b) == last_a) 282 bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
232 == (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a)); 283 return (/* SLP */
284 (get_later_stmt (last_a, last_b) == last_a) == a_after_b
285 /* Interleaving */
286 && (get_later_stmt (il_a, il_b) == il_a) == a_after_b
287 /* Mixed */
288 && (get_later_stmt (il_a, last_b) == il_a) == a_after_b
289 && (get_later_stmt (last_a, il_b) == last_a) == a_after_b);
233 } 290 }
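
An illustration of why both emission positions have to be checked (access pattern assumed, not from a testcase): with interleaving the grouped loads below are emitted at the first scalar load, with SLP at the last one, so their order relative to the store group in between differs depending on the strategy chosen later.

  void
  f (int *a, int *b, int n)
  {
    for (int i = 0; i < n; i++)
      {
        int x = a[2 * i];         /* first scalar load of its group   */
        b[2 * i] = x + 1;         /* store group between the loads    */
        int y = a[2 * i + 1];     /* last scalar load of its group    */
        b[2 * i + 1] = y + 1;
      }
  }
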
234 291
235 /* A subroutine of vect_analyze_data_ref_dependence. Handle 292 /* A subroutine of vect_analyze_data_ref_dependence. Handle
236 DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence 293 DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
237 distances. These distances are conservatively correct but they don't 294 distances. These distances are conservatively correct but they don't
246 static bool 303 static bool
247 vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr, 304 vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
248 loop_vec_info loop_vinfo, 305 loop_vec_info loop_vinfo,
249 int loop_depth, unsigned int *max_vf) 306 int loop_depth, unsigned int *max_vf)
250 { 307 {
251 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 308 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
252 lambda_vector dist_v; 309 lambda_vector dist_v;
253 unsigned int i; 310 unsigned int i;
254 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) 311 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
255 { 312 {
256 int dist = dist_v[loop_depth]; 313 int dist = dist_v[loop_depth];
303 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, 360 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
304 loop_vec_info loop_vinfo, 361 loop_vec_info loop_vinfo,
305 unsigned int *max_vf) 362 unsigned int *max_vf)
306 { 363 {
307 unsigned int i; 364 unsigned int i;
308 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 365 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
309 struct data_reference *dra = DDR_A (ddr); 366 struct data_reference *dra = DDR_A (ddr);
310 struct data_reference *drb = DDR_B (ddr); 367 struct data_reference *drb = DDR_B (ddr);
311 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra); 368 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
312 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb); 369 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
313 stmt_vec_info stmtinfo_a = dr_info_a->stmt; 370 stmt_vec_info stmtinfo_a = dr_info_a->stmt;
471 { 528 {
472 /* If DDR_REVERSED_P the order of the data-refs in DDR was 529 /* If DDR_REVERSED_P the order of the data-refs in DDR was
473 reversed (to make distance vector positive), and the actual 530 reversed (to make distance vector positive), and the actual
474 distance is negative. */ 531 distance is negative. */
475 if (dump_enabled_p ()) 532 if (dump_enabled_p ())
476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 533 dump_printf_loc (MSG_NOTE, vect_location,
477 "dependence distance negative.\n"); 534 "dependence distance negative.\n");
535 /* When doing outer loop vectorization, we need to check if there is
536 a backward dependence at the inner loop level if the dependence
537 at the outer loop is reversed. See PR81740. */
538 if (nested_in_vect_loop_p (loop, stmtinfo_a)
539 || nested_in_vect_loop_p (loop, stmtinfo_b))
540 {
541 unsigned inner_depth = index_in_loop_nest (loop->inner->num,
542 DDR_LOOP_NEST (ddr));
543 if (dist_v[inner_depth] < 0)
544 return opt_result::failure_at (stmtinfo_a->stmt,
545 "not vectorized, dependence "
546 "between data-refs %T and %T\n",
547 DR_REF (dra), DR_REF (drb));
548 }
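
One possible shape of the guarded situation (indices chosen for illustration, not the PR81740 testcase): the dependence distance vector is (+1, -1), non-negative at the outer level but backward at the inner level, so vectorizing the outer loop would be unsafe.

  void
  f (int a[][1024], int m, int n)
  {
    for (int j = 0; j < m; j++)        /* outer loop being vectorized */
      for (int i = 1; i < n; i++)      /* inner loop                  */
        a[j + 1][i - 1] = a[j][i];     /* distance (+1, -1): backward */
  }                                    /* at the inner loop level     */
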
478 /* Record a negative dependence distance to later limit the 549 /* Record a negative dependence distance to later limit the
479 amount of stmt copying / unrolling we can perform. 550 amount of stmt copying / unrolling we can perform.
480 Only need to handle read-after-write dependence. */ 551 Only need to handle read-after-write dependence. */
481 if (DR_IS_READ (drb) 552 if (DR_IS_READ (drb)
482 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0 553 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
488 unsigned int abs_dist = abs (dist); 559 unsigned int abs_dist = abs (dist);
489 if (abs_dist >= 2 && abs_dist < *max_vf) 560 if (abs_dist >= 2 && abs_dist < *max_vf)
490 { 561 {
491 /* The dependence distance requires reduction of the maximal 562 /* The dependence distance requires reduction of the maximal
492 vectorization factor. */ 563 vectorization factor. */
493 *max_vf = abs (dist); 564 *max_vf = abs_dist;
494 if (dump_enabled_p ()) 565 if (dump_enabled_p ())
495 dump_printf_loc (MSG_NOTE, vect_location, 566 dump_printf_loc (MSG_NOTE, vect_location,
496 "adjusting maximal vectorization factor to %i\n", 567 "adjusting maximal vectorization factor to %i\n",
497 *max_vf); 568 *max_vf);
498 } 569 }
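
A worked example of the adjustment (numbers made up): with a read-after-write distance of 3, at most three scalar iterations may be combined into one vector iteration, so *max_vf drops from, say, 8 to 3.

  void
  f (int *a, int n)
  {
    for (int i = 3; i < n; i++)
      a[i] = a[i - 3] + 1;   /* value written here is read 3 iterations later */
  }
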
793 864
794 void 865 void
795 vect_record_base_alignments (vec_info *vinfo) 866 vect_record_base_alignments (vec_info *vinfo)
796 { 867 {
797 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 868 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
798 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; 869 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
799 data_reference *dr; 870 data_reference *dr;
800 unsigned int i; 871 unsigned int i;
801 FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr) 872 FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr)
802 { 873 {
803 dr_vec_info *dr_info = vinfo->lookup_dr (dr); 874 dr_vec_info *dr_info = vinfo->lookup_dr (dr);
817 } 888 }
818 } 889 }
819 890
820 /* Return the target alignment for the vectorized form of DR_INFO. */ 891 /* Return the target alignment for the vectorized form of DR_INFO. */
821 892
822 static unsigned int 893 static poly_uint64
823 vect_calculate_target_alignment (dr_vec_info *dr_info) 894 vect_calculate_target_alignment (dr_vec_info *dr_info)
824 { 895 {
825 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); 896 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
826 return targetm.vectorize.preferred_vector_alignment (vectype); 897 return targetm.vectorize.preferred_vector_alignment (vectype);
827 } 898 }
840 vect_compute_data_ref_alignment (dr_vec_info *dr_info) 911 vect_compute_data_ref_alignment (dr_vec_info *dr_info)
841 { 912 {
842 stmt_vec_info stmt_info = dr_info->stmt; 913 stmt_vec_info stmt_info = dr_info->stmt;
843 vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments; 914 vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments;
844 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 915 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
845 struct loop *loop = NULL; 916 class loop *loop = NULL;
846 tree ref = DR_REF (dr_info->dr); 917 tree ref = DR_REF (dr_info->dr);
847 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 918 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
848 919
849 if (dump_enabled_p ()) 920 if (dump_enabled_p ())
850 dump_printf_loc (MSG_NOTE, vect_location, 921 dump_printf_loc (MSG_NOTE, vect_location,
860 return; 931 return;
861 932
862 innermost_loop_behavior *drb = vect_dr_behavior (dr_info); 933 innermost_loop_behavior *drb = vect_dr_behavior (dr_info);
863 bool step_preserves_misalignment_p; 934 bool step_preserves_misalignment_p;
864 935
865 unsigned HOST_WIDE_INT vector_alignment 936 poly_uint64 vector_alignment
866 = vect_calculate_target_alignment (dr_info) / BITS_PER_UNIT; 937 = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
867 DR_TARGET_ALIGNMENT (dr_info) = vector_alignment; 938 DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
939
940 /* If the main loop has peeled for alignment we have no way of knowing
941 whether the data accesses in the epilogues are aligned. We can't at
942 compile time answer the question whether we have entered the main loop or
943 not. Fixes PR 92351. */
944 if (loop_vinfo)
945 {
946 loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
947 if (orig_loop_vinfo
948 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
949 return;
950 }
951
952 unsigned HOST_WIDE_INT vect_align_c;
953 if (!vector_alignment.is_constant (&vect_align_c))
954 return;
868 955
869 /* No step for BB vectorization. */ 956 /* No step for BB vectorization. */
870 if (!loop) 957 if (!loop)
871 { 958 {
872 gcc_assert (integer_zerop (drb->step)); 959 gcc_assert (integer_zerop (drb->step));
880 we have to check that the stride of the dataref in the inner-loop evenly 967 we have to check that the stride of the dataref in the inner-loop evenly
881 divides by the vector alignment. */ 968 divides by the vector alignment. */
882 else if (nested_in_vect_loop_p (loop, stmt_info)) 969 else if (nested_in_vect_loop_p (loop, stmt_info))
883 { 970 {
884 step_preserves_misalignment_p 971 step_preserves_misalignment_p
885 = (DR_STEP_ALIGNMENT (dr_info->dr) % vector_alignment) == 0; 972 = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
886 973
887 if (dump_enabled_p ()) 974 if (dump_enabled_p ())
888 { 975 {
889 if (step_preserves_misalignment_p) 976 if (step_preserves_misalignment_p)
890 dump_printf_loc (MSG_NOTE, vect_location, 977 dump_printf_loc (MSG_NOTE, vect_location,
902 the dataref evenly divides by the alignment. */ 989 the dataref evenly divides by the alignment. */
903 else 990 else
904 { 991 {
905 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 992 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
906 step_preserves_misalignment_p 993 step_preserves_misalignment_p
907 = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vector_alignment); 994 = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);
908 995
909 if (!step_preserves_misalignment_p && dump_enabled_p ()) 996 if (!step_preserves_misalignment_p && dump_enabled_p ())
910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
911 "step doesn't divide the vector alignment.\n"); 998 "step doesn't divide the vector alignment.\n");
912 } 999 }
921 { 1008 {
922 base_alignment = (*entry)->base_alignment; 1009 base_alignment = (*entry)->base_alignment;
923 base_misalignment = (*entry)->base_misalignment; 1010 base_misalignment = (*entry)->base_misalignment;
924 } 1011 }
925 1012
926 if (drb->offset_alignment < vector_alignment 1013 if (drb->offset_alignment < vect_align_c
927 || !step_preserves_misalignment_p 1014 || !step_preserves_misalignment_p
928 /* We need to know whether the step wrt the vectorized loop is 1015 /* We need to know whether the step wrt the vectorized loop is
929 negative when computing the starting misalignment below. */ 1016 negative when computing the starting misalignment below. */
930 || TREE_CODE (drb->step) != INTEGER_CST) 1017 || TREE_CODE (drb->step) != INTEGER_CST)
931 { 1018 {
933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1020 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
934 "Unknown alignment for access: %T\n", ref); 1021 "Unknown alignment for access: %T\n", ref);
935 return; 1022 return;
936 } 1023 }
937 1024
938 if (base_alignment < vector_alignment) 1025 if (base_alignment < vect_align_c)
939 { 1026 {
940 unsigned int max_alignment; 1027 unsigned int max_alignment;
941 tree base = get_base_for_alignment (drb->base_address, &max_alignment); 1028 tree base = get_base_for_alignment (drb->base_address, &max_alignment);
942 if (max_alignment < vector_alignment 1029 if (max_alignment < vect_align_c
943 || !vect_can_force_dr_alignment_p (base, 1030 || !vect_can_force_dr_alignment_p (base,
944 vector_alignment * BITS_PER_UNIT)) 1031 vect_align_c * BITS_PER_UNIT))
945 { 1032 {
946 if (dump_enabled_p ()) 1033 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE, vect_location, 1034 dump_printf_loc (MSG_NOTE, vect_location,
948 "can't force alignment of ref: %T\n", ref); 1035 "can't force alignment of ref: %T\n", ref);
949 return; 1036 return;
970 /* PLUS because STEP is negative. */ 1057 /* PLUS because STEP is negative. */
971 misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1) 1058 misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
972 * TREE_INT_CST_LOW (drb->step)); 1059 * TREE_INT_CST_LOW (drb->step));
973 1060
974 unsigned int const_misalignment; 1061 unsigned int const_misalignment;
975 if (!known_misalignment (misalignment, vector_alignment, 1062 if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
976 &const_misalignment))
977 { 1063 {
978 if (dump_enabled_p ()) 1064 if (dump_enabled_p ())
979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
980 "Non-constant misalignment for access: %T\n", ref); 1066 "Non-constant misalignment for access: %T\n", ref);
981 return; 1067 return;
1008 dr_vec_info *dr_peel_info, int npeel) 1094 dr_vec_info *dr_peel_info, int npeel)
1009 { 1095 {
1010 unsigned int i; 1096 unsigned int i;
1011 vec<dr_p> same_aligned_drs; 1097 vec<dr_p> same_aligned_drs;
1012 struct data_reference *current_dr; 1098 struct data_reference *current_dr;
1013 int dr_size = vect_get_scalar_dr_size (dr_info);
1014 int dr_peel_size = vect_get_scalar_dr_size (dr_peel_info);
1015 stmt_vec_info stmt_info = dr_info->stmt;
1016 stmt_vec_info peel_stmt_info = dr_peel_info->stmt; 1099 stmt_vec_info peel_stmt_info = dr_peel_info->stmt;
1017 1100
1018 /* For interleaved data accesses the step in the loop must be multiplied by 1101 /* It can be assumed that if dr_info has the same alignment as dr_peel,
1019 the size of the interleaving group. */ 1102 it is aligned in the vector loop. */
1020 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1021 dr_size *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
1022 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1023 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1024
1025 /* It can be assumed that the data refs with the same alignment as dr_peel
1026 are aligned in the vector loop. */
1027 same_aligned_drs = STMT_VINFO_SAME_ALIGN_REFS (peel_stmt_info); 1103 same_aligned_drs = STMT_VINFO_SAME_ALIGN_REFS (peel_stmt_info);
1028 FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr) 1104 FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr)
1029 { 1105 {
1030 if (current_dr != dr_info->dr) 1106 if (current_dr != dr_info->dr)
1031 continue; 1107 continue;
1032 gcc_assert (!known_alignment_for_access_p (dr_info) 1108 gcc_assert (!known_alignment_for_access_p (dr_info)
1033 || !known_alignment_for_access_p (dr_peel_info) 1109 || !known_alignment_for_access_p (dr_peel_info)
1034 || (DR_MISALIGNMENT (dr_info) / dr_size 1110 || (DR_MISALIGNMENT (dr_info)
1035 == DR_MISALIGNMENT (dr_peel_info) / dr_peel_size)); 1111 == DR_MISALIGNMENT (dr_peel_info)));
1036 SET_DR_MISALIGNMENT (dr_info, 0); 1112 SET_DR_MISALIGNMENT (dr_info, 0);
1037 return; 1113 return;
1038 } 1114 }
1039 1115
1040 if (known_alignment_for_access_p (dr_info) 1116 unsigned HOST_WIDE_INT alignment;
1117 if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
1118 && known_alignment_for_access_p (dr_info)
1041 && known_alignment_for_access_p (dr_peel_info)) 1119 && known_alignment_for_access_p (dr_peel_info))
1042 { 1120 {
1043 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1044 size_zero_node) < 0;
1045 int misal = DR_MISALIGNMENT (dr_info); 1121 int misal = DR_MISALIGNMENT (dr_info);
1046 misal += negative ? -npeel * dr_size : npeel * dr_size; 1122 misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
1047 misal &= DR_TARGET_ALIGNMENT (dr_info) - 1; 1123 misal &= alignment - 1;
1048 SET_DR_MISALIGNMENT (dr_info, misal); 1124 SET_DR_MISALIGNMENT (dr_info, misal);
1049 return; 1125 return;
1050 } 1126 }
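
A worked example of the updated formula (all numbers made up): the peeled iterations advance the misalignment by npeel steps and the result is reduced modulo the now compile-time-constant target alignment.

  /* DR_MISALIGNMENT 8, npeel 2, DR_STEP 4, target alignment 16.  */
  int misal = 8 + 2 * 4;      /* 16 */
  misal &= 16 - 1;            /* 0: the access is aligned after peeling */
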
1051 1127
1052 if (dump_enabled_p ()) 1128 if (dump_enabled_p ())
1592 1668
1593 opt_result 1669 opt_result
1594 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) 1670 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1595 { 1671 {
1596 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1672 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1597 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1673 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1598 enum dr_alignment_support supportable_dr_alignment; 1674 enum dr_alignment_support supportable_dr_alignment;
1599 dr_vec_info *first_store = NULL; 1675 dr_vec_info *first_store = NULL;
1600 dr_vec_info *dr0_info = NULL; 1676 dr_vec_info *dr0_info = NULL;
1601 struct data_reference *dr; 1677 struct data_reference *dr;
1602 unsigned int i, j; 1678 unsigned int i, j;
1686 unsigned int npeel_tmp = 0; 1762 unsigned int npeel_tmp = 0;
1687 bool negative = tree_int_cst_compare (DR_STEP (dr), 1763 bool negative = tree_int_cst_compare (DR_STEP (dr),
1688 size_zero_node) < 0; 1764 size_zero_node) < 0;
1689 1765
1690 vectype = STMT_VINFO_VECTYPE (stmt_info); 1766 vectype = STMT_VINFO_VECTYPE (stmt_info);
1691 unsigned int target_align = DR_TARGET_ALIGNMENT (dr_info); 1767 /* If known_alignment_for_access_p then we have set
1768 DR_MISALIGNMENT which is only done if we know it at compile
1769 time, so it is safe to assume target alignment is constant.
1770 */
1771 unsigned int target_align =
1772 DR_TARGET_ALIGNMENT (dr_info).to_constant ();
1692 unsigned int dr_size = vect_get_scalar_dr_size (dr_info); 1773 unsigned int dr_size = vect_get_scalar_dr_size (dr_info);
1693 mis = (negative 1774 mis = (negative
1694 ? DR_MISALIGNMENT (dr_info) 1775 ? DR_MISALIGNMENT (dr_info)
1695 : -DR_MISALIGNMENT (dr_info)); 1776 : -DR_MISALIGNMENT (dr_info));
1696 if (DR_MISALIGNMENT (dr_info) != 0) 1777 if (DR_MISALIGNMENT (dr_info) != 0)
1750 /* For data-refs with the same number of related 1831 /* For data-refs with the same number of related
1751 accesses prefer the one where the misalign 1832 accesses prefer the one where the misalign
1752 computation will be invariant in the outermost loop. */ 1833 computation will be invariant in the outermost loop. */
1753 else if (same_align_drs_max == same_align_drs) 1834 else if (same_align_drs_max == same_align_drs)
1754 { 1835 {
1755 struct loop *ivloop0, *ivloop; 1836 class loop *ivloop0, *ivloop;
1756 ivloop0 = outermost_invariant_loop_for_expr 1837 ivloop0 = outermost_invariant_loop_for_expr
1757 (loop, DR_BASE_ADDRESS (dr0_info->dr)); 1838 (loop, DR_BASE_ADDRESS (dr0_info->dr));
1758 ivloop = outermost_invariant_loop_for_expr 1839 ivloop = outermost_invariant_loop_for_expr
1759 (loop, DR_BASE_ADDRESS (dr)); 1840 (loop, DR_BASE_ADDRESS (dr));
1760 if ((ivloop && !ivloop0) 1841 if ((ivloop && !ivloop0)
1963 vectorization factor minus the misalignment as an element 2044 vectorization factor minus the misalignment as an element
1964 count. */ 2045 count. */
1965 mis = (negative 2046 mis = (negative
1966 ? DR_MISALIGNMENT (dr0_info) 2047 ? DR_MISALIGNMENT (dr0_info)
1967 : -DR_MISALIGNMENT (dr0_info)); 2048 : -DR_MISALIGNMENT (dr0_info));
1968 unsigned int target_align = DR_TARGET_ALIGNMENT (dr0_info); 2049 /* If known_alignment_for_access_p then we have set
2050 DR_MISALIGNMENT which is only done if we know it at compile
2051 time, so it is safe to assume target alignment is constant.
2052 */
2053 unsigned int target_align =
2054 DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
1969 npeel = ((mis & (target_align - 1)) 2055 npeel = ((mis & (target_align - 1))
1970 / vect_get_scalar_dr_size (dr0_info)); 2056 / vect_get_scalar_dr_size (dr0_info));
1971 } 2057 }
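
A worked example for the peel count (numbers made up, positive step): peeling must cancel the known misalignment modulo the target alignment.

  /* DR_MISALIGNMENT 4 bytes, element size 4, target alignment 16.  */
  int mis = -4;                            /* -DR_MISALIGNMENT (dr0_info)  */
  unsigned npeel = (mis & (16 - 1)) / 4;   /* 12 / 4 = 3 peeled iterations */
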
1972 2058
1973 /* For interleaved data access every iteration accesses all the 2059 /* For interleaved data access every iteration accesses all the
1997 2083
1998 /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */ 2084 /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
1999 if (do_peeling) 2085 if (do_peeling)
2000 { 2086 {
2001 unsigned max_allowed_peel 2087 unsigned max_allowed_peel
2002 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT); 2088 = param_vect_max_peeling_for_alignment;
2089 if (flag_vect_cost_model == VECT_COST_MODEL_CHEAP)
2090 max_allowed_peel = 0;
2003 if (max_allowed_peel != (unsigned)-1) 2091 if (max_allowed_peel != (unsigned)-1)
2004 { 2092 {
2005 unsigned max_peel = npeel; 2093 unsigned max_peel = npeel;
2006 if (max_peel == 0) 2094 if (max_peel == 0)
2007 { 2095 {
2008 unsigned int target_align = DR_TARGET_ALIGNMENT (dr0_info); 2096 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
2009 max_peel = (target_align 2097 unsigned HOST_WIDE_INT target_align_c;
2010 / vect_get_scalar_dr_size (dr0_info) - 1); 2098 if (target_align.is_constant (&target_align_c))
2099 max_peel =
2100 target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
2101 else
2102 {
2103 do_peeling = false;
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_NOTE, vect_location,
2106 "Disable peeling, max peels set and vector"
2107 " alignment unknown\n");
2108 }
2011 } 2109 }
2012 if (max_peel > max_allowed_peel) 2110 if (max_peel > max_allowed_peel)
2013 { 2111 {
2014 do_peeling = false; 2112 do_peeling = false;
2015 if (dump_enabled_p ()) 2113 if (dump_enabled_p ())
2081 } 2179 }
2082 2180
2083 /* (2) Versioning to force alignment. */ 2181 /* (2) Versioning to force alignment. */
2084 2182
2085 /* Try versioning if: 2183 /* Try versioning if:
2086 1) optimize loop for speed 2184 1) optimize loop for speed and the cost-model is not cheap
2087 2) there is at least one unsupported misaligned data ref with an unknown 2185 2) there is at least one unsupported misaligned data ref with an unknown
2088 misalignment, and 2186 misalignment, and
2089 3) all misaligned data refs with a known misalignment are supported, and 2187 3) all misaligned data refs with a known misalignment are supported, and
2090 4) the number of runtime alignment checks is within reason. */ 2188 4) the number of runtime alignment checks is within reason. */
2091 2189
2092 do_versioning = 2190 do_versioning
2093 optimize_loop_nest_for_speed_p (loop) 2191 = (optimize_loop_nest_for_speed_p (loop)
2094 && (!loop->inner); /* FORNOW */ 2192 && !loop->inner /* FORNOW */
2193 && flag_vect_cost_model != VECT_COST_MODEL_CHEAP);
2095 2194
2096 if (do_versioning) 2195 if (do_versioning)
2097 { 2196 {
2098 FOR_EACH_VEC_ELT (datarefs, i, dr) 2197 FOR_EACH_VEC_ELT (datarefs, i, dr)
2099 { 2198 {
2125 int mask; 2224 int mask;
2126 tree vectype; 2225 tree vectype;
2127 2226
2128 if (known_alignment_for_access_p (dr_info) 2227 if (known_alignment_for_access_p (dr_info)
2129 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length () 2228 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2130 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS)) 2229 >= (unsigned) param_vect_max_version_for_alignment_checks)
2131 { 2230 {
2132 do_versioning = false; 2231 do_versioning = false;
2133 break; 2232 break;
2134 } 2233 }
2135 2234
2145 { 2244 {
2146 do_versioning = false; 2245 do_versioning = false;
2147 break; 2246 break;
2148 } 2247 }
2149 2248
2249 /* Forcing alignment in the first iteration is no good if
2250 we don't keep it across iterations. For now, just disable
2251 versioning in this case.
2252 ?? We could actually unroll the loop to achieve the required
2253 overall step alignment, and forcing the alignment could be
2254 done by doing some iterations of the non-vectorized loop. */
2255 if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2256 * DR_STEP_ALIGNMENT (dr),
2257 DR_TARGET_ALIGNMENT (dr_info)))
2258 {
2259 do_versioning = false;
2260 break;
2261 }
2262
2150 /* The rightmost bits of an aligned address must be zeros. 2263 /* The rightmost bits of an aligned address must be zeros.
2151 Construct the mask needed for this test. For example, 2264 Construct the mask needed for this test. For example,
2152 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the 2265 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2153 mask must be 15 = 0xf. */ 2266 mask must be 15 = 0xf. */
2154 mask = size - 1; 2267 mask = size - 1;
2155 2268
2156 /* FORNOW: use the same mask to test all potentially unaligned 2269 /* FORNOW: use the same mask to test all potentially unaligned
2157 references in the loop. The vectorizer currently supports 2270 references in the loop. */
2158 a single vector size, see the reference to 2271 if (LOOP_VINFO_PTR_MASK (loop_vinfo)
2159 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the 2272 && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
2160 vectorization factor is computed. */ 2273 {
2161 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo) 2274 do_versioning = false;
2162 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask); 2275 break;
2276 }
2277
2163 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; 2278 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2164 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info); 2279 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
2165 } 2280 }
2166 } 2281 }
2167 2282
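
For reference, a hedged sketch (pointer names invented) of the run-time test that versioning for alignment guards the vector loop with; every statement pushed onto LOOP_VINFO_MAY_MISALIGN_STMTS contributes an address, and all of them are tested against the single recorded mask.

  #include <stdint.h>

  /* With 16-byte vectors LOOP_VINFO_PTR_MASK is 15.  */
  static int
  all_aligned (const void *p, const void *q)
  {
    return ((((uintptr_t) p | (uintptr_t) q) & 15) == 0);
  }
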
2244 poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra)) 2359 poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra))
2245 - wi::to_poly_offset (DR_INIT (drb))); 2360 - wi::to_poly_offset (DR_INIT (drb)));
2246 if (maybe_ne (diff, 0)) 2361 if (maybe_ne (diff, 0))
2247 { 2362 {
2248 /* Get the wider of the two alignments. */ 2363 /* Get the wider of the two alignments. */
2249 unsigned int align_a = (vect_calculate_target_alignment (dr_info_a) 2364 poly_uint64 align_a =
2250 / BITS_PER_UNIT); 2365 exact_div (vect_calculate_target_alignment (dr_info_a),
2251 unsigned int align_b = (vect_calculate_target_alignment (dr_info_b) 2366 BITS_PER_UNIT);
2252 / BITS_PER_UNIT); 2367 poly_uint64 align_b =
2253 unsigned int max_align = MAX (align_a, align_b); 2368 exact_div (vect_calculate_target_alignment (dr_info_b),
2369 BITS_PER_UNIT);
2370 unsigned HOST_WIDE_INT align_a_c, align_b_c;
2371 if (!align_a.is_constant (&align_a_c)
2372 || !align_b.is_constant (&align_b_c))
2373 return;
2374
2375 unsigned HOST_WIDE_INT max_align = MAX (align_a_c, align_b_c);
2254 2376
2255 /* Require the gap to be a multiple of the larger vector alignment. */ 2377 /* Require the gap to be a multiple of the larger vector alignment. */
2256 if (!multiple_p (diff, max_align)) 2378 if (!multiple_p (diff, max_align))
2257 return; 2379 return;
2258 } 2380 }
2436 /* Mark the statement as unvectorizable. */ 2558 /* Mark the statement as unvectorizable. */
2437 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 2559 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2438 return true; 2560 return true;
2439 } 2561 }
2440 2562
2441 dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n"); 2563 if (dump_enabled_p ())
2564 dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
2442 STMT_VINFO_STRIDED_P (stmt_info) = true; 2565 STMT_VINFO_STRIDED_P (stmt_info) = true;
2443 return true; 2566 return true;
2444 } 2567 }
2445 2568
2446 if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info) 2569 if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
2448 /* First stmt in the interleaving chain. Check the chain. */ 2571 /* First stmt in the interleaving chain. Check the chain. */
2449 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info); 2572 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
2450 struct data_reference *data_ref = dr; 2573 struct data_reference *data_ref = dr;
2451 unsigned int count = 1; 2574 unsigned int count = 1;
2452 tree prev_init = DR_INIT (data_ref); 2575 tree prev_init = DR_INIT (data_ref);
2453 stmt_vec_info prev = stmt_info;
2454 HOST_WIDE_INT diff, gaps = 0; 2576 HOST_WIDE_INT diff, gaps = 0;
2455 2577
2456 /* By construction, all group members have INTEGER_CST DR_INITs. */ 2578 /* By construction, all group members have INTEGER_CST DR_INITs. */
2457 while (next) 2579 while (next)
2458 { 2580 {
2459 /* Skip same data-refs. In case that two or more stmts share 2581 /* We never have the same DR multiple times. */
2460 data-ref (supported only for loads), we vectorize only the first 2582 gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
2461 stmt, and the rest get their vectorized loads from the first 2583 DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);
2462 one. */ 2584
2463 if (!tree_int_cst_compare (DR_INIT (data_ref),
2464 DR_INIT (STMT_VINFO_DATA_REF (next))))
2465 {
2466 if (DR_IS_WRITE (data_ref))
2467 {
2468 if (dump_enabled_p ())
2469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2470 "Two store stmts share the same dr.\n");
2471 return false;
2472 }
2473
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_NOTE, vect_location,
2476 "Two or more load stmts share the same dr.\n");
2477
2478 /* For load use the same data-ref load. */
2479 DR_GROUP_SAME_DR_STMT (next) = prev;
2480
2481 prev = next;
2482 next = DR_GROUP_NEXT_ELEMENT (next);
2483 continue;
2484 }
2485
2486 prev = next;
2487 data_ref = STMT_VINFO_DATA_REF (next); 2585 data_ref = STMT_VINFO_DATA_REF (next);
2488 2586
2489 /* All group members have the same STEP by construction. */ 2587 /* All group members have the same STEP by construction. */
2490 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0)); 2588 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
2491 2589
2557 dump_printf (MSG_NOTE, "load "); 2655 dump_printf (MSG_NOTE, "load ");
2558 else if (STMT_VINFO_STRIDED_P (stmt_info)) 2656 else if (STMT_VINFO_STRIDED_P (stmt_info))
2559 dump_printf (MSG_NOTE, "strided store "); 2657 dump_printf (MSG_NOTE, "strided store ");
2560 else 2658 else
2561 dump_printf (MSG_NOTE, "store "); 2659 dump_printf (MSG_NOTE, "store ");
2562 dump_printf (MSG_NOTE, "of size %u starting with %G", 2660 dump_printf (MSG_NOTE, "of size %u\n",
2563 (unsigned)groupsize, stmt_info->stmt); 2661 (unsigned)groupsize);
2662 dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
2663 next = DR_GROUP_NEXT_ELEMENT (stmt_info);
2664 while (next)
2665 {
2666 if (DR_GROUP_GAP (next) != 1)
2667 dump_printf_loc (MSG_NOTE, vect_location,
2668 "\t<gap of %d elements>\n",
2669 DR_GROUP_GAP (next) - 1);
2670 dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
2671 next = DR_GROUP_NEXT_ELEMENT (next);
2672 }
2564 if (DR_GROUP_GAP (stmt_info) != 0) 2673 if (DR_GROUP_GAP (stmt_info) != 0)
2565 dump_printf_loc (MSG_NOTE, vect_location, 2674 dump_printf_loc (MSG_NOTE, vect_location,
2566 "There is a gap of %u elements after the group\n", 2675 "\t<gap of %d elements>\n",
2567 DR_GROUP_GAP (stmt_info)); 2676 DR_GROUP_GAP (stmt_info));
2568 } 2677 }
2569 2678
2570 /* SLP: create an SLP data structure for every interleaving group of 2679 /* SLP: create an SLP data structure for every interleaving group of
2571 stores for further analysis in vect_analyse_slp. */ 2680 stores for further analysis in vect_analyse_slp. */
2615 data_reference *dr = dr_info->dr; 2724 data_reference *dr = dr_info->dr;
2616 tree step = DR_STEP (dr); 2725 tree step = DR_STEP (dr);
2617 tree scalar_type = TREE_TYPE (DR_REF (dr)); 2726 tree scalar_type = TREE_TYPE (DR_REF (dr));
2618 stmt_vec_info stmt_info = dr_info->stmt; 2727 stmt_vec_info stmt_info = dr_info->stmt;
2619 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2728 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2620 struct loop *loop = NULL; 2729 class loop *loop = NULL;
2621 2730
2622 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 2731 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2623 return true; 2732 return true;
2624 2733
2625 if (loop_vinfo) 2734 if (loop_vinfo)
2768 return NULL_TREE; 2877 return NULL_TREE;
2769 return gimple_assign_rhs1 (stmt); 2878 return gimple_assign_rhs1 (stmt);
2770 } 2879 }
2771 2880
2772 /* Return true if vectorizable_* routines can handle statements STMT1_INFO 2881 /* Return true if vectorizable_* routines can handle statements STMT1_INFO
2773 and STMT2_INFO being in a single group. */ 2882 and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can
2883 be grouped in SLP mode. */
2774 2884
2775 static bool 2885 static bool
2776 can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) 2886 can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
2887 bool allow_slp_p)
2777 { 2888 {
2778 if (gimple_assign_single_p (stmt1_info->stmt)) 2889 if (gimple_assign_single_p (stmt1_info->stmt))
2779 return gimple_assign_single_p (stmt2_info->stmt); 2890 return gimple_assign_single_p (stmt2_info->stmt);
2780 2891
2781 gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt); 2892 gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
2793 2904
2794 /* Check that the masks are the same. Cope with casts of masks, 2905 /* Check that the masks are the same. Cope with casts of masks,
2795 like those created by build_mask_conversion. */ 2906 like those created by build_mask_conversion. */
2796 tree mask1 = gimple_call_arg (call1, 2); 2907 tree mask1 = gimple_call_arg (call1, 2);
2797 tree mask2 = gimple_call_arg (call2, 2); 2908 tree mask2 = gimple_call_arg (call2, 2);
2798 if (!operand_equal_p (mask1, mask2, 0)) 2909 if (!operand_equal_p (mask1, mask2, 0)
2910 && (ifn == IFN_MASK_STORE || !allow_slp_p))
2799 { 2911 {
2800 mask1 = strip_conversion (mask1); 2912 mask1 = strip_conversion (mask1);
2801 if (!mask1) 2913 if (!mask1)
2802 return false; 2914 return false;
2803 mask2 = strip_conversion (mask2); 2915 mask2 = strip_conversion (mask2);
2879 not masked loads or stores). */ 2991 not masked loads or stores). */
2880 if (DR_IS_READ (dra) != DR_IS_READ (drb) 2992 if (DR_IS_READ (dra) != DR_IS_READ (drb)
2881 || data_ref_compare_tree (DR_BASE_ADDRESS (dra), 2993 || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2882 DR_BASE_ADDRESS (drb)) != 0 2994 DR_BASE_ADDRESS (drb)) != 0
2883 || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0 2995 || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
2884 || !can_group_stmts_p (stmtinfo_a, stmtinfo_b)) 2996 || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
2885 break; 2997 break;
2886 2998
2887 /* Check that the data-refs have the same constant size. */ 2999 /* Check that the data-refs have the same constant size. */
2888 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))); 3000 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2889 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))); 3001 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
2903 break; 3015 break;
2904 3016
2905 /* Check that the DR_INITs are compile-time constants. */ 3017 /* Check that the DR_INITs are compile-time constants. */
2906 if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST 3018 if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST
2907 || TREE_CODE (DR_INIT (drb)) != INTEGER_CST) 3019 || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
3020 break;
3021
3022 /* Different .GOMP_SIMD_LANE calls still give the same lane,
3023 just hold extra information. */
3024 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
3025 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
3026 && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
2908 break; 3027 break;
2909 3028
2910 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */ 3029 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
2911 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); 3030 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2912 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); 3031 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
2964 } 3083 }
2965 DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a; 3084 DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
2966 DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b; 3085 DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
2967 lastinfo = stmtinfo_b; 3086 lastinfo = stmtinfo_b;
2968 3087
3088 STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
3089 = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
3090
3091 if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
3092 dump_printf_loc (MSG_NOTE, vect_location,
3093 "Load suitable for SLP vectorization only.\n");
3094
2969 if (init_b == init_prev 3095 if (init_b == init_prev
2970 && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) 3096 && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
2971 && dump_enabled_p ()) 3097 && dump_enabled_p ())
2972 dump_printf_loc (MSG_NOTE, vect_location, 3098 dump_printf_loc (MSG_NOTE, vect_location,
2973 "Queuing group with duplicate access for fixup\n"); 3099 "Queuing group with duplicate access for fixup\n");
2986 /* Find the earliest duplicate group member. */ 3112 /* Find the earliest duplicate group member. */
2987 unsigned first_duplicate = -1u; 3113 unsigned first_duplicate = -1u;
2988 stmt_vec_info next, g = grp; 3114 stmt_vec_info next, g = grp;
2989 while ((next = DR_GROUP_NEXT_ELEMENT (g))) 3115 while ((next = DR_GROUP_NEXT_ELEMENT (g)))
2990 { 3116 {
2991 if ((DR_INIT (STMT_VINFO_DR_INFO (next)->dr) 3117 if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
2992 == DR_INIT (STMT_VINFO_DR_INFO (g)->dr)) 3118 DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
2993 && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate) 3119 && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
2994 first_duplicate = gimple_uid (STMT_VINFO_STMT (next)); 3120 first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
2995 g = next; 3121 g = next;
2996 } 3122 }
2997 if (first_duplicate == -1U) 3123 if (first_duplicate == -1U)
2999 3125
3000 /* Then move all stmts after the first duplicate to a new group. 3126 /* Then move all stmts after the first duplicate to a new group.
3001 Note this is a heuristic but one with the property that *it 3127 Note this is a heuristic but one with the property that *it
3002 is fixed up completely. */ 3128 is fixed up completely. */
3003 g = grp; 3129 g = grp;
3004 stmt_vec_info newgroup = NULL, ng; 3130 stmt_vec_info newgroup = NULL, ng = grp;
3005 while ((next = DR_GROUP_NEXT_ELEMENT (g))) 3131 while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3006 { 3132 {
3007 if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate) 3133 if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
3008 { 3134 {
3009 DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next); 3135 DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
3351 LOOP_VINFO_LOOP_NEST (loop_vinfo)); 3477 LOOP_VINFO_LOOP_NEST (loop_vinfo));
3352 3478
3353 /* First, we collect all data ref pairs for aliasing checks. */ 3479 /* First, we collect all data ref pairs for aliasing checks. */
3354 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr) 3480 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
3355 { 3481 {
3356 int comp_res;
3357 poly_uint64 lower_bound; 3482 poly_uint64 lower_bound;
3358 tree segment_length_a, segment_length_b; 3483 tree segment_length_a, segment_length_b;
3359 unsigned HOST_WIDE_INT access_size_a, access_size_b; 3484 unsigned HOST_WIDE_INT access_size_a, access_size_b;
3360 unsigned int align_a, align_b; 3485 unsigned int align_a, align_b;
3361 3486
3383 stmt_vec_info stmt_info_a = dr_info_a->stmt; 3508 stmt_vec_info stmt_info_a = dr_info_a->stmt;
3384 3509
3385 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr)); 3510 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
3386 stmt_vec_info stmt_info_b = dr_info_b->stmt; 3511 stmt_vec_info stmt_info_b = dr_info_b->stmt;
3387 3512
3513 bool preserves_scalar_order_p
3514 = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
3515
3388 /* Skip the pair if inter-iteration dependencies are irrelevant 3516 /* Skip the pair if inter-iteration dependencies are irrelevant
3389 and intra-iteration dependencies are guaranteed to be honored. */ 3517 and intra-iteration dependencies are guaranteed to be honored. */
3390 if (ignore_step_p 3518 if (ignore_step_p
3391 && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b) 3519 && (preserves_scalar_order_p
3392 || vectorizable_with_step_bound_p (dr_info_a, dr_info_b, 3520 || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
3393 &lower_bound))) 3521 &lower_bound)))
3394 { 3522 {
3395 if (dump_enabled_p ()) 3523 if (dump_enabled_p ())
3396 dump_printf_loc (MSG_NOTE, vect_location, 3524 dump_printf_loc (MSG_NOTE, vect_location,
3467 access_size_a = vect_vfa_access_size (dr_info_a); 3595 access_size_a = vect_vfa_access_size (dr_info_a);
3468 access_size_b = vect_vfa_access_size (dr_info_b); 3596 access_size_b = vect_vfa_access_size (dr_info_b);
3469 align_a = vect_vfa_align (dr_info_a); 3597 align_a = vect_vfa_align (dr_info_a);
3470 align_b = vect_vfa_align (dr_info_b); 3598 align_b = vect_vfa_align (dr_info_b);
3471 3599
3472 comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_info_a->dr),
3473 DR_BASE_ADDRESS (dr_info_b->dr));
3474 if (comp_res == 0)
3475 comp_res = data_ref_compare_tree (DR_OFFSET (dr_info_a->dr),
3476 DR_OFFSET (dr_info_b->dr));
3477
3478 /* See whether the alias is known at compilation time. */ 3600 /* See whether the alias is known at compilation time. */
3479 if (comp_res == 0 3601 if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
3602 DR_BASE_ADDRESS (dr_info_b->dr), 0)
3603 && operand_equal_p (DR_OFFSET (dr_info_a->dr),
3604 DR_OFFSET (dr_info_b->dr), 0)
3480 && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST 3605 && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
3481 && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST 3606 && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
3482 && poly_int_tree_p (segment_length_a) 3607 && poly_int_tree_p (segment_length_a)
3483 && poly_int_tree_p (segment_length_b)) 3608 && poly_int_tree_p (segment_length_b))
3484 { 3609 {
3507 " compilation time alias: %G%G", 3632 " compilation time alias: %G%G",
3508 stmt_info_a->stmt, 3633 stmt_info_a->stmt,
3509 stmt_info_b->stmt); 3634 stmt_info_b->stmt);
3510 } 3635 }
3511 3636
3637 dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
3638 access_size_a, align_a);
3639 dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
3640 access_size_b, align_b);
3641 /* Canonicalize the order to be the one that's needed for accurate
3642 RAW, WAR and WAW flags, in cases where the data references are
3643 well-ordered. The order doesn't really matter otherwise,
3644 but we might as well be consistent. */
3645 if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
3646 std::swap (dr_a, dr_b);
3647
3512 dr_with_seg_len_pair_t dr_with_seg_len_pair 3648 dr_with_seg_len_pair_t dr_with_seg_len_pair
3513 (dr_with_seg_len (dr_info_a->dr, segment_length_a, 3649 (dr_a, dr_b, (preserves_scalar_order_p
3514 access_size_a, align_a), 3650 ? dr_with_seg_len_pair_t::WELL_ORDERED
3515 dr_with_seg_len (dr_info_b->dr, segment_length_b, 3651 : dr_with_seg_len_pair_t::REORDERED));
3516 access_size_b, align_b));
3517
3518 /* Canonicalize pairs by sorting the two DR members. */
3519 if (comp_res > 0)
3520 std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
3521 3652
3522 comp_alias_ddrs.safe_push (dr_with_seg_len_pair); 3653 comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
3523 } 3654 }
3524 3655
3525 prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor); 3656 prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
3526 3657
3527 unsigned int count = (comp_alias_ddrs.length () 3658 unsigned int count = (comp_alias_ddrs.length ()
3528 + check_unequal_addrs.length ()); 3659 + check_unequal_addrs.length ());
3529 3660
3530 dump_printf_loc (MSG_NOTE, vect_location, 3661 if (dump_enabled_p ())
3531 "improved number of alias checks from %d to %d\n", 3662 dump_printf_loc (MSG_NOTE, vect_location,
3532 may_alias_ddrs.length (), count); 3663 "improved number of alias checks from %d to %d\n",
3533 if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) 3664 may_alias_ddrs.length (), count);
3665 unsigned limit = param_vect_max_version_for_alias_checks;
3666 if (flag_simd_cost_model == VECT_COST_MODEL_CHEAP)
3667 limit = param_vect_max_version_for_alias_checks * 6 / 10;
3668 if (count > limit)
3534 return opt_result::failure_at 3669 return opt_result::failure_at
3535 (vect_location, 3670 (vect_location,
3536 "number of versioning for alias " 3671 "number of versioning for alias run-time tests exceeds %d "
3537 "run-time tests exceeds %d " 3672 "(--param vect-max-version-for-alias-checks)\n", limit);
3538 "(--param vect-max-version-for-alias-checks)\n",
3539 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
3540 3673
3541 return opt_result::success (); 3674 return opt_result::success ();
3542 } 3675 }
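
A hedged sketch (names invented) of the no-overlap test one surviving pair turns into after pruning; the number of such tests is what gets compared against the (possibly lowered) --param vect-max-version-for-alias-checks limit.

  /* Segment lengths cover all bytes accessed by the vector loop.  */
  static int
  no_overlap (const char *pa, long seg_len_a,
              const char *pb, long seg_len_b)
  {
    return pa + seg_len_a <= pb || pb + seg_len_b <= pa;
  }
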
3543 3676
3544 /* Check whether we can use an internal function for a gather load 3677 /* Check whether we can use an internal function for a gather load
3545 or scatter store. READ_P is true for loads and false for stores. 3678 or scatter store. READ_P is true for loads and false for stores.
3546 MASKED_P is true if the load or store is conditional. MEMORY_TYPE is 3679 MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
3547 the type of the memory elements being loaded or stored. OFFSET_BITS 3680 the type of the memory elements being loaded or stored. OFFSET_TYPE
3548 is the number of bits in each scalar offset and OFFSET_SIGN is the 3681 is the type of the offset that is being applied to the invariant
3549 sign of the offset. SCALE is the amount by which the offset should 3682 base address. SCALE is the amount by which the offset should
3550 be multiplied *after* it has been converted to address width. 3683 be multiplied *after* it has been converted to address width.
3551 3684
3552 Return true if the function is supported, storing the function 3685 Return true if the function is supported, storing the function id in
3553 id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT. */ 3686 *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */
3554 3687
3555 bool 3688 bool
3556 vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype, 3689 vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
3557 tree memory_type, unsigned int offset_bits, 3690 tree vectype, tree memory_type, tree offset_type,
3558 signop offset_sign, int scale, 3691 int scale, internal_fn *ifn_out,
3559 internal_fn *ifn_out, tree *element_type_out) 3692 tree *offset_vectype_out)
3560 { 3693 {
3561 unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); 3694 unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
3562 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))); 3695 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
3563 if (offset_bits > element_bits)
3564 /* Internal functions require the offset to be the same width as
3565 the vector elements. We can extend narrower offsets, but it isn't
3566 safe to truncate wider offsets. */
3567 return false;
3568
3569 if (element_bits != memory_bits) 3696 if (element_bits != memory_bits)
3570 /* For now the vector elements must be the same width as the 3697 /* For now the vector elements must be the same width as the
3571 memory elements. */ 3698 memory elements. */
3572 return false; 3699 return false;
3573 3700
3576 if (read_p) 3703 if (read_p)
3577 ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD; 3704 ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
3578 else 3705 else
3579 ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE; 3706 ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
3580 3707
3581 /* Test whether the target supports this combination. */ 3708 for (;;)
3582 if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, 3709 {
3583 offset_sign, scale)) 3710 tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
3584 return false; 3711 if (!offset_vectype)
3585 3712 return false;
3586 *ifn_out = ifn; 3713
3587 *element_type_out = TREE_TYPE (vectype); 3714 /* Test whether the target supports this combination. */
3588 return true; 3715 if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
3716 offset_vectype, scale))
3717 {
3718 *ifn_out = ifn;
3719 *offset_vectype_out = offset_vectype;
3720 return true;
3721 }
3722
3723 if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
3724 && TYPE_PRECISION (offset_type) >= element_bits)
3725 return false;
3726
3727 offset_type = build_nonstandard_integer_type
3728 (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
3729 }
3589 } 3730 }
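
An illustrative gather (types assumed) that exercises the new retry loop: the 16-bit index type is tried first and, if the target only supports wider gather offsets, it is widened step by step until a supported offset vector type is found or the precision reaches POINTER_SIZE.

  float
  sum_gather (const float *a, const unsigned short *idx, int n)
  {
    float s = 0;
    for (int i = 0; i < n; i++)
      s += a[idx[i]];
    return s;
  }
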
3590 3731
3591 /* STMT_INFO is a call to an internal gather load or scatter store function. 3732 /* STMT_INFO is a call to an internal gather load or scatter store function.
3592 Describe the operation in INFO. */ 3733 Describe the operation in INFO. */
3593 3734
3617 vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, 3758 vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
3618 gather_scatter_info *info) 3759 gather_scatter_info *info)
3619 { 3760 {
3620 HOST_WIDE_INT scale = 1; 3761 HOST_WIDE_INT scale = 1;
3621 poly_int64 pbitpos, pbitsize; 3762 poly_int64 pbitpos, pbitsize;
3622 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 3763 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3623 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 3764 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3624 tree offtype = NULL_TREE; 3765 tree offtype = NULL_TREE;
3625 tree decl = NULL_TREE, base, off; 3766 tree decl = NULL_TREE, base, off;
3626 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3767 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3627 tree memory_type = TREE_TYPE (DR_REF (dr)); 3768 tree memory_type = TREE_TYPE (DR_REF (dr));
3628 machine_mode pmode; 3769 machine_mode pmode;
3629 int punsignedp, reversep, pvolatilep = 0; 3770 int punsignedp, reversep, pvolatilep = 0;
3630 internal_fn ifn; 3771 internal_fn ifn;
3631 tree element_type; 3772 tree offset_vectype;
3632 bool masked_p = false; 3773 bool masked_p = false;
3633 3774
3634 /* See whether this is already a call to a gather/scatter internal function. 3775 /* See whether this is already a call to a gather/scatter internal function.
3635 If not, see whether it's a masked load or store. */ 3776 If not, see whether it's a masked load or store. */
3636 gcall *call = dyn_cast <gcall *> (stmt_info->stmt); 3777 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
3787 case MULT_EXPR: 3928 case MULT_EXPR:
3788 if (scale == 1 && tree_fits_shwi_p (op1)) 3929 if (scale == 1 && tree_fits_shwi_p (op1))
3789 { 3930 {
3790 int new_scale = tree_to_shwi (op1); 3931 int new_scale = tree_to_shwi (op1);
3791 /* Only treat this as a scaling operation if the target 3932 /* Only treat this as a scaling operation if the target
3792 supports it. */ 3933 supports it for at least some offset type. */
3793 if (use_ifn_p 3934 if (use_ifn_p
3794 && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, 3935 && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
3795 vectype, memory_type, 1, 3936 masked_p, vectype, memory_type,
3796 TYPE_SIGN (TREE_TYPE (op0)), 3937 signed_char_type_node,
3797 new_scale, &ifn, 3938 new_scale, &ifn,
3798 &element_type)) 3939 &offset_vectype)
3940 && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
3941 masked_p, vectype, memory_type,
3942 unsigned_char_type_node,
3943 new_scale, &ifn,
3944 &offset_vectype))
3799 break; 3945 break;
3800 scale = new_scale; 3946 scale = new_scale;
3801 off = op0; 3947 off = op0;
3802 continue; 3948 continue;
3803 } 3949 }
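For orientation, a minimal sketch of the offset computation the MULT_EXPR case above tries to absorb (names are illustrative; whether the scale is accepted depends on what vect_gather_scatter_fn_p reports for the target):

/* At the GIMPLE level the byte offset is idx[i] * 8.  If the target
   supports a gather with scale == 8 for either a signed or an unsigned
   offset type, the multiplication is folded into the gather's scale
   operand; otherwise it stays part of the offset computation.  */
void
gather_scaled (double *restrict dst, const double *restrict base,
               const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = base[idx[i]];
}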
3807 continue; 3953 continue;
3808 CASE_CONVERT: 3954 CASE_CONVERT:
3809 if (!POINTER_TYPE_P (TREE_TYPE (op0)) 3955 if (!POINTER_TYPE_P (TREE_TYPE (op0))
3810 && !INTEGRAL_TYPE_P (TREE_TYPE (op0))) 3956 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3811 break; 3957 break;
3958
3959 /* Don't include the conversion if the target is happy with
3960 the current offset type. */
3961 if (use_ifn_p
3962 && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
3963 masked_p, vectype, memory_type,
3964 TREE_TYPE (off), scale, &ifn,
3965 &offset_vectype))
3966 break;
3967
3812 if (TYPE_PRECISION (TREE_TYPE (op0)) 3968 if (TYPE_PRECISION (TREE_TYPE (op0))
3813 == TYPE_PRECISION (TREE_TYPE (off))) 3969 == TYPE_PRECISION (TREE_TYPE (off)))
3814 { 3970 {
3815 off = op0; 3971 off = op0;
3816 continue; 3972 continue;
3817 } 3973 }
3818
3819 /* The internal functions need the offset to be the same width
3820 as the elements of VECTYPE. Don't include operations that
3821 cast the offset from that width to a different width. */
3822 if (use_ifn_p
3823 && (int_size_in_bytes (TREE_TYPE (vectype))
3824 == int_size_in_bytes (TREE_TYPE (off))))
3825 break;
3826 3974
3827 if (TYPE_PRECISION (TREE_TYPE (op0)) 3975 if (TYPE_PRECISION (TREE_TYPE (op0))
3828 < TYPE_PRECISION (TREE_TYPE (off))) 3976 < TYPE_PRECISION (TREE_TYPE (off)))
3829 { 3977 {
3830 off = op0; 3978 off = op0;
3848 if (offtype == NULL_TREE) 3996 if (offtype == NULL_TREE)
3849 offtype = TREE_TYPE (off); 3997 offtype = TREE_TYPE (off);
3850 3998
3851 if (use_ifn_p) 3999 if (use_ifn_p)
3852 { 4000 {
3853 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype, 4001 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
3854 memory_type, TYPE_PRECISION (offtype), 4002 vectype, memory_type, offtype, scale,
3855 TYPE_SIGN (offtype), scale, &ifn, 4003 &ifn, &offset_vectype))
3856 &element_type))
3857 return false; 4004 return false;
3858 } 4005 }
3859 else 4006 else
3860 { 4007 {
3861 if (DR_IS_READ (dr)) 4008 if (DR_IS_READ (dr))
3871 4018
3872 if (!decl) 4019 if (!decl)
3873 return false; 4020 return false;
3874 4021
3875 ifn = IFN_LAST; 4022 ifn = IFN_LAST;
3876 element_type = TREE_TYPE (vectype); 4023 /* The offset vector type will be read from DECL when needed. */
4024 offset_vectype = NULL_TREE;
3877 } 4025 }
3878 4026
3879 info->ifn = ifn; 4027 info->ifn = ifn;
3880 info->decl = decl; 4028 info->decl = decl;
3881 info->base = base; 4029 info->base = base;
3882 info->offset = off; 4030 info->offset = off;
3883 info->offset_dt = vect_unknown_def_type; 4031 info->offset_dt = vect_unknown_def_type;
3884 info->offset_vectype = NULL_TREE; 4032 info->offset_vectype = offset_vectype;
3885 info->scale = scale; 4033 info->scale = scale;
3886 info->element_type = element_type; 4034 info->element_type = TREE_TYPE (vectype);
3887 info->memory_type = memory_type; 4035 info->memory_type = memory_type;
3888 return true; 4036 return true;
3889 } 4037 }
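To make the result of this analysis concrete, here is a hypothetical scatter store and, roughly, how it maps onto the fields filled in above (the function and variable names are invented for the example):

/* Each lane writes *(double *) (base + extended idx[i] * 8): info->base
   is roughly &out[0], info->offset is the value feeding idx[i],
   info->scale is 8, and info->element_type / info->memory_type are
   the double element being stored.  */
void
scatter_store (double *restrict out, const int *restrict idx,
               const double *restrict val, int n)
{
  for (int i = 0; i < n; i++)
    out[idx[i]] = val[i];
}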
3890 4038
3891 /* Find the data references in STMT, analyze them with respect to LOOP and 4039 /* Find the data references in STMT, analyze them with respect to LOOP and
3959 DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr)); 4107 DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
3960 if (DR_BASE_ADDRESS (newdr) 4108 if (DR_BASE_ADDRESS (newdr)
3961 && DR_OFFSET (newdr) 4109 && DR_OFFSET (newdr)
3962 && DR_INIT (newdr) 4110 && DR_INIT (newdr)
3963 && DR_STEP (newdr) 4111 && DR_STEP (newdr)
4112 && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
3964 && integer_zerop (DR_STEP (newdr))) 4113 && integer_zerop (DR_STEP (newdr)))
3965 { 4114 {
4115 tree base_address = DR_BASE_ADDRESS (newdr);
3966 tree off = DR_OFFSET (newdr); 4116 tree off = DR_OFFSET (newdr);
4117 tree step = ssize_int (1);
4118 if (integer_zerop (off)
4119 && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
4120 {
4121 off = TREE_OPERAND (base_address, 1);
4122 base_address = TREE_OPERAND (base_address, 0);
4123 }
3967 STRIP_NOPS (off); 4124 STRIP_NOPS (off);
3968 if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST 4125 if (TREE_CODE (off) == MULT_EXPR
3969 && TREE_CODE (off) == MULT_EXPR
3970 && tree_fits_uhwi_p (TREE_OPERAND (off, 1))) 4126 && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
3971 { 4127 {
3972 tree step = TREE_OPERAND (off, 1); 4128 step = TREE_OPERAND (off, 1);
3973 off = TREE_OPERAND (off, 0); 4129 off = TREE_OPERAND (off, 0);
3974 STRIP_NOPS (off); 4130 STRIP_NOPS (off);
3975 if (CONVERT_EXPR_P (off) 4131 }
3976 && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0))) 4132 if (CONVERT_EXPR_P (off)
3977 < TYPE_PRECISION (TREE_TYPE (off)))) 4133 && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
3978 off = TREE_OPERAND (off, 0); 4134 < TYPE_PRECISION (TREE_TYPE (off))))
3979 if (TREE_CODE (off) == SSA_NAME) 4135 off = TREE_OPERAND (off, 0);
4136 if (TREE_CODE (off) == SSA_NAME)
4137 {
4138 gimple *def = SSA_NAME_DEF_STMT (off);
4139 /* Look through widening conversion. */
4140 if (is_gimple_assign (def)
4141 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
3980 { 4142 {
3981 gimple *def = SSA_NAME_DEF_STMT (off); 4143 tree rhs1 = gimple_assign_rhs1 (def);
4144 if (TREE_CODE (rhs1) == SSA_NAME
4145 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
4146 && (TYPE_PRECISION (TREE_TYPE (off))
4147 > TYPE_PRECISION (TREE_TYPE (rhs1))))
4148 def = SSA_NAME_DEF_STMT (rhs1);
4149 }
4150 if (is_gimple_call (def)
4151 && gimple_call_internal_p (def)
4152 && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
4153 {
4154 tree arg = gimple_call_arg (def, 0);
3982 tree reft = TREE_TYPE (DR_REF (newdr)); 4155 tree reft = TREE_TYPE (DR_REF (newdr));
3983 if (is_gimple_call (def) 4156 gcc_assert (TREE_CODE (arg) == SSA_NAME);
3984 && gimple_call_internal_p (def) 4157 arg = SSA_NAME_VAR (arg);
3985 && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE)) 4158 if (arg == loop->simduid
4159 /* For now. */
4160 && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
3986 { 4161 {
3987 tree arg = gimple_call_arg (def, 0); 4162 DR_BASE_ADDRESS (newdr) = base_address;
3988 gcc_assert (TREE_CODE (arg) == SSA_NAME); 4163 DR_OFFSET (newdr) = ssize_int (0);
3989 arg = SSA_NAME_VAR (arg); 4164 DR_STEP (newdr) = step;
3990 if (arg == loop->simduid 4165 DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
3991 /* For now. */ 4166 DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
3992 && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step)) 4167 /* Mark as simd-lane access. */
3993 { 4168 tree arg2 = gimple_call_arg (def, 1);
3994 DR_OFFSET (newdr) = ssize_int (0); 4169 newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
3995 DR_STEP (newdr) = step; 4170 free_data_ref (dr);
3996 DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT; 4171 datarefs->safe_push (newdr);
3997 DR_STEP_ALIGNMENT (newdr) 4172 return opt_result::success ();
3998 = highest_pow2_factor (step);
3999 /* Mark as simd-lane access. */
4000 newdr->aux = (void *)-1;
4001 free_data_ref (dr);
4002 datarefs->safe_push (newdr);
4003 return opt_result::success ();
4004 }
4005 } 4173 }
4006 } 4174 }
4007 } 4175 }
4008 } 4176 }
4009 free_data_ref (newdr); 4177 free_data_ref (newdr);
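For context, a source pattern of the kind that can lead to the IFN_GOMP_SIMD_LANE-indexed references handled above; this is only a sketch, it assumes -fopenmp or -fopenmp-simd, and the exact lowering is decided by omp-low rather than by this file:

void
simd_lane_private (float *restrict out, const float *restrict in, int n)
{
  #pragma omp simd
  for (int i = 0; i < n; i++)
    {
      float tmp[2];           /* privatized per simd lane; its accesses can
                                 be indexed via GOMP_SIMD_LANE and then show
                                 up here with step == sizeof (float)  */
      tmp[0] = in[i] * 2.0f;
      tmp[1] = in[i] + 1.0f;
      out[i] = tmp[0] + tmp[1];
    }
}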
4027 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. 4195 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4028 4196
4029 */ 4197 */
4030 4198
4031 opt_result 4199 opt_result
4032 vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) 4200 vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
4033 { 4201 {
4034 struct loop *loop = NULL; 4202 class loop *loop = NULL;
4035 unsigned int i; 4203 unsigned int i;
4036 struct data_reference *dr; 4204 struct data_reference *dr;
4037 tree scalar_type; 4205 tree scalar_type;
4038 4206
4039 DUMP_VECT_SCOPE ("vect_analyze_data_refs"); 4207 DUMP_VECT_SCOPE ("vect_analyze_data_refs");
4104 stmt_info->stmt); 4272 stmt_info->stmt);
4105 } 4273 }
4106 } 4274 }
4107 4275
4108 /* See if this was detected as SIMD lane access. */ 4276 /* See if this was detected as SIMD lane access. */
4109 if (dr->aux == (void *)-1) 4277 if (dr->aux == (void *)-1
4278 || dr->aux == (void *)-2
4279 || dr->aux == (void *)-3
4280 || dr->aux == (void *)-4)
4110 { 4281 {
4111 if (nested_in_vect_loop_p (loop, stmt_info)) 4282 if (nested_in_vect_loop_p (loop, stmt_info))
4112 return opt_result::failure_at (stmt_info->stmt, 4283 return opt_result::failure_at (stmt_info->stmt,
4113 "not vectorized:" 4284 "not vectorized:"
4114 " data ref analysis failed: %G", 4285 " data ref analysis failed: %G",
4115 stmt_info->stmt); 4286 stmt_info->stmt);
4116 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true; 4287 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
4288 = -(uintptr_t) dr->aux;
4117 } 4289 }
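A standalone model of the encode/decode arithmetic used for dr->aux above; this is not GCC code, and the second GOMP_SIMD_LANE argument is represented here by a plain integer:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uintptr_t arg2 = 3;                             /* kind argument */
  void *aux = (void *) (uintptr_t) (-1 - arg2);   /* stored as -1, -2, ... */
  uintptr_t access_p = -(uintptr_t) aux;          /* decodes to arg2 + 1,
                                                     so it is never zero  */
  assert (access_p == arg2 + 1);
  return 0;
}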
4118 4290
4119 tree base = get_base_address (DR_REF (dr)); 4291 tree base = get_base_address (DR_REF (dr));
4120 if (base && VAR_P (base) && DECL_NONALIASED (base)) 4292 if (base && VAR_P (base) && DECL_NONALIASED (base))
4121 { 4293 {
4140 && DR_STEP (dr) 4312 && DR_STEP (dr)
4141 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST) 4313 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
4142 { 4314 {
4143 if (nested_in_vect_loop_p (loop, stmt_info)) 4315 if (nested_in_vect_loop_p (loop, stmt_info))
4144 return opt_result::failure_at (stmt_info->stmt, 4316 return opt_result::failure_at (stmt_info->stmt,
4145 "not vectorized:" 4317 "not vectorized: "
4146 "not suitable for strided load %G", 4318 "not suitable for strided load %G",
4147 stmt_info->stmt); 4319 stmt_info->stmt);
4148 STMT_VINFO_STRIDED_P (stmt_info) = true; 4320 STMT_VINFO_STRIDED_P (stmt_info) = true;
4149 } 4321 }
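A hedged example of an access that takes the STMT_VINFO_STRIDED_P path above, because its step is a run-time value rather than an INTEGER_CST (names are illustrative only):

float
sum_strided (const float *x, int n, int stride)
{
  float s = 0.0f;
  for (int i = 0; i < n; i++)
    s += x[i * stride];   /* DR_STEP is stride * 4, not a compile-time
                             constant, so the load is marked strided  */
  return s;
}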
4150 4322
4201 STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info)); 4373 STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
4202 } 4374 }
4203 4375
4204 /* Set vectype for STMT. */ 4376 /* Set vectype for STMT. */
4205 scalar_type = TREE_TYPE (DR_REF (dr)); 4377 scalar_type = TREE_TYPE (DR_REF (dr));
4206 STMT_VINFO_VECTYPE (stmt_info) 4378 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
4207 = get_vectype_for_scalar_type (scalar_type); 4379 if (!vectype)
4208 if (!STMT_VINFO_VECTYPE (stmt_info))
4209 { 4380 {
4210 if (dump_enabled_p ()) 4381 if (dump_enabled_p ())
4211 { 4382 {
4212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4213 "not vectorized: no vectype for stmt: %G", 4384 "not vectorized: no vectype for stmt: %G",
4223 /* No vector type is fine, the ref can still participate 4394 /* No vector type is fine, the ref can still participate
4224 in dependence analysis, we just can't vectorize it. */ 4395 in dependence analysis, we just can't vectorize it. */
4225 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 4396 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4226 continue; 4397 continue;
4227 } 4398 }
4399 if (fatal)
4400 *fatal = false;
4228 return opt_result::failure_at (stmt_info->stmt, 4401 return opt_result::failure_at (stmt_info->stmt,
4229 "not vectorized:" 4402 "not vectorized:"
4230 " no vectype for stmt: %G" 4403 " no vectype for stmt: %G"
4231 " scalar_type: %T\n", 4404 " scalar_type: %T\n",
4232 stmt_info->stmt, scalar_type); 4405 stmt_info->stmt, scalar_type);
4234 else 4407 else
4235 { 4408 {
4236 if (dump_enabled_p ()) 4409 if (dump_enabled_p ())
4237 dump_printf_loc (MSG_NOTE, vect_location, 4410 dump_printf_loc (MSG_NOTE, vect_location,
4238 "got vectype for stmt: %G%T\n", 4411 "got vectype for stmt: %G%T\n",
4239 stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info)); 4412 stmt_info->stmt, vectype);
4240 } 4413 }
4241 4414
4242 /* Adjust the minimal vectorization factor according to the 4415 /* Adjust the minimal vectorization factor according to the
4243 vector type. */ 4416 vector type. */
4244 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); 4417 vf = TYPE_VECTOR_SUBPARTS (vectype);
4245 *min_vf = upper_bound (*min_vf, vf); 4418 *min_vf = upper_bound (*min_vf, vf);
4419
4420 /* Leave the BB vectorizer to pick the vector type later, based on
4421 the final dataref group size and SLP node size. */
4422 if (is_a <loop_vec_info> (vinfo))
4423 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4246 4424
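To illustrate the *min_vf adjustment just above, a hypothetical loop mixing element widths; the lane counts assume, purely for illustration, 128-bit vectors:

/* With 128-bit vectors the int accesses give TYPE_VECTOR_SUBPARTS == 4
   while the char accesses give 16, so upper_bound raises *min_vf to 16
   for this loop.  */
void
mixed_widths (int *restrict a, const unsigned char *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] += b[i];
}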
4247 if (gatherscatter != SG_NONE) 4425 if (gatherscatter != SG_NONE)
4248 { 4426 {
4249 gather_scatter_info gs_info; 4427 gather_scatter_info gs_info;
4250 if (!vect_check_gather_scatter (stmt_info, 4428 if (!vect_check_gather_scatter (stmt_info,
4251 as_a <loop_vec_info> (vinfo), 4429 as_a <loop_vec_info> (vinfo),
4252 &gs_info) 4430 &gs_info)
4253 || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset))) 4431 || !get_vectype_for_scalar_type (vinfo,
4254 return opt_result::failure_at 4432 TREE_TYPE (gs_info.offset)))
4255 (stmt_info->stmt, 4433 {
4256 (gatherscatter == GATHER) ? 4434 if (fatal)
4257 "not vectorized: not suitable for gather load %G" : 4435 *fatal = false;
4258 "not vectorized: not suitable for scatter store %G", 4436 return opt_result::failure_at
4259 stmt_info->stmt); 4437 (stmt_info->stmt,
4438 (gatherscatter == GATHER)
4439 ? "not vectorized: not suitable for gather load %G"
4440 : "not vectorized: not suitable for scatter store %G",
4441 stmt_info->stmt);
4442 }
4260 STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter; 4443 STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
4261 } 4444 }
4262 } 4445 }
4263 4446
4264 /* We used to stop processing and prune the list here. Verify we no 4447 /* We used to stop processing and prune the list here. Verify we no
4356 int misalign = DR_MISALIGNMENT (dr_info); 4539 int misalign = DR_MISALIGNMENT (dr_info);
4357 if (misalign == DR_MISALIGNMENT_UNKNOWN) 4540 if (misalign == DR_MISALIGNMENT_UNKNOWN)
4358 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name)); 4541 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
4359 else 4542 else
4360 set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), 4543 set_ptr_info_alignment (SSA_NAME_PTR_INFO (name),
4361 DR_TARGET_ALIGNMENT (dr_info), misalign); 4544 known_alignment (DR_TARGET_ALIGNMENT (dr_info)),
4545 misalign);
4362 } 4546 }
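A minimal model of the alignment pair being propagated to SSA_NAME_PTR_INFO above; this is a sketch only, and in the real code DR_TARGET_ALIGNMENT may be a poly_uint64 rather than a plain integer:

#include <stdint.h>

/* Misalignment of P relative to a power-of-two TARGET_ALIGN, mirroring the
   (target alignment, misalignment) pair recorded for the pointer.  */
static unsigned
misalignment (const void *p, uintptr_t target_align)
{
  return (unsigned) ((uintptr_t) p & (target_align - 1));
}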
4363 4547
4364 /* Function vect_create_addr_base_for_vector_ref. 4548 /* Function vect_create_addr_base_for_vector_ref.
4365 4549
4366 Create an expression that computes the address of the first memory location 4550 Create an expression that computes the address of the first memory location
4411 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); 4595 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
4412 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4596 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4413 innermost_loop_behavior *drb = vect_dr_behavior (dr_info); 4597 innermost_loop_behavior *drb = vect_dr_behavior (dr_info);
4414 4598
4415 tree data_ref_base = unshare_expr (drb->base_address); 4599 tree data_ref_base = unshare_expr (drb->base_address);
4416 tree base_offset = unshare_expr (drb->offset); 4600 tree base_offset = unshare_expr (get_dr_vinfo_offset (dr_info, true));
4417 tree init = unshare_expr (drb->init); 4601 tree init = unshare_expr (drb->init);
4418 4602
4419 if (loop_vinfo) 4603 if (loop_vinfo)
4420 base_name = get_name (data_ref_base); 4604 base_name = get_name (data_ref_base);
4421 else 4605 else
4529 4713
4530 3. Return the pointer. */ 4714 3. Return the pointer. */
4531 4715
4532 tree 4716 tree
4533 vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type, 4717 vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
4534 struct loop *at_loop, tree offset, 4718 class loop *at_loop, tree offset,
4535 tree *initial_address, gimple_stmt_iterator *gsi, 4719 tree *initial_address, gimple_stmt_iterator *gsi,
4536 gimple **ptr_incr, bool only_init, 4720 gimple **ptr_incr, bool only_init,
4537 tree byte_offset, tree iv_step) 4721 tree byte_offset, tree iv_step)
4538 { 4722 {
4539 const char *base_name; 4723 const char *base_name;
4540 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4724 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4541 struct loop *loop = NULL; 4725 class loop *loop = NULL;
4542 bool nested_in_vect_loop = false; 4726 bool nested_in_vect_loop = false;
4543 struct loop *containing_loop = NULL; 4727 class loop *containing_loop = NULL;
4544 tree aggr_ptr_type; 4728 tree aggr_ptr_type;
4545 tree aggr_ptr; 4729 tree aggr_ptr;
4546 tree new_temp; 4730 tree new_temp;
4547 gimple_seq new_stmt_list = NULL; 4731 gimple_seq new_stmt_list = NULL;
4548 edge pe = NULL; 4732 edge pe = NULL;
5283 tree 5467 tree
5284 vect_setup_realignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 5468 vect_setup_realignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5285 tree *realignment_token, 5469 tree *realignment_token,
5286 enum dr_alignment_support alignment_support_scheme, 5470 enum dr_alignment_support alignment_support_scheme,
5287 tree init_addr, 5471 tree init_addr,
5288 struct loop **at_loop) 5472 class loop **at_loop)
5289 { 5473 {
5290 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5474 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5291 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5475 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5292 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 5476 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
5293 struct data_reference *dr = dr_info->dr; 5477 struct data_reference *dr = dr_info->dr;
5294 struct loop *loop = NULL; 5478 class loop *loop = NULL;
5295 edge pe = NULL; 5479 edge pe = NULL;
5296 tree scalar_dest = gimple_assign_lhs (stmt_info->stmt); 5480 tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
5297 tree vec_dest; 5481 tree vec_dest;
5298 gimple *inc; 5482 gimple *inc;
5299 tree ptr; 5483 tree ptr;
5304 gphi *phi_stmt; 5488 gphi *phi_stmt;
5305 tree msq = NULL_TREE; 5489 tree msq = NULL_TREE;
5306 gimple_seq stmts = NULL; 5490 gimple_seq stmts = NULL;
5307 bool compute_in_loop = false; 5491 bool compute_in_loop = false;
5308 bool nested_in_vect_loop = false; 5492 bool nested_in_vect_loop = false;
5309 struct loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father; 5493 class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
5310 struct loop *loop_for_initial_load = NULL; 5494 class loop *loop_for_initial_load = NULL;
5311 5495
5312 if (loop_vinfo) 5496 if (loop_vinfo)
5313 { 5497 {
5314 loop = LOOP_VINFO_LOOP (loop_vinfo); 5498 loop = LOOP_VINFO_LOOP (loop_vinfo);
5315 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); 5499 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
5400 &init_addr, NULL, &inc, true); 5584 &init_addr, NULL, &inc, true);
5401 if (TREE_CODE (ptr) == SSA_NAME) 5585 if (TREE_CODE (ptr) == SSA_NAME)
5402 new_temp = copy_ssa_name (ptr); 5586 new_temp = copy_ssa_name (ptr);
5403 else 5587 else
5404 new_temp = make_ssa_name (TREE_TYPE (ptr)); 5588 new_temp = make_ssa_name (TREE_TYPE (ptr));
5405 unsigned int align = DR_TARGET_ALIGNMENT (dr_info); 5589 poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
5590 tree type = TREE_TYPE (ptr);
5406 new_stmt = gimple_build_assign 5591 new_stmt = gimple_build_assign
5407 (new_temp, BIT_AND_EXPR, ptr, 5592 (new_temp, BIT_AND_EXPR, ptr,
5408 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); 5593 fold_build2 (MINUS_EXPR, type,
5594 build_int_cst (type, 0),
5595 build_int_cst (type, align)));
5409 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); 5596 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5410 gcc_assert (!new_bb); 5597 gcc_assert (!new_bb);
5411 data_ref 5598 data_ref
5412 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp, 5599 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
5413 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0)); 5600 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
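The BIT_AND_EXPR built above floor-aligns the pointer; a plain-C equivalent of that masking is sketched below (hypothetical helper, assuming a power-of-two alignment):

#include <stdint.h>

static void *
floor_align (void *p, uintptr_t align)
{
  /* 0 - align is the all-ones mask with the low log2(align) bits clear,
     matching the MINUS_EXPR form used above for poly-int alignments.  */
  return (void *) ((uintptr_t) p & (0 - align));
}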
6238 DR_GROUP_GAP is the number of steps in elements from the previous 6425 DR_GROUP_GAP is the number of steps in elements from the previous
6239 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that 6426 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
6240 correspond to the gaps. */ 6427 correspond to the gaps. */
6241 if (next_stmt_info != first_stmt_info 6428 if (next_stmt_info != first_stmt_info
6242 && gap_count < DR_GROUP_GAP (next_stmt_info)) 6429 && gap_count < DR_GROUP_GAP (next_stmt_info))
6243 { 6430 {
6244 gap_count++; 6431 gap_count++;
6245 continue; 6432 continue;
6246 } 6433 }
6247 6434
6248 while (next_stmt_info) 6435 /* ??? The following needs cleanup after the removal of
6436 DR_GROUP_SAME_DR_STMT. */
6437 if (next_stmt_info)
6249 { 6438 {
6250 stmt_vec_info new_stmt_info = vinfo->lookup_def (tmp_data_ref); 6439 stmt_vec_info new_stmt_info = vinfo->lookup_def (tmp_data_ref);
6251 /* We assume that if VEC_STMT is not NULL, this is a case of multiple 6440 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
6252 copies, and we put the new vector statement in the first available 6441 copies, and we put the new vector statement in the first available
6253 RELATED_STMT. */ 6442 RELATED_STMT. */
6254 if (!STMT_VINFO_VEC_STMT (next_stmt_info)) 6443 if (!STMT_VINFO_VEC_STMT (next_stmt_info))
6255 STMT_VINFO_VEC_STMT (next_stmt_info) = new_stmt_info; 6444 STMT_VINFO_VEC_STMT (next_stmt_info) = new_stmt_info;
6256 else 6445 else
6257 { 6446 {
6258 if (!DR_GROUP_SAME_DR_STMT (next_stmt_info)) 6447 stmt_vec_info prev_stmt_info
6259 { 6448 = STMT_VINFO_VEC_STMT (next_stmt_info);
6260 stmt_vec_info prev_stmt_info 6449 stmt_vec_info rel_stmt_info
6261 = STMT_VINFO_VEC_STMT (next_stmt_info); 6450 = STMT_VINFO_RELATED_STMT (prev_stmt_info);
6262 stmt_vec_info rel_stmt_info 6451 while (rel_stmt_info)
6263 = STMT_VINFO_RELATED_STMT (prev_stmt_info); 6452 {
6264 while (rel_stmt_info) 6453 prev_stmt_info = rel_stmt_info;
6265 { 6454 rel_stmt_info = STMT_VINFO_RELATED_STMT (rel_stmt_info);
6266 prev_stmt_info = rel_stmt_info; 6455 }
6267 rel_stmt_info = STMT_VINFO_RELATED_STMT (rel_stmt_info); 6456
6268 } 6457 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6269
6270 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6271 }
6272 } 6458 }
6273 6459
6274 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); 6460 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6275 gap_count = 1; 6461 gap_count = 1;
6276 /* If NEXT_STMT_INFO accesses the same DR as the previous statement,
6277 put the same TMP_DATA_REF as its vectorized statement; otherwise
6278 get the next data-ref from RESULT_CHAIN. */
6279 if (!next_stmt_info || !DR_GROUP_SAME_DR_STMT (next_stmt_info))
6280 break;
6281 } 6462 }
6282 } 6463 }
6283 } 6464 }
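For reference, a hedged example of a load group with a gap, which is the situation the DR_GROUP_GAP bookkeeping above skips over (the struct and function names are invented):

struct point { float x, y, z; };

/* x and z are read but y is not, so the interleaved load group for p[i]
   has a gap at y; the loop above skips the vector statements that would
   correspond to that gap.  */
float
sum_xz (const struct point *p, int n)
{
  float s = 0.0f;
  for (int i = 0; i < n; i++)
    s += p[i].x + p[i].z;
  return s;
}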
6284 6465
6285 /* Function vect_force_dr_alignment_p. 6466 /* Function vect_force_dr_alignment_p.
6286 6467
6287 Returns whether the alignment of a DECL can be forced to be aligned 6468 Returns whether the alignment of a DECL can be forced to be aligned
6288 on ALIGNMENT bit boundary. */ 6469 on ALIGNMENT bit boundary. */
6289 6470
6290 bool 6471 bool
6291 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment) 6472 vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
6292 { 6473 {
6293 if (!VAR_P (decl)) 6474 if (!VAR_P (decl))
6294 return false; 6475 return false;
6295 6476
6296 if (decl_in_symtab_p (decl) 6477 if (decl_in_symtab_p (decl)
6297 && !symtab_node::get (decl)->can_increase_alignment_p ()) 6478 && !symtab_node::get (decl)->can_increase_alignment_p ())
6298 return false; 6479 return false;
6299 6480
6300 if (TREE_STATIC (decl)) 6481 if (TREE_STATIC (decl))
6301 return (alignment <= MAX_OFILE_ALIGNMENT); 6482 return (known_le (alignment,
6483 (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
6302 else 6484 else
6303 return (alignment <= MAX_STACK_ALIGNMENT); 6485 return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
6304 } 6486 }
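A hypothetical source-level analogue of what a successful vect_can_force_dr_alignment_p allows: the vectorizer raises DECL_ALIGN itself, so the attribute below is only shown to illustrate the effect and the MAX_OFILE_ALIGNMENT bound that applies to static decls.

/* A TREE_STATIC decl may have its alignment raised, up to
   MAX_OFILE_ALIGNMENT; an explicit source-level equivalent would be: */
static float data[1024] __attribute__ ((aligned (32)));

float *
aligned_base (void)
{
  return data;
}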
6305 6487
6306 6488
6307 /* Return whether the data reference DR_INFO is supported with respect to its 6489 /* Return whether the data reference DR_INFO is supported with respect to its
6308 alignment. 6490 alignment.
6317 data_reference *dr = dr_info->dr; 6499 data_reference *dr = dr_info->dr;
6318 stmt_vec_info stmt_info = dr_info->stmt; 6500 stmt_vec_info stmt_info = dr_info->stmt;
6319 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 6501 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6320 machine_mode mode = TYPE_MODE (vectype); 6502 machine_mode mode = TYPE_MODE (vectype);
6321 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 6503 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6322 struct loop *vect_loop = NULL; 6504 class loop *vect_loop = NULL;
6323 bool nested_in_vect_loop = false; 6505 bool nested_in_vect_loop = false;
6324 6506
6325 if (aligned_access_p (dr_info) && !check_aligned_accesses) 6507 if (aligned_access_p (dr_info) && !check_aligned_accesses)
6326 return dr_aligned; 6508 return dr_aligned;
6327 6509