Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-vect-data-refs.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Data References Analysis and Manipulation Utilities for Vectorization. | 1 /* Data References Analysis and Manipulation Utilities for Vectorization. |
2 Copyright (C) 2003-2018 Free Software Foundation, Inc. | 2 Copyright (C) 2003-2020 Free Software Foundation, Inc. |
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 and Ira Rosen <irar@il.ibm.com> | 4 and Ira Rosen <irar@il.ibm.com> |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
47 #include "cfgloop.h" | 47 #include "cfgloop.h" |
48 #include "tree-scalar-evolution.h" | 48 #include "tree-scalar-evolution.h" |
49 #include "tree-vectorizer.h" | 49 #include "tree-vectorizer.h" |
50 #include "expr.h" | 50 #include "expr.h" |
51 #include "builtins.h" | 51 #include "builtins.h" |
52 #include "params.h" | |
53 #include "tree-cfg.h" | 52 #include "tree-cfg.h" |
54 #include "tree-hash-traits.h" | 53 #include "tree-hash-traits.h" |
55 #include "vec-perm-indices.h" | 54 #include "vec-perm-indices.h" |
56 #include "internal-fn.h" | 55 #include "internal-fn.h" |
57 | 56 |
143 | 142 |
144 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); | 143 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); |
145 if (rhs < lhs) | 144 if (rhs < lhs) |
146 scalar_type = rhs_type; | 145 scalar_type = rhs_type; |
147 } | 146 } |
147 else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt)) | |
148 { | |
149 unsigned int i = 0; | |
150 if (gimple_call_internal_p (call)) | |
151 { | |
152 internal_fn ifn = gimple_call_internal_fn (call); | |
153 if (internal_load_fn_p (ifn) || internal_store_fn_p (ifn)) | |
154 /* gimple_expr_type already picked the type of the loaded | |
155 or stored data. */ | |
156 i = ~0U; | |
157 else if (internal_fn_mask_index (ifn) == 0) | |
158 i = 1; | |
159 } | |
160 if (i < gimple_call_num_args (call)) | |
161 { | |
162 tree rhs_type = TREE_TYPE (gimple_call_arg (call, i)); | |
163 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type))) | |
164 { | |
165 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); | |
166 if (rhs < lhs) | |
167 scalar_type = rhs_type; | |
168 } | |
169 } | |
170 } | |
148 | 171 |
149 *lhs_size_unit = lhs; | 172 *lhs_size_unit = lhs; |
150 *rhs_size_unit = rhs; | 173 *rhs_size_unit = rhs; |
151 return scalar_type; | 174 return scalar_type; |
152 } | 175 } |
157 Return false if versioning is not supported. */ | 180 Return false if versioning is not supported. */ |
158 | 181 |
159 static opt_result | 182 static opt_result |
160 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) | 183 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) |
161 { | 184 { |
162 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 185 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
163 | 186 |
164 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0) | 187 if ((unsigned) param_vect_max_version_for_alias_checks == 0) |
165 return opt_result::failure_at (vect_location, | 188 return opt_result::failure_at (vect_location, |
166 "will not create alias checks, as" | 189 "will not create alias checks, as" |
167 " --param vect-max-version-for-alias-checks" | 190 " --param vect-max-version-for-alias-checks" |
168 " == 0\n"); | 191 " == 0\n"); |
169 | 192 |
208 if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a) | 231 if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a) |
209 && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)) | 232 && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)) |
210 return true; | 233 return true; |
211 | 234 |
212 /* STMT_A and STMT_B belong to overlapping groups. All loads in a | 235 /* STMT_A and STMT_B belong to overlapping groups. All loads in a |
213 group are emitted at the position of the last scalar load and all | 236 SLP group are emitted at the position of the last scalar load and |
214 stores in a group are emitted at the position of the last scalar store. | 237 all loads in an interleaving group are emitted at the position |
238 of the first scalar load. | |
239 Stores in a group are emitted at the position of the last scalar store. | |
215 Compute that position and check whether the resulting order matches | 240 Compute that position and check whether the resulting order matches |
216 the current one. */ | 241 the current one. |
217 stmt_vec_info last_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a); | 242 We have not yet decided between SLP and interleaving so we have |
243 to conservatively assume both. */ | |
244 stmt_vec_info il_a; | |
245 stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a); | |
218 if (last_a) | 246 if (last_a) |
219 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s; | 247 { |
220 s = DR_GROUP_NEXT_ELEMENT (s)) | 248 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s; |
221 last_a = get_later_stmt (last_a, s); | 249 s = DR_GROUP_NEXT_ELEMENT (s)) |
250 last_a = get_later_stmt (last_a, s); | |
251 if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a))) | |
252 { | |
253 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s; | |
254 s = DR_GROUP_NEXT_ELEMENT (s)) | |
255 if (get_later_stmt (il_a, s) == il_a) | |
256 il_a = s; | |
257 } | |
258 else | |
259 il_a = last_a; | |
260 } | |
222 else | 261 else |
223 last_a = stmtinfo_a; | 262 last_a = il_a = stmtinfo_a; |
224 stmt_vec_info last_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b); | 263 stmt_vec_info il_b; |
264 stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b); | |
225 if (last_b) | 265 if (last_b) |
226 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s; | 266 { |
227 s = DR_GROUP_NEXT_ELEMENT (s)) | 267 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s; |
228 last_b = get_later_stmt (last_b, s); | 268 s = DR_GROUP_NEXT_ELEMENT (s)) |
269 last_b = get_later_stmt (last_b, s); | |
270 if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b))) | |
271 { | |
272 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s; | |
273 s = DR_GROUP_NEXT_ELEMENT (s)) | |
274 if (get_later_stmt (il_b, s) == il_b) | |
275 il_b = s; | |
276 } | |
277 else | |
278 il_b = last_b; | |
279 } | |
229 else | 280 else |
230 last_b = stmtinfo_b; | 281 last_b = il_b = stmtinfo_b; |
231 return ((get_later_stmt (last_a, last_b) == last_a) | 282 bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a); |
232 == (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a)); | 283 return (/* SLP */ |
284 (get_later_stmt (last_a, last_b) == last_a) == a_after_b | |
285 /* Interleaving */ | |
286 && (get_later_stmt (il_a, il_b) == il_a) == a_after_b | |
287 /* Mixed */ | |
288 && (get_later_stmt (il_a, last_b) == il_a) == a_after_b | |
289 && (get_later_stmt (last_a, il_b) == last_a) == a_after_b); | |
233 } | 290 } |
234 | 291 |
235 /* A subroutine of vect_analyze_data_ref_dependence. Handle | 292 /* A subroutine of vect_analyze_data_ref_dependence. Handle |
236 DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence | 293 DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence |
237 distances. These distances are conservatively correct but they don't | 294 distances. These distances are conservatively correct but they don't |
246 static bool | 303 static bool |
247 vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr, | 304 vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr, |
248 loop_vec_info loop_vinfo, | 305 loop_vec_info loop_vinfo, |
249 int loop_depth, unsigned int *max_vf) | 306 int loop_depth, unsigned int *max_vf) |
250 { | 307 { |
251 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 308 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
252 lambda_vector dist_v; | 309 lambda_vector dist_v; |
253 unsigned int i; | 310 unsigned int i; |
254 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) | 311 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
255 { | 312 { |
256 int dist = dist_v[loop_depth]; | 313 int dist = dist_v[loop_depth]; |
303 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, | 360 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, |
304 loop_vec_info loop_vinfo, | 361 loop_vec_info loop_vinfo, |
305 unsigned int *max_vf) | 362 unsigned int *max_vf) |
306 { | 363 { |
307 unsigned int i; | 364 unsigned int i; |
308 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 365 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
309 struct data_reference *dra = DDR_A (ddr); | 366 struct data_reference *dra = DDR_A (ddr); |
310 struct data_reference *drb = DDR_B (ddr); | 367 struct data_reference *drb = DDR_B (ddr); |
311 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra); | 368 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra); |
312 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb); | 369 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb); |
313 stmt_vec_info stmtinfo_a = dr_info_a->stmt; | 370 stmt_vec_info stmtinfo_a = dr_info_a->stmt; |
471 { | 528 { |
472 /* If DDR_REVERSED_P the order of the data-refs in DDR was | 529 /* If DDR_REVERSED_P the order of the data-refs in DDR was |
473 reversed (to make distance vector positive), and the actual | 530 reversed (to make distance vector positive), and the actual |
474 distance is negative. */ | 531 distance is negative. */ |
475 if (dump_enabled_p ()) | 532 if (dump_enabled_p ()) |
476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 533 dump_printf_loc (MSG_NOTE, vect_location, |
477 "dependence distance negative.\n"); | 534 "dependence distance negative.\n"); |
535 /* When doing outer loop vectorization, we need to check if there is | |
536 a backward dependence at the inner loop level if the dependence | |
537 at the outer loop is reversed. See PR81740. */ | |
538 if (nested_in_vect_loop_p (loop, stmtinfo_a) | |
539 || nested_in_vect_loop_p (loop, stmtinfo_b)) | |
540 { | |
541 unsigned inner_depth = index_in_loop_nest (loop->inner->num, | |
542 DDR_LOOP_NEST (ddr)); | |
543 if (dist_v[inner_depth] < 0) | |
544 return opt_result::failure_at (stmtinfo_a->stmt, | |
545 "not vectorized, dependence " | |
546 "between data-refs %T and %T\n", | |
547 DR_REF (dra), DR_REF (drb)); | |
548 } | |
478 /* Record a negative dependence distance to later limit the | 549 /* Record a negative dependence distance to later limit the |
479 amount of stmt copying / unrolling we can perform. | 550 amount of stmt copying / unrolling we can perform. |
480 Only need to handle read-after-write dependence. */ | 551 Only need to handle read-after-write dependence. */ |
481 if (DR_IS_READ (drb) | 552 if (DR_IS_READ (drb) |
482 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0 | 553 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0 |
488 unsigned int abs_dist = abs (dist); | 559 unsigned int abs_dist = abs (dist); |
489 if (abs_dist >= 2 && abs_dist < *max_vf) | 560 if (abs_dist >= 2 && abs_dist < *max_vf) |
490 { | 561 { |
491 /* The dependence distance requires reduction of the maximal | 562 /* The dependence distance requires reduction of the maximal |
492 vectorization factor. */ | 563 vectorization factor. */ |
493 *max_vf = abs (dist); | 564 *max_vf = abs_dist; |
494 if (dump_enabled_p ()) | 565 if (dump_enabled_p ()) |
495 dump_printf_loc (MSG_NOTE, vect_location, | 566 dump_printf_loc (MSG_NOTE, vect_location, |
496 "adjusting maximal vectorization factor to %i\n", | 567 "adjusting maximal vectorization factor to %i\n", |
497 *max_vf); | 568 *max_vf); |
498 } | 569 } |
793 | 864 |
794 void | 865 void |
795 vect_record_base_alignments (vec_info *vinfo) | 866 vect_record_base_alignments (vec_info *vinfo) |
796 { | 867 { |
797 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); | 868 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); |
798 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; | 869 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; |
799 data_reference *dr; | 870 data_reference *dr; |
800 unsigned int i; | 871 unsigned int i; |
801 FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr) | 872 FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr) |
802 { | 873 { |
803 dr_vec_info *dr_info = vinfo->lookup_dr (dr); | 874 dr_vec_info *dr_info = vinfo->lookup_dr (dr); |
817 } | 888 } |
818 } | 889 } |
819 | 890 |
820 /* Return the target alignment for the vectorized form of DR_INFO. */ | 891 /* Return the target alignment for the vectorized form of DR_INFO. */ |
821 | 892 |
822 static unsigned int | 893 static poly_uint64 |
823 vect_calculate_target_alignment (dr_vec_info *dr_info) | 894 vect_calculate_target_alignment (dr_vec_info *dr_info) |
824 { | 895 { |
825 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); | 896 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); |
826 return targetm.vectorize.preferred_vector_alignment (vectype); | 897 return targetm.vectorize.preferred_vector_alignment (vectype); |
827 } | 898 } |
840 vect_compute_data_ref_alignment (dr_vec_info *dr_info) | 911 vect_compute_data_ref_alignment (dr_vec_info *dr_info) |
841 { | 912 { |
842 stmt_vec_info stmt_info = dr_info->stmt; | 913 stmt_vec_info stmt_info = dr_info->stmt; |
843 vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments; | 914 vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments; |
844 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 915 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
845 struct loop *loop = NULL; | 916 class loop *loop = NULL; |
846 tree ref = DR_REF (dr_info->dr); | 917 tree ref = DR_REF (dr_info->dr); |
847 tree vectype = STMT_VINFO_VECTYPE (stmt_info); | 918 tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
848 | 919 |
849 if (dump_enabled_p ()) | 920 if (dump_enabled_p ()) |
850 dump_printf_loc (MSG_NOTE, vect_location, | 921 dump_printf_loc (MSG_NOTE, vect_location, |
860 return; | 931 return; |
861 | 932 |
862 innermost_loop_behavior *drb = vect_dr_behavior (dr_info); | 933 innermost_loop_behavior *drb = vect_dr_behavior (dr_info); |
863 bool step_preserves_misalignment_p; | 934 bool step_preserves_misalignment_p; |
864 | 935 |
865 unsigned HOST_WIDE_INT vector_alignment | 936 poly_uint64 vector_alignment |
866 = vect_calculate_target_alignment (dr_info) / BITS_PER_UNIT; | 937 = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT); |
867 DR_TARGET_ALIGNMENT (dr_info) = vector_alignment; | 938 DR_TARGET_ALIGNMENT (dr_info) = vector_alignment; |
939 | |
940 /* If the main loop has peeled for alignment we have no way of knowing | |
941 whether the data accesses in the epilogues are aligned. We can't at | |
942 compile time answer the question whether we have entered the main loop or | |
943 not. Fixes PR 92351. */ | |
944 if (loop_vinfo) | |
945 { | |
946 loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); | |
947 if (orig_loop_vinfo | |
948 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0) | |
949 return; | |
950 } | |
951 | |
952 unsigned HOST_WIDE_INT vect_align_c; | |
953 if (!vector_alignment.is_constant (&vect_align_c)) | |
954 return; | |
868 | 955 |
869 /* No step for BB vectorization. */ | 956 /* No step for BB vectorization. */ |
870 if (!loop) | 957 if (!loop) |
871 { | 958 { |
872 gcc_assert (integer_zerop (drb->step)); | 959 gcc_assert (integer_zerop (drb->step)); |
880 we have to check that the stride of the dataref in the inner-loop evenly | 967 we have to check that the stride of the dataref in the inner-loop evenly |
881 divides by the vector alignment. */ | 968 divides by the vector alignment. */ |
882 else if (nested_in_vect_loop_p (loop, stmt_info)) | 969 else if (nested_in_vect_loop_p (loop, stmt_info)) |
883 { | 970 { |
884 step_preserves_misalignment_p | 971 step_preserves_misalignment_p |
885 = (DR_STEP_ALIGNMENT (dr_info->dr) % vector_alignment) == 0; | 972 = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0; |
886 | 973 |
887 if (dump_enabled_p ()) | 974 if (dump_enabled_p ()) |
888 { | 975 { |
889 if (step_preserves_misalignment_p) | 976 if (step_preserves_misalignment_p) |
890 dump_printf_loc (MSG_NOTE, vect_location, | 977 dump_printf_loc (MSG_NOTE, vect_location, |
902 the dataref evenly divides by the alignment. */ | 989 the dataref evenly divides by the alignment. */ |
903 else | 990 else |
904 { | 991 { |
905 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 992 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
906 step_preserves_misalignment_p | 993 step_preserves_misalignment_p |
907 = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vector_alignment); | 994 = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c); |
908 | 995 |
909 if (!step_preserves_misalignment_p && dump_enabled_p ()) | 996 if (!step_preserves_misalignment_p && dump_enabled_p ()) |
910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
911 "step doesn't divide the vector alignment.\n"); | 998 "step doesn't divide the vector alignment.\n"); |
912 } | 999 } |
921 { | 1008 { |
922 base_alignment = (*entry)->base_alignment; | 1009 base_alignment = (*entry)->base_alignment; |
923 base_misalignment = (*entry)->base_misalignment; | 1010 base_misalignment = (*entry)->base_misalignment; |
924 } | 1011 } |
925 | 1012 |
926 if (drb->offset_alignment < vector_alignment | 1013 if (drb->offset_alignment < vect_align_c |
927 || !step_preserves_misalignment_p | 1014 || !step_preserves_misalignment_p |
928 /* We need to know whether the step wrt the vectorized loop is | 1015 /* We need to know whether the step wrt the vectorized loop is |
929 negative when computing the starting misalignment below. */ | 1016 negative when computing the starting misalignment below. */ |
930 || TREE_CODE (drb->step) != INTEGER_CST) | 1017 || TREE_CODE (drb->step) != INTEGER_CST) |
931 { | 1018 { |
933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1020 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
934 "Unknown alignment for access: %T\n", ref); | 1021 "Unknown alignment for access: %T\n", ref); |
935 return; | 1022 return; |
936 } | 1023 } |
937 | 1024 |
938 if (base_alignment < vector_alignment) | 1025 if (base_alignment < vect_align_c) |
939 { | 1026 { |
940 unsigned int max_alignment; | 1027 unsigned int max_alignment; |
941 tree base = get_base_for_alignment (drb->base_address, &max_alignment); | 1028 tree base = get_base_for_alignment (drb->base_address, &max_alignment); |
942 if (max_alignment < vector_alignment | 1029 if (max_alignment < vect_align_c |
943 || !vect_can_force_dr_alignment_p (base, | 1030 || !vect_can_force_dr_alignment_p (base, |
944 vector_alignment * BITS_PER_UNIT)) | 1031 vect_align_c * BITS_PER_UNIT)) |
945 { | 1032 { |
946 if (dump_enabled_p ()) | 1033 if (dump_enabled_p ()) |
947 dump_printf_loc (MSG_NOTE, vect_location, | 1034 dump_printf_loc (MSG_NOTE, vect_location, |
948 "can't force alignment of ref: %T\n", ref); | 1035 "can't force alignment of ref: %T\n", ref); |
949 return; | 1036 return; |
970 /* PLUS because STEP is negative. */ | 1057 /* PLUS because STEP is negative. */ |
971 misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1) | 1058 misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1) |
972 * TREE_INT_CST_LOW (drb->step)); | 1059 * TREE_INT_CST_LOW (drb->step)); |
973 | 1060 |
974 unsigned int const_misalignment; | 1061 unsigned int const_misalignment; |
975 if (!known_misalignment (misalignment, vector_alignment, | 1062 if (!known_misalignment (misalignment, vect_align_c, &const_misalignment)) |
976 &const_misalignment)) | |
977 { | 1063 { |
978 if (dump_enabled_p ()) | 1064 if (dump_enabled_p ()) |
979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
980 "Non-constant misalignment for access: %T\n", ref); | 1066 "Non-constant misalignment for access: %T\n", ref); |
981 return; | 1067 return; |
1008 dr_vec_info *dr_peel_info, int npeel) | 1094 dr_vec_info *dr_peel_info, int npeel) |
1009 { | 1095 { |
1010 unsigned int i; | 1096 unsigned int i; |
1011 vec<dr_p> same_aligned_drs; | 1097 vec<dr_p> same_aligned_drs; |
1012 struct data_reference *current_dr; | 1098 struct data_reference *current_dr; |
1013 int dr_size = vect_get_scalar_dr_size (dr_info); | |
1014 int dr_peel_size = vect_get_scalar_dr_size (dr_peel_info); | |
1015 stmt_vec_info stmt_info = dr_info->stmt; | |
1016 stmt_vec_info peel_stmt_info = dr_peel_info->stmt; | 1099 stmt_vec_info peel_stmt_info = dr_peel_info->stmt; |
1017 | 1100 |
1018 /* For interleaved data accesses the step in the loop must be multiplied by | 1101 /* It can be assumed that if dr_info has the same alignment as dr_peel, |
1019 the size of the interleaving group. */ | 1102 it is aligned in the vector loop. */ |
1020 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) | |
1021 dr_size *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)); | |
1022 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info)) | |
1023 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info); | |
1024 | |
1025 /* It can be assumed that the data refs with the same alignment as dr_peel | |
1026 are aligned in the vector loop. */ | |
1027 same_aligned_drs = STMT_VINFO_SAME_ALIGN_REFS (peel_stmt_info); | 1103 same_aligned_drs = STMT_VINFO_SAME_ALIGN_REFS (peel_stmt_info); |
1028 FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr) | 1104 FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr) |
1029 { | 1105 { |
1030 if (current_dr != dr_info->dr) | 1106 if (current_dr != dr_info->dr) |
1031 continue; | 1107 continue; |
1032 gcc_assert (!known_alignment_for_access_p (dr_info) | 1108 gcc_assert (!known_alignment_for_access_p (dr_info) |
1033 || !known_alignment_for_access_p (dr_peel_info) | 1109 || !known_alignment_for_access_p (dr_peel_info) |
1034 || (DR_MISALIGNMENT (dr_info) / dr_size | 1110 || (DR_MISALIGNMENT (dr_info) |
1035 == DR_MISALIGNMENT (dr_peel_info) / dr_peel_size)); | 1111 == DR_MISALIGNMENT (dr_peel_info))); |
1036 SET_DR_MISALIGNMENT (dr_info, 0); | 1112 SET_DR_MISALIGNMENT (dr_info, 0); |
1037 return; | 1113 return; |
1038 } | 1114 } |
1039 | 1115 |
1040 if (known_alignment_for_access_p (dr_info) | 1116 unsigned HOST_WIDE_INT alignment; |
1117 if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment) | |
1118 && known_alignment_for_access_p (dr_info) | |
1041 && known_alignment_for_access_p (dr_peel_info)) | 1119 && known_alignment_for_access_p (dr_peel_info)) |
1042 { | 1120 { |
1043 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr), | |
1044 size_zero_node) < 0; | |
1045 int misal = DR_MISALIGNMENT (dr_info); | 1121 int misal = DR_MISALIGNMENT (dr_info); |
1046 misal += negative ? -npeel * dr_size : npeel * dr_size; | 1122 misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr)); |
1047 misal &= DR_TARGET_ALIGNMENT (dr_info) - 1; | 1123 misal &= alignment - 1; |
1048 SET_DR_MISALIGNMENT (dr_info, misal); | 1124 SET_DR_MISALIGNMENT (dr_info, misal); |
1049 return; | 1125 return; |
1050 } | 1126 } |
1051 | 1127 |
1052 if (dump_enabled_p ()) | 1128 if (dump_enabled_p ()) |
1592 | 1668 |
1593 opt_result | 1669 opt_result |
1594 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) | 1670 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) |
1595 { | 1671 { |
1596 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); | 1672 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
1597 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 1673 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1598 enum dr_alignment_support supportable_dr_alignment; | 1674 enum dr_alignment_support supportable_dr_alignment; |
1599 dr_vec_info *first_store = NULL; | 1675 dr_vec_info *first_store = NULL; |
1600 dr_vec_info *dr0_info = NULL; | 1676 dr_vec_info *dr0_info = NULL; |
1601 struct data_reference *dr; | 1677 struct data_reference *dr; |
1602 unsigned int i, j; | 1678 unsigned int i, j; |
1686 unsigned int npeel_tmp = 0; | 1762 unsigned int npeel_tmp = 0; |
1687 bool negative = tree_int_cst_compare (DR_STEP (dr), | 1763 bool negative = tree_int_cst_compare (DR_STEP (dr), |
1688 size_zero_node) < 0; | 1764 size_zero_node) < 0; |
1689 | 1765 |
1690 vectype = STMT_VINFO_VECTYPE (stmt_info); | 1766 vectype = STMT_VINFO_VECTYPE (stmt_info); |
1691 unsigned int target_align = DR_TARGET_ALIGNMENT (dr_info); | 1767 /* If known_alignment_for_access_p then we have set |
1768 DR_MISALIGNMENT which is only done if we know it at compiler | |
1769 time, so it is safe to assume target alignment is constant. | |
1770 */ | |
1771 unsigned int target_align = | |
1772 DR_TARGET_ALIGNMENT (dr_info).to_constant (); | |
1692 unsigned int dr_size = vect_get_scalar_dr_size (dr_info); | 1773 unsigned int dr_size = vect_get_scalar_dr_size (dr_info); |
1693 mis = (negative | 1774 mis = (negative |
1694 ? DR_MISALIGNMENT (dr_info) | 1775 ? DR_MISALIGNMENT (dr_info) |
1695 : -DR_MISALIGNMENT (dr_info)); | 1776 : -DR_MISALIGNMENT (dr_info)); |
1696 if (DR_MISALIGNMENT (dr_info) != 0) | 1777 if (DR_MISALIGNMENT (dr_info) != 0) |
1750 /* For data-refs with the same number of related | 1831 /* For data-refs with the same number of related |
1751 accesses prefer the one where the misalign | 1832 accesses prefer the one where the misalign |
1752 computation will be invariant in the outermost loop. */ | 1833 computation will be invariant in the outermost loop. */ |
1753 else if (same_align_drs_max == same_align_drs) | 1834 else if (same_align_drs_max == same_align_drs) |
1754 { | 1835 { |
1755 struct loop *ivloop0, *ivloop; | 1836 class loop *ivloop0, *ivloop; |
1756 ivloop0 = outermost_invariant_loop_for_expr | 1837 ivloop0 = outermost_invariant_loop_for_expr |
1757 (loop, DR_BASE_ADDRESS (dr0_info->dr)); | 1838 (loop, DR_BASE_ADDRESS (dr0_info->dr)); |
1758 ivloop = outermost_invariant_loop_for_expr | 1839 ivloop = outermost_invariant_loop_for_expr |
1759 (loop, DR_BASE_ADDRESS (dr)); | 1840 (loop, DR_BASE_ADDRESS (dr)); |
1760 if ((ivloop && !ivloop0) | 1841 if ((ivloop && !ivloop0) |
1963 vectorization factor minus the misalignment as an element | 2044 vectorization factor minus the misalignment as an element |
1964 count. */ | 2045 count. */ |
1965 mis = (negative | 2046 mis = (negative |
1966 ? DR_MISALIGNMENT (dr0_info) | 2047 ? DR_MISALIGNMENT (dr0_info) |
1967 : -DR_MISALIGNMENT (dr0_info)); | 2048 : -DR_MISALIGNMENT (dr0_info)); |
1968 unsigned int target_align = DR_TARGET_ALIGNMENT (dr0_info); | 2049 /* If known_alignment_for_access_p then we have set |
2050 DR_MISALIGNMENT which is only done if we know it at compiler | |
2051 time, so it is safe to assume target alignment is constant. | |
2052 */ | |
2053 unsigned int target_align = | |
2054 DR_TARGET_ALIGNMENT (dr0_info).to_constant (); | |
1969 npeel = ((mis & (target_align - 1)) | 2055 npeel = ((mis & (target_align - 1)) |
1970 / vect_get_scalar_dr_size (dr0_info)); | 2056 / vect_get_scalar_dr_size (dr0_info)); |
1971 } | 2057 } |
1972 | 2058 |
1973 /* For interleaved data access every iteration accesses all the | 2059 /* For interleaved data access every iteration accesses all the |
1997 | 2083 |
1998 /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */ | 2084 /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */ |
1999 if (do_peeling) | 2085 if (do_peeling) |
2000 { | 2086 { |
2001 unsigned max_allowed_peel | 2087 unsigned max_allowed_peel |
2002 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT); | 2088 = param_vect_max_peeling_for_alignment; |
2089 if (flag_vect_cost_model == VECT_COST_MODEL_CHEAP) | |
2090 max_allowed_peel = 0; | |
2003 if (max_allowed_peel != (unsigned)-1) | 2091 if (max_allowed_peel != (unsigned)-1) |
2004 { | 2092 { |
2005 unsigned max_peel = npeel; | 2093 unsigned max_peel = npeel; |
2006 if (max_peel == 0) | 2094 if (max_peel == 0) |
2007 { | 2095 { |
2008 unsigned int target_align = DR_TARGET_ALIGNMENT (dr0_info); | 2096 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info); |
2009 max_peel = (target_align | 2097 unsigned HOST_WIDE_INT target_align_c; |
2010 / vect_get_scalar_dr_size (dr0_info) - 1); | 2098 if (target_align.is_constant (&target_align_c)) |
2099 max_peel = | |
2100 target_align_c / vect_get_scalar_dr_size (dr0_info) - 1; | |
2101 else | |
2102 { | |
2103 do_peeling = false; | |
2104 if (dump_enabled_p ()) | |
2105 dump_printf_loc (MSG_NOTE, vect_location, | |
2106 "Disable peeling, max peels set and vector" | |
2107 " alignment unknown\n"); | |
2108 } | |
2011 } | 2109 } |
2012 if (max_peel > max_allowed_peel) | 2110 if (max_peel > max_allowed_peel) |
2013 { | 2111 { |
2014 do_peeling = false; | 2112 do_peeling = false; |
2015 if (dump_enabled_p ()) | 2113 if (dump_enabled_p ()) |
2081 } | 2179 } |
2082 | 2180 |
2083 /* (2) Versioning to force alignment. */ | 2181 /* (2) Versioning to force alignment. */ |
2084 | 2182 |
2085 /* Try versioning if: | 2183 /* Try versioning if: |
2086 1) optimize loop for speed | 2184 1) optimize loop for speed and the cost-model is not cheap |
2087 2) there is at least one unsupported misaligned data ref with an unknown | 2185 2) there is at least one unsupported misaligned data ref with an unknown |
2088 misalignment, and | 2186 misalignment, and |
2089 3) all misaligned data refs with a known misalignment are supported, and | 2187 3) all misaligned data refs with a known misalignment are supported, and |
2090 4) the number of runtime alignment checks is within reason. */ | 2188 4) the number of runtime alignment checks is within reason. */ |
2091 | 2189 |
2092 do_versioning = | 2190 do_versioning |
2093 optimize_loop_nest_for_speed_p (loop) | 2191 = (optimize_loop_nest_for_speed_p (loop) |
2094 && (!loop->inner); /* FORNOW */ | 2192 && !loop->inner /* FORNOW */ |
2193 && flag_vect_cost_model != VECT_COST_MODEL_CHEAP); | |
2095 | 2194 |
2096 if (do_versioning) | 2195 if (do_versioning) |
2097 { | 2196 { |
2098 FOR_EACH_VEC_ELT (datarefs, i, dr) | 2197 FOR_EACH_VEC_ELT (datarefs, i, dr) |
2099 { | 2198 { |
2125 int mask; | 2224 int mask; |
2126 tree vectype; | 2225 tree vectype; |
2127 | 2226 |
2128 if (known_alignment_for_access_p (dr_info) | 2227 if (known_alignment_for_access_p (dr_info) |
2129 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length () | 2228 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length () |
2130 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS)) | 2229 >= (unsigned) param_vect_max_version_for_alignment_checks) |
2131 { | 2230 { |
2132 do_versioning = false; | 2231 do_versioning = false; |
2133 break; | 2232 break; |
2134 } | 2233 } |
2135 | 2234 |
2145 { | 2244 { |
2146 do_versioning = false; | 2245 do_versioning = false; |
2147 break; | 2246 break; |
2148 } | 2247 } |
2149 | 2248 |
2249 /* Forcing alignment in the first iteration is no good if | |
2250 we don't keep it across iterations. For now, just disable | |
2251 versioning in this case. | |
2252 ?? We could actually unroll the loop to achieve the required | |
2253 overall step alignment, and forcing the alignment could be | |
2254 done by doing some iterations of the non-vectorized loop. */ | |
2255 if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo) | |
2256 * DR_STEP_ALIGNMENT (dr), | |
2257 DR_TARGET_ALIGNMENT (dr_info))) | |
2258 { | |
2259 do_versioning = false; | |
2260 break; | |
2261 } | |
2262 | |
2150 /* The rightmost bits of an aligned address must be zeros. | 2263 /* The rightmost bits of an aligned address must be zeros. |
2151 Construct the mask needed for this test. For example, | 2264 Construct the mask needed for this test. For example, |
2152 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the | 2265 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the |
2153 mask must be 15 = 0xf. */ | 2266 mask must be 15 = 0xf. */ |
2154 mask = size - 1; | 2267 mask = size - 1; |
2155 | 2268 |
2156 /* FORNOW: use the same mask to test all potentially unaligned | 2269 /* FORNOW: use the same mask to test all potentially unaligned |
2157 references in the loop. The vectorizer currently supports | 2270 references in the loop. */ |
2158 a single vector size, see the reference to | 2271 if (LOOP_VINFO_PTR_MASK (loop_vinfo) |
2159 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the | 2272 && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask) |
2160 vectorization factor is computed. */ | 2273 { |
2161 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo) | 2274 do_versioning = false; |
2162 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask); | 2275 break; |
2276 } | |
2277 | |
2163 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; | 2278 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; |
2164 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info); | 2279 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info); |
2165 } | 2280 } |
2166 } | 2281 } |
2167 | 2282 |
2244 poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra)) | 2359 poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra)) |
2245 - wi::to_poly_offset (DR_INIT (drb))); | 2360 - wi::to_poly_offset (DR_INIT (drb))); |
2246 if (maybe_ne (diff, 0)) | 2361 if (maybe_ne (diff, 0)) |
2247 { | 2362 { |
2248 /* Get the wider of the two alignments. */ | 2363 /* Get the wider of the two alignments. */ |
2249 unsigned int align_a = (vect_calculate_target_alignment (dr_info_a) | 2364 poly_uint64 align_a = |
2250 / BITS_PER_UNIT); | 2365 exact_div (vect_calculate_target_alignment (dr_info_a), |
2251 unsigned int align_b = (vect_calculate_target_alignment (dr_info_b) | 2366 BITS_PER_UNIT); |
2252 / BITS_PER_UNIT); | 2367 poly_uint64 align_b = |
2253 unsigned int max_align = MAX (align_a, align_b); | 2368 exact_div (vect_calculate_target_alignment (dr_info_b), |
2369 BITS_PER_UNIT); | |
2370 unsigned HOST_WIDE_INT align_a_c, align_b_c; | |
2371 if (!align_a.is_constant (&align_a_c) | |
2372 || !align_b.is_constant (&align_b_c)) | |
2373 return; | |
2374 | |
2375 unsigned HOST_WIDE_INT max_align = MAX (align_a_c, align_b_c); | |
2254 | 2376 |
2255 /* Require the gap to be a multiple of the larger vector alignment. */ | 2377 /* Require the gap to be a multiple of the larger vector alignment. */ |
2256 if (!multiple_p (diff, max_align)) | 2378 if (!multiple_p (diff, max_align)) |
2257 return; | 2379 return; |
2258 } | 2380 } |
2436 /* Mark the statement as unvectorizable. */ | 2558 /* Mark the statement as unvectorizable. */ |
2437 STMT_VINFO_VECTORIZABLE (stmt_info) = false; | 2559 STMT_VINFO_VECTORIZABLE (stmt_info) = false; |
2438 return true; | 2560 return true; |
2439 } | 2561 } |
2440 | 2562 |
2441 dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n"); | 2563 if (dump_enabled_p ()) |
2564 dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n"); | |
2442 STMT_VINFO_STRIDED_P (stmt_info) = true; | 2565 STMT_VINFO_STRIDED_P (stmt_info) = true; |
2443 return true; | 2566 return true; |
2444 } | 2567 } |
2445 | 2568 |
2446 if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info) | 2569 if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info) |
2448 /* First stmt in the interleaving chain. Check the chain. */ | 2571 /* First stmt in the interleaving chain. Check the chain. */ |
2449 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info); | 2572 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info); |
2450 struct data_reference *data_ref = dr; | 2573 struct data_reference *data_ref = dr; |
2451 unsigned int count = 1; | 2574 unsigned int count = 1; |
2452 tree prev_init = DR_INIT (data_ref); | 2575 tree prev_init = DR_INIT (data_ref); |
2453 stmt_vec_info prev = stmt_info; | |
2454 HOST_WIDE_INT diff, gaps = 0; | 2576 HOST_WIDE_INT diff, gaps = 0; |
2455 | 2577 |
2456 /* By construction, all group members have INTEGER_CST DR_INITs. */ | 2578 /* By construction, all group members have INTEGER_CST DR_INITs. */ |
2457 while (next) | 2579 while (next) |
2458 { | 2580 { |
2459 /* Skip same data-refs. In case that two or more stmts share | 2581 /* We never have the same DR multiple times. */ |
2460 data-ref (supported only for loads), we vectorize only the first | 2582 gcc_assert (tree_int_cst_compare (DR_INIT (data_ref), |
2461 stmt, and the rest get their vectorized loads from the first | 2583 DR_INIT (STMT_VINFO_DATA_REF (next))) != 0); |
2462 one. */ | 2584 |
2463 if (!tree_int_cst_compare (DR_INIT (data_ref), | |
2464 DR_INIT (STMT_VINFO_DATA_REF (next)))) | |
2465 { | |
2466 if (DR_IS_WRITE (data_ref)) | |
2467 { | |
2468 if (dump_enabled_p ()) | |
2469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
2470 "Two store stmts share the same dr.\n"); | |
2471 return false; | |
2472 } | |
2473 | |
2474 if (dump_enabled_p ()) | |
2475 dump_printf_loc (MSG_NOTE, vect_location, | |
2476 "Two or more load stmts share the same dr.\n"); | |
2477 | |
2478 /* For load use the same data-ref load. */ | |
2479 DR_GROUP_SAME_DR_STMT (next) = prev; | |
2480 | |
2481 prev = next; | |
2482 next = DR_GROUP_NEXT_ELEMENT (next); | |
2483 continue; | |
2484 } | |
2485 | |
2486 prev = next; | |
2487 data_ref = STMT_VINFO_DATA_REF (next); | 2585 data_ref = STMT_VINFO_DATA_REF (next); |
2488 | 2586 |
2489 /* All group members have the same STEP by construction. */ | 2587 /* All group members have the same STEP by construction. */ |
2490 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0)); | 2588 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0)); |
2491 | 2589 |
2557 dump_printf (MSG_NOTE, "load "); | 2655 dump_printf (MSG_NOTE, "load "); |
2558 else if (STMT_VINFO_STRIDED_P (stmt_info)) | 2656 else if (STMT_VINFO_STRIDED_P (stmt_info)) |
2559 dump_printf (MSG_NOTE, "strided store "); | 2657 dump_printf (MSG_NOTE, "strided store "); |
2560 else | 2658 else |
2561 dump_printf (MSG_NOTE, "store "); | 2659 dump_printf (MSG_NOTE, "store "); |
2562 dump_printf (MSG_NOTE, "of size %u starting with %G", | 2660 dump_printf (MSG_NOTE, "of size %u\n", |
2563 (unsigned)groupsize, stmt_info->stmt); | 2661 (unsigned)groupsize); |
2662 dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt); | |
2663 next = DR_GROUP_NEXT_ELEMENT (stmt_info); | |
2664 while (next) | |
2665 { | |
2666 if (DR_GROUP_GAP (next) != 1) | |
2667 dump_printf_loc (MSG_NOTE, vect_location, | |
2668 "\t<gap of %d elements>\n", | |
2669 DR_GROUP_GAP (next) - 1); | |
2670 dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt); | |
2671 next = DR_GROUP_NEXT_ELEMENT (next); | |
2672 } | |
2564 if (DR_GROUP_GAP (stmt_info) != 0) | 2673 if (DR_GROUP_GAP (stmt_info) != 0) |
2565 dump_printf_loc (MSG_NOTE, vect_location, | 2674 dump_printf_loc (MSG_NOTE, vect_location, |
2566 "There is a gap of %u elements after the group\n", | 2675 "\t<gap of %d elements>\n", |
2567 DR_GROUP_GAP (stmt_info)); | 2676 DR_GROUP_GAP (stmt_info)); |
2568 } | 2677 } |
2569 | 2678 |
2570 /* SLP: create an SLP data structure for every interleaving group of | 2679 /* SLP: create an SLP data structure for every interleaving group of |
2571 stores for further analysis in vect_analyse_slp. */ | 2680 stores for further analysis in vect_analyse_slp. */ |
2615 data_reference *dr = dr_info->dr; | 2724 data_reference *dr = dr_info->dr; |
2616 tree step = DR_STEP (dr); | 2725 tree step = DR_STEP (dr); |
2617 tree scalar_type = TREE_TYPE (DR_REF (dr)); | 2726 tree scalar_type = TREE_TYPE (DR_REF (dr)); |
2618 stmt_vec_info stmt_info = dr_info->stmt; | 2727 stmt_vec_info stmt_info = dr_info->stmt; |
2619 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 2728 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
2620 struct loop *loop = NULL; | 2729 class loop *loop = NULL; |
2621 | 2730 |
2622 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) | 2731 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
2623 return true; | 2732 return true; |
2624 | 2733 |
2625 if (loop_vinfo) | 2734 if (loop_vinfo) |
2768 return NULL_TREE; | 2877 return NULL_TREE; |
2769 return gimple_assign_rhs1 (stmt); | 2878 return gimple_assign_rhs1 (stmt); |
2770 } | 2879 } |
2771 | 2880 |
2772 /* Return true if vectorizable_* routines can handle statements STMT1_INFO | 2881 /* Return true if vectorizable_* routines can handle statements STMT1_INFO |
2773 and STMT2_INFO being in a single group. */ | 2882 and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can |
2883 be grouped in SLP mode. */ | |
2774 | 2884 |
2775 static bool | 2885 static bool |
2776 can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) | 2886 can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info, |
2887 bool allow_slp_p) | |
2777 { | 2888 { |
2778 if (gimple_assign_single_p (stmt1_info->stmt)) | 2889 if (gimple_assign_single_p (stmt1_info->stmt)) |
2779 return gimple_assign_single_p (stmt2_info->stmt); | 2890 return gimple_assign_single_p (stmt2_info->stmt); |
2780 | 2891 |
2781 gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt); | 2892 gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt); |
2793 | 2904 |
2794 /* Check that the masks are the same. Cope with casts of masks, | 2905 /* Check that the masks are the same. Cope with casts of masks, |
2795 like those created by build_mask_conversion. */ | 2906 like those created by build_mask_conversion. */ |
2796 tree mask1 = gimple_call_arg (call1, 2); | 2907 tree mask1 = gimple_call_arg (call1, 2); |
2797 tree mask2 = gimple_call_arg (call2, 2); | 2908 tree mask2 = gimple_call_arg (call2, 2); |
2798 if (!operand_equal_p (mask1, mask2, 0)) | 2909 if (!operand_equal_p (mask1, mask2, 0) |
2910 && (ifn == IFN_MASK_STORE || !allow_slp_p)) | |
2799 { | 2911 { |
2800 mask1 = strip_conversion (mask1); | 2912 mask1 = strip_conversion (mask1); |
2801 if (!mask1) | 2913 if (!mask1) |
2802 return false; | 2914 return false; |
2803 mask2 = strip_conversion (mask2); | 2915 mask2 = strip_conversion (mask2); |
2879 not masked loads or stores). */ | 2991 not masked loads or stores). */ |
2880 if (DR_IS_READ (dra) != DR_IS_READ (drb) | 2992 if (DR_IS_READ (dra) != DR_IS_READ (drb) |
2881 || data_ref_compare_tree (DR_BASE_ADDRESS (dra), | 2993 || data_ref_compare_tree (DR_BASE_ADDRESS (dra), |
2882 DR_BASE_ADDRESS (drb)) != 0 | 2994 DR_BASE_ADDRESS (drb)) != 0 |
2883 || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0 | 2995 || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0 |
2884 || !can_group_stmts_p (stmtinfo_a, stmtinfo_b)) | 2996 || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true)) |
2885 break; | 2997 break; |
2886 | 2998 |
2887 /* Check that the data-refs have the same constant size. */ | 2999 /* Check that the data-refs have the same constant size. */ |
2888 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))); | 3000 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))); |
2889 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))); | 3001 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))); |
2903 break; | 3015 break; |
2904 | 3016 |
2905 /* Check that the DR_INITs are compile-time constants. */ | 3017 /* Check that the DR_INITs are compile-time constants. */ |
2906 if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST | 3018 if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST |
2907 || TREE_CODE (DR_INIT (drb)) != INTEGER_CST) | 3019 || TREE_CODE (DR_INIT (drb)) != INTEGER_CST) |
3020 break; | |
3021 | |
3022 /* Different .GOMP_SIMD_LANE calls still give the same lane, | |
3023 just hold extra information. */ | |
3024 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a) | |
3025 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b) | |
3026 && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0) | |
2908 break; | 3027 break; |
2909 | 3028 |
2910 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */ | 3029 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */ |
2911 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); | 3030 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); |
2912 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); | 3031 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); |
2964 } | 3083 } |
2965 DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a; | 3084 DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a; |
2966 DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b; | 3085 DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b; |
2967 lastinfo = stmtinfo_b; | 3086 lastinfo = stmtinfo_b; |
2968 | 3087 |
3088 STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a) | |
3089 = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false); | |
3090 | |
3091 if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)) | |
3092 dump_printf_loc (MSG_NOTE, vect_location, | |
3093 "Load suitable for SLP vectorization only.\n"); | |
3094 | |
2969 if (init_b == init_prev | 3095 if (init_b == init_prev |
2970 && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) | 3096 && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) |
2971 && dump_enabled_p ()) | 3097 && dump_enabled_p ()) |
2972 dump_printf_loc (MSG_NOTE, vect_location, | 3098 dump_printf_loc (MSG_NOTE, vect_location, |
2973 "Queuing group with duplicate access for fixup\n"); | 3099 "Queuing group with duplicate access for fixup\n"); |
2986 /* Find the earliest duplicate group member. */ | 3112 /* Find the earliest duplicate group member. */ |
2987 unsigned first_duplicate = -1u; | 3113 unsigned first_duplicate = -1u; |
2988 stmt_vec_info next, g = grp; | 3114 stmt_vec_info next, g = grp; |
2989 while ((next = DR_GROUP_NEXT_ELEMENT (g))) | 3115 while ((next = DR_GROUP_NEXT_ELEMENT (g))) |
2990 { | 3116 { |
2991 if ((DR_INIT (STMT_VINFO_DR_INFO (next)->dr) | 3117 if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr), |
2992 == DR_INIT (STMT_VINFO_DR_INFO (g)->dr)) | 3118 DR_INIT (STMT_VINFO_DR_INFO (g)->dr)) |
2993 && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate) | 3119 && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate) |
2994 first_duplicate = gimple_uid (STMT_VINFO_STMT (next)); | 3120 first_duplicate = gimple_uid (STMT_VINFO_STMT (next)); |
2995 g = next; | 3121 g = next; |
2996 } | 3122 } |
2997 if (first_duplicate == -1U) | 3123 if (first_duplicate == -1U) |
2999 | 3125 |
3000 /* Then move all stmts after the first duplicate to a new group. | 3126 /* Then move all stmts after the first duplicate to a new group. |
3001 Note this is a heuristic but one with the property that *it | 3127 Note this is a heuristic but one with the property that *it |
3002 is fixed up completely. */ | 3128 is fixed up completely. */ |
3003 g = grp; | 3129 g = grp; |
3004 stmt_vec_info newgroup = NULL, ng; | 3130 stmt_vec_info newgroup = NULL, ng = grp; |
3005 while ((next = DR_GROUP_NEXT_ELEMENT (g))) | 3131 while ((next = DR_GROUP_NEXT_ELEMENT (g))) |
3006 { | 3132 { |
3007 if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate) | 3133 if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate) |
3008 { | 3134 { |
3009 DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next); | 3135 DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next); |
3351 LOOP_VINFO_LOOP_NEST (loop_vinfo)); | 3477 LOOP_VINFO_LOOP_NEST (loop_vinfo)); |
3352 | 3478 |
3353 /* First, we collect all data ref pairs for aliasing checks. */ | 3479 /* First, we collect all data ref pairs for aliasing checks. */ |
3354 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr) | 3480 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr) |
3355 { | 3481 { |
3356 int comp_res; | |
3357 poly_uint64 lower_bound; | 3482 poly_uint64 lower_bound; |
3358 tree segment_length_a, segment_length_b; | 3483 tree segment_length_a, segment_length_b; |
3359 unsigned HOST_WIDE_INT access_size_a, access_size_b; | 3484 unsigned HOST_WIDE_INT access_size_a, access_size_b; |
3360 unsigned int align_a, align_b; | 3485 unsigned int align_a, align_b; |
3361 | 3486 |
3383 stmt_vec_info stmt_info_a = dr_info_a->stmt; | 3508 stmt_vec_info stmt_info_a = dr_info_a->stmt; |
3384 | 3509 |
3385 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr)); | 3510 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr)); |
3386 stmt_vec_info stmt_info_b = dr_info_b->stmt; | 3511 stmt_vec_info stmt_info_b = dr_info_b->stmt; |
3387 | 3512 |
3513 bool preserves_scalar_order_p | |
3514 = vect_preserves_scalar_order_p (dr_info_a, dr_info_b); | |
3515 | |
3388 /* Skip the pair if inter-iteration dependencies are irrelevant | 3516 /* Skip the pair if inter-iteration dependencies are irrelevant |
3389 and intra-iteration dependencies are guaranteed to be honored. */ | 3517 and intra-iteration dependencies are guaranteed to be honored. */ |
3390 if (ignore_step_p | 3518 if (ignore_step_p |
3391 && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b) | 3519 && (preserves_scalar_order_p |
3392 || vectorizable_with_step_bound_p (dr_info_a, dr_info_b, | 3520 || vectorizable_with_step_bound_p (dr_info_a, dr_info_b, |
3393 &lower_bound))) | 3521 &lower_bound))) |
3394 { | 3522 { |
3395 if (dump_enabled_p ()) | 3523 if (dump_enabled_p ()) |
3396 dump_printf_loc (MSG_NOTE, vect_location, | 3524 dump_printf_loc (MSG_NOTE, vect_location, |
3467 access_size_a = vect_vfa_access_size (dr_info_a); | 3595 access_size_a = vect_vfa_access_size (dr_info_a); |
3468 access_size_b = vect_vfa_access_size (dr_info_b); | 3596 access_size_b = vect_vfa_access_size (dr_info_b); |
3469 align_a = vect_vfa_align (dr_info_a); | 3597 align_a = vect_vfa_align (dr_info_a); |
3470 align_b = vect_vfa_align (dr_info_b); | 3598 align_b = vect_vfa_align (dr_info_b); |
3471 | 3599 |
3472 comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_info_a->dr), | |
3473 DR_BASE_ADDRESS (dr_info_b->dr)); | |
3474 if (comp_res == 0) | |
3475 comp_res = data_ref_compare_tree (DR_OFFSET (dr_info_a->dr), | |
3476 DR_OFFSET (dr_info_b->dr)); | |
3477 | |
3478 /* See whether the alias is known at compilation time. */ | 3600 /* See whether the alias is known at compilation time. */ |
3479 if (comp_res == 0 | 3601 if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr), |
3602 DR_BASE_ADDRESS (dr_info_b->dr), 0) | |
3603 && operand_equal_p (DR_OFFSET (dr_info_a->dr), | |
3604 DR_OFFSET (dr_info_b->dr), 0) | |
3480 && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST | 3605 && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST |
3481 && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST | 3606 && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST |
3482 && poly_int_tree_p (segment_length_a) | 3607 && poly_int_tree_p (segment_length_a) |
3483 && poly_int_tree_p (segment_length_b)) | 3608 && poly_int_tree_p (segment_length_b)) |
3484 { | 3609 { |
3507 " compilation time alias: %G%G", | 3632 " compilation time alias: %G%G", |
3508 stmt_info_a->stmt, | 3633 stmt_info_a->stmt, |
3509 stmt_info_b->stmt); | 3634 stmt_info_b->stmt); |
3510 } | 3635 } |
3511 | 3636 |
3637 dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a, | |
3638 access_size_a, align_a); | |
3639 dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b, | |
3640 access_size_b, align_b); | |
3641 /* Canonicalize the order to be the one that's needed for accurate | |
3642 RAW, WAR and WAW flags, in cases where the data references are | |
3643 well-ordered. The order doesn't really matter otherwise, | |
3644 but we might as well be consistent. */ | |
3645 if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a) | |
3646 std::swap (dr_a, dr_b); | |
3647 | |
3512 dr_with_seg_len_pair_t dr_with_seg_len_pair | 3648 dr_with_seg_len_pair_t dr_with_seg_len_pair |
3513 (dr_with_seg_len (dr_info_a->dr, segment_length_a, | 3649 (dr_a, dr_b, (preserves_scalar_order_p |
3514 access_size_a, align_a), | 3650 ? dr_with_seg_len_pair_t::WELL_ORDERED |
3515 dr_with_seg_len (dr_info_b->dr, segment_length_b, | 3651 : dr_with_seg_len_pair_t::REORDERED)); |
3516 access_size_b, align_b)); | |
3517 | |
3518 /* Canonicalize pairs by sorting the two DR members. */ | |
3519 if (comp_res > 0) | |
3520 std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second); | |
3521 | 3652 |
3522 comp_alias_ddrs.safe_push (dr_with_seg_len_pair); | 3653 comp_alias_ddrs.safe_push (dr_with_seg_len_pair); |
3523 } | 3654 } |
3524 | 3655 |
3525 prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor); | 3656 prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor); |
3526 | 3657 |
3527 unsigned int count = (comp_alias_ddrs.length () | 3658 unsigned int count = (comp_alias_ddrs.length () |
3528 + check_unequal_addrs.length ()); | 3659 + check_unequal_addrs.length ()); |
3529 | 3660 |
3530 dump_printf_loc (MSG_NOTE, vect_location, | 3661 if (dump_enabled_p ()) |
3531 "improved number of alias checks from %d to %d\n", | 3662 dump_printf_loc (MSG_NOTE, vect_location, |
3532 may_alias_ddrs.length (), count); | 3663 "improved number of alias checks from %d to %d\n", |
3533 if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) | 3664 may_alias_ddrs.length (), count); |
3665 unsigned limit = param_vect_max_version_for_alias_checks; | |
3666 if (flag_simd_cost_model == VECT_COST_MODEL_CHEAP) | |
3667 limit = param_vect_max_version_for_alias_checks * 6 / 10; | |
3668 if (count > limit) | |
3534 return opt_result::failure_at | 3669 return opt_result::failure_at |
3535 (vect_location, | 3670 (vect_location, |
3536 "number of versioning for alias " | 3671 "number of versioning for alias run-time tests exceeds %d " |
3537 "run-time tests exceeds %d " | 3672 "(--param vect-max-version-for-alias-checks)\n", limit); |
3538 "(--param vect-max-version-for-alias-checks)\n", | |
3539 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)); | |
3540 | 3673 |
3541 return opt_result::success (); | 3674 return opt_result::success (); |
3542 } | 3675 } |
3543 | 3676 |
3544 /* Check whether we can use an internal function for a gather load | 3677 /* Check whether we can use an internal function for a gather load |
3545 or scatter store. READ_P is true for loads and false for stores. | 3678 or scatter store. READ_P is true for loads and false for stores. |
3546 MASKED_P is true if the load or store is conditional. MEMORY_TYPE is | 3679 MASKED_P is true if the load or store is conditional. MEMORY_TYPE is |
3547 the type of the memory elements being loaded or stored. OFFSET_BITS | 3680 the type of the memory elements being loaded or stored. OFFSET_TYPE |
3548 is the number of bits in each scalar offset and OFFSET_SIGN is the | 3681 is the type of the offset that is being applied to the invariant |
3549 sign of the offset. SCALE is the amount by which the offset should | 3682 base address. SCALE is the amount by which the offset should |
3550 be multiplied *after* it has been converted to address width. | 3683 be multiplied *after* it has been converted to address width. |
3551 | 3684 |
3552 Return true if the function is supported, storing the function | 3685 Return true if the function is supported, storing the function id in |
3553 id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT. */ | 3686 *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */ |
3554 | 3687 |
3555 bool | 3688 bool |
3556 vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype, | 3689 vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, |
3557 tree memory_type, unsigned int offset_bits, | 3690 tree vectype, tree memory_type, tree offset_type, |
3558 signop offset_sign, int scale, | 3691 int scale, internal_fn *ifn_out, |
3559 internal_fn *ifn_out, tree *element_type_out) | 3692 tree *offset_vectype_out) |
3560 { | 3693 { |
3561 unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); | 3694 unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); |
3562 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))); | 3695 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))); |
3563 if (offset_bits > element_bits) | |
3564 /* Internal functions require the offset to be the same width as | |
3565 the vector elements. We can extend narrower offsets, but it isn't | |
3566 safe to truncate wider offsets. */ | |
3567 return false; | |
3568 | |
3569 if (element_bits != memory_bits) | 3696 if (element_bits != memory_bits) |
3570 /* For now the vector elements must be the same width as the | 3697 /* For now the vector elements must be the same width as the |
3571 memory elements. */ | 3698 memory elements. */ |
3572 return false; | 3699 return false; |
3573 | 3700 |
3576 if (read_p) | 3703 if (read_p) |
3577 ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD; | 3704 ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD; |
3578 else | 3705 else |
3579 ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE; | 3706 ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE; |
3580 | 3707 |
3581 /* Test whether the target supports this combination. */ | 3708 for (;;) |
3582 if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, | 3709 { |
3583 offset_sign, scale)) | 3710 tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); |
3584 return false; | 3711 if (!offset_vectype) |
3585 | 3712 return false; |
3586 *ifn_out = ifn; | 3713 |
3587 *element_type_out = TREE_TYPE (vectype); | 3714 /* Test whether the target supports this combination. */ |
3588 return true; | 3715 if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, |
3716 offset_vectype, scale)) | |
3717 { | |
3718 *ifn_out = ifn; | |
3719 *offset_vectype_out = offset_vectype; | |
3720 return true; | |
3721 } | |
3722 | |
3723 if (TYPE_PRECISION (offset_type) >= POINTER_SIZE | |
3724 && TYPE_PRECISION (offset_type) >= element_bits) | |
3725 return false; | |
3726 | |
3727 offset_type = build_nonstandard_integer_type | |
3728 (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type)); | |
3729 } | |
3589 } | 3730 } |
3590 | 3731 |
3591 /* STMT_INFO is a call to an internal gather load or scatter store function. | 3732 /* STMT_INFO is a call to an internal gather load or scatter store function. |
3592 Describe the operation in INFO. */ | 3733 Describe the operation in INFO. */ |
3593 | 3734 |
3617 vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, | 3758 vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, |
3618 gather_scatter_info *info) | 3759 gather_scatter_info *info) |
3619 { | 3760 { |
3620 HOST_WIDE_INT scale = 1; | 3761 HOST_WIDE_INT scale = 1; |
3621 poly_int64 pbitpos, pbitsize; | 3762 poly_int64 pbitpos, pbitsize; |
3622 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 3763 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
3623 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); | 3764 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
3624 tree offtype = NULL_TREE; | 3765 tree offtype = NULL_TREE; |
3625 tree decl = NULL_TREE, base, off; | 3766 tree decl = NULL_TREE, base, off; |
3626 tree vectype = STMT_VINFO_VECTYPE (stmt_info); | 3767 tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
3627 tree memory_type = TREE_TYPE (DR_REF (dr)); | 3768 tree memory_type = TREE_TYPE (DR_REF (dr)); |
3628 machine_mode pmode; | 3769 machine_mode pmode; |
3629 int punsignedp, reversep, pvolatilep = 0; | 3770 int punsignedp, reversep, pvolatilep = 0; |
3630 internal_fn ifn; | 3771 internal_fn ifn; |
3631 tree element_type; | 3772 tree offset_vectype; |
3632 bool masked_p = false; | 3773 bool masked_p = false; |
3633 | 3774 |
3634 /* See whether this is already a call to a gather/scatter internal function. | 3775 /* See whether this is already a call to a gather/scatter internal function. |
3635 If not, see whether it's a masked load or store. */ | 3776 If not, see whether it's a masked load or store. */ |
3636 gcall *call = dyn_cast <gcall *> (stmt_info->stmt); | 3777 gcall *call = dyn_cast <gcall *> (stmt_info->stmt); |
3787 case MULT_EXPR: | 3928 case MULT_EXPR: |
3788 if (scale == 1 && tree_fits_shwi_p (op1)) | 3929 if (scale == 1 && tree_fits_shwi_p (op1)) |
3789 { | 3930 { |
3790 int new_scale = tree_to_shwi (op1); | 3931 int new_scale = tree_to_shwi (op1); |
3791 /* Only treat this as a scaling operation if the target | 3932 /* Only treat this as a scaling operation if the target |
3792 supports it. */ | 3933 supports it for at least some offset type. */ |
3793 if (use_ifn_p | 3934 if (use_ifn_p |
3794 && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, | 3935 && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), |
3795 vectype, memory_type, 1, | 3936 masked_p, vectype, memory_type, |
3796 TYPE_SIGN (TREE_TYPE (op0)), | 3937 signed_char_type_node, |
3797 new_scale, &ifn, | 3938 new_scale, &ifn, |
3798 &element_type)) | 3939 &offset_vectype) |
3940 && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), | |
3941 masked_p, vectype, memory_type, | |
3942 unsigned_char_type_node, | |
3943 new_scale, &ifn, | |
3944 &offset_vectype)) | |
3799 break; | 3945 break; |
3800 scale = new_scale; | 3946 scale = new_scale; |
3801 off = op0; | 3947 off = op0; |
3802 continue; | 3948 continue; |
3803 } | 3949 } |
3807 continue; | 3953 continue; |
3808 CASE_CONVERT: | 3954 CASE_CONVERT: |
3809 if (!POINTER_TYPE_P (TREE_TYPE (op0)) | 3955 if (!POINTER_TYPE_P (TREE_TYPE (op0)) |
3810 && !INTEGRAL_TYPE_P (TREE_TYPE (op0))) | 3956 && !INTEGRAL_TYPE_P (TREE_TYPE (op0))) |
3811 break; | 3957 break; |
3958 | |
3959 /* Don't include the conversion if the target is happy with | |
3960 the current offset type. */ | |
3961 if (use_ifn_p | |
3962 && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), | |
3963 masked_p, vectype, memory_type, | |
3964 TREE_TYPE (off), scale, &ifn, | |
3965 &offset_vectype)) | |
3966 break; | |
3967 | |
3812 if (TYPE_PRECISION (TREE_TYPE (op0)) | 3968 if (TYPE_PRECISION (TREE_TYPE (op0)) |
3813 == TYPE_PRECISION (TREE_TYPE (off))) | 3969 == TYPE_PRECISION (TREE_TYPE (off))) |
3814 { | 3970 { |
3815 off = op0; | 3971 off = op0; |
3816 continue; | 3972 continue; |
3817 } | 3973 } |
3818 | |
3819 /* The internal functions need the offset to be the same width | |
3820 as the elements of VECTYPE. Don't include operations that | |
3821 cast the offset from that width to a different width. */ | |
3822 if (use_ifn_p | |
3823 && (int_size_in_bytes (TREE_TYPE (vectype)) | |
3824 == int_size_in_bytes (TREE_TYPE (off)))) | |
3825 break; | |
3826 | 3974 |
3827 if (TYPE_PRECISION (TREE_TYPE (op0)) | 3975 if (TYPE_PRECISION (TREE_TYPE (op0)) |
3828 < TYPE_PRECISION (TREE_TYPE (off))) | 3976 < TYPE_PRECISION (TREE_TYPE (off))) |
3829 { | 3977 { |
3830 off = op0; | 3978 off = op0; |
3848 if (offtype == NULL_TREE) | 3996 if (offtype == NULL_TREE) |
3849 offtype = TREE_TYPE (off); | 3997 offtype = TREE_TYPE (off); |
3850 | 3998 |
3851 if (use_ifn_p) | 3999 if (use_ifn_p) |
3852 { | 4000 { |
3853 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype, | 4001 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, |
3854 memory_type, TYPE_PRECISION (offtype), | 4002 vectype, memory_type, offtype, scale, |
3855 TYPE_SIGN (offtype), scale, &ifn, | 4003 &ifn, &offset_vectype)) |
3856 &element_type)) | |
3857 return false; | 4004 return false; |
3858 } | 4005 } |
3859 else | 4006 else |
3860 { | 4007 { |
3861 if (DR_IS_READ (dr)) | 4008 if (DR_IS_READ (dr)) |
3871 | 4018 |
3872 if (!decl) | 4019 if (!decl) |
3873 return false; | 4020 return false; |
3874 | 4021 |
3875 ifn = IFN_LAST; | 4022 ifn = IFN_LAST; |
3876 element_type = TREE_TYPE (vectype); | 4023 /* The offset vector type will be read from DECL when needed. */ |
4024 offset_vectype = NULL_TREE; | |
3877 } | 4025 } |
3878 | 4026 |
3879 info->ifn = ifn; | 4027 info->ifn = ifn; |
3880 info->decl = decl; | 4028 info->decl = decl; |
3881 info->base = base; | 4029 info->base = base; |
3882 info->offset = off; | 4030 info->offset = off; |
3883 info->offset_dt = vect_unknown_def_type; | 4031 info->offset_dt = vect_unknown_def_type; |
3884 info->offset_vectype = NULL_TREE; | 4032 info->offset_vectype = offset_vectype; |
3885 info->scale = scale; | 4033 info->scale = scale; |
3886 info->element_type = element_type; | 4034 info->element_type = TREE_TYPE (vectype); |
3887 info->memory_type = memory_type; | 4035 info->memory_type = memory_type; |
3888 return true; | 4036 return true; |
3889 } | 4037 } |
3890 | 4038 |
3891 /* Find the data references in STMT, analyze them with respect to LOOP and | 4039 /* Find the data references in STMT, analyze them with respect to LOOP and |
3959 DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr)); | 4107 DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr)); |
3960 if (DR_BASE_ADDRESS (newdr) | 4108 if (DR_BASE_ADDRESS (newdr) |
3961 && DR_OFFSET (newdr) | 4109 && DR_OFFSET (newdr) |
3962 && DR_INIT (newdr) | 4110 && DR_INIT (newdr) |
3963 && DR_STEP (newdr) | 4111 && DR_STEP (newdr) |
4112 && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST | |
3964 && integer_zerop (DR_STEP (newdr))) | 4113 && integer_zerop (DR_STEP (newdr))) |
3965 { | 4114 { |
4115 tree base_address = DR_BASE_ADDRESS (newdr); | |
3966 tree off = DR_OFFSET (newdr); | 4116 tree off = DR_OFFSET (newdr); |
4117 tree step = ssize_int (1); | |
4118 if (integer_zerop (off) | |
4119 && TREE_CODE (base_address) == POINTER_PLUS_EXPR) | |
4120 { | |
4121 off = TREE_OPERAND (base_address, 1); | |
4122 base_address = TREE_OPERAND (base_address, 0); | |
4123 } | |
3967 STRIP_NOPS (off); | 4124 STRIP_NOPS (off); |
3968 if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST | 4125 if (TREE_CODE (off) == MULT_EXPR |
3969 && TREE_CODE (off) == MULT_EXPR | |
3970 && tree_fits_uhwi_p (TREE_OPERAND (off, 1))) | 4126 && tree_fits_uhwi_p (TREE_OPERAND (off, 1))) |
3971 { | 4127 { |
3972 tree step = TREE_OPERAND (off, 1); | 4128 step = TREE_OPERAND (off, 1); |
3973 off = TREE_OPERAND (off, 0); | 4129 off = TREE_OPERAND (off, 0); |
3974 STRIP_NOPS (off); | 4130 STRIP_NOPS (off); |
3975 if (CONVERT_EXPR_P (off) | 4131 } |
3976 && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0))) | 4132 if (CONVERT_EXPR_P (off) |
3977 < TYPE_PRECISION (TREE_TYPE (off)))) | 4133 && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0))) |
3978 off = TREE_OPERAND (off, 0); | 4134 < TYPE_PRECISION (TREE_TYPE (off)))) |
3979 if (TREE_CODE (off) == SSA_NAME) | 4135 off = TREE_OPERAND (off, 0); |
4136 if (TREE_CODE (off) == SSA_NAME) | |
4137 { | |
4138 gimple *def = SSA_NAME_DEF_STMT (off); | |
4139 /* Look through widening conversion. */ | |
4140 if (is_gimple_assign (def) | |
4141 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))) | |
3980 { | 4142 { |
3981 gimple *def = SSA_NAME_DEF_STMT (off); | 4143 tree rhs1 = gimple_assign_rhs1 (def); |
4144 if (TREE_CODE (rhs1) == SSA_NAME | |
4145 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) | |
4146 && (TYPE_PRECISION (TREE_TYPE (off)) | |
4147 > TYPE_PRECISION (TREE_TYPE (rhs1)))) | |
4148 def = SSA_NAME_DEF_STMT (rhs1); | |
4149 } | |
4150 if (is_gimple_call (def) | |
4151 && gimple_call_internal_p (def) | |
4152 && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE)) | |
4153 { | |
4154 tree arg = gimple_call_arg (def, 0); | |
3982 tree reft = TREE_TYPE (DR_REF (newdr)); | 4155 tree reft = TREE_TYPE (DR_REF (newdr)); |
3983 if (is_gimple_call (def) | 4156 gcc_assert (TREE_CODE (arg) == SSA_NAME); |
3984 && gimple_call_internal_p (def) | 4157 arg = SSA_NAME_VAR (arg); |
3985 && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE)) | 4158 if (arg == loop->simduid |
4159 /* For now. */ | |
4160 && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step)) | |
3986 { | 4161 { |
3987 tree arg = gimple_call_arg (def, 0); | 4162 DR_BASE_ADDRESS (newdr) = base_address; |
3988 gcc_assert (TREE_CODE (arg) == SSA_NAME); | 4163 DR_OFFSET (newdr) = ssize_int (0); |
3989 arg = SSA_NAME_VAR (arg); | 4164 DR_STEP (newdr) = step; |
3990 if (arg == loop->simduid | 4165 DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT; |
3991 /* For now. */ | 4166 DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step); |
3992 && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step)) | 4167 /* Mark as simd-lane access. */ |
3993 { | 4168 tree arg2 = gimple_call_arg (def, 1); |
3994 DR_OFFSET (newdr) = ssize_int (0); | 4169 newdr->aux = (void *) (-1 - tree_to_uhwi (arg2)); |
3995 DR_STEP (newdr) = step; | 4170 free_data_ref (dr); |
3996 DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT; | 4171 datarefs->safe_push (newdr); |
3997 DR_STEP_ALIGNMENT (newdr) | 4172 return opt_result::success (); |
3998 = highest_pow2_factor (step); | |
3999 /* Mark as simd-lane access. */ | |
4000 newdr->aux = (void *)-1; | |
4001 free_data_ref (dr); | |
4002 datarefs->safe_push (newdr); | |
4003 return opt_result::success (); | |
4004 } | |
4005 } | 4173 } |
4006 } | 4174 } |
4007 } | 4175 } |
4008 } | 4176 } |
4009 free_data_ref (newdr); | 4177 free_data_ref (newdr); |
4027 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. | 4195 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. |
4028 | 4196 |
4029 */ | 4197 */ |
4030 | 4198 |
4031 opt_result | 4199 opt_result |
4032 vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) | 4200 vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal) |
4033 { | 4201 { |
4034 struct loop *loop = NULL; | 4202 class loop *loop = NULL; |
4035 unsigned int i; | 4203 unsigned int i; |
4036 struct data_reference *dr; | 4204 struct data_reference *dr; |
4037 tree scalar_type; | 4205 tree scalar_type; |
4038 | 4206 |
4039 DUMP_VECT_SCOPE ("vect_analyze_data_refs"); | 4207 DUMP_VECT_SCOPE ("vect_analyze_data_refs"); |
4104 stmt_info->stmt); | 4272 stmt_info->stmt); |
4105 } | 4273 } |
4106 } | 4274 } |
4107 | 4275 |
4108 /* See if this was detected as SIMD lane access. */ | 4276 /* See if this was detected as SIMD lane access. */ |
4109 if (dr->aux == (void *)-1) | 4277 if (dr->aux == (void *)-1 |
4278 || dr->aux == (void *)-2 | |
4279 || dr->aux == (void *)-3 | |
4280 || dr->aux == (void *)-4) | |
4110 { | 4281 { |
4111 if (nested_in_vect_loop_p (loop, stmt_info)) | 4282 if (nested_in_vect_loop_p (loop, stmt_info)) |
4112 return opt_result::failure_at (stmt_info->stmt, | 4283 return opt_result::failure_at (stmt_info->stmt, |
4113 "not vectorized:" | 4284 "not vectorized:" |
4114 " data ref analysis failed: %G", | 4285 " data ref analysis failed: %G", |
4115 stmt_info->stmt); | 4286 stmt_info->stmt); |
4116 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true; | 4287 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) |
4288 = -(uintptr_t) dr->aux; | |
4117 } | 4289 } |
4118 | 4290 |
4119 tree base = get_base_address (DR_REF (dr)); | 4291 tree base = get_base_address (DR_REF (dr)); |
4120 if (base && VAR_P (base) && DECL_NONALIASED (base)) | 4292 if (base && VAR_P (base) && DECL_NONALIASED (base)) |
4121 { | 4293 { |
4140 && DR_STEP (dr) | 4312 && DR_STEP (dr) |
4141 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST) | 4313 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST) |
4142 { | 4314 { |
4143 if (nested_in_vect_loop_p (loop, stmt_info)) | 4315 if (nested_in_vect_loop_p (loop, stmt_info)) |
4144 return opt_result::failure_at (stmt_info->stmt, | 4316 return opt_result::failure_at (stmt_info->stmt, |
4145 "not vectorized:" | 4317 "not vectorized: " |
4146 "not suitable for strided load %G", | 4318 "not suitable for strided load %G", |
4147 stmt_info->stmt); | 4319 stmt_info->stmt); |
4148 STMT_VINFO_STRIDED_P (stmt_info) = true; | 4320 STMT_VINFO_STRIDED_P (stmt_info) = true; |
4149 } | 4321 } |
4150 | 4322 |
4201 STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info)); | 4373 STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info)); |
4202 } | 4374 } |
4203 | 4375 |
4204 /* Set vectype for STMT. */ | 4376 /* Set vectype for STMT. */ |
4205 scalar_type = TREE_TYPE (DR_REF (dr)); | 4377 scalar_type = TREE_TYPE (DR_REF (dr)); |
4206 STMT_VINFO_VECTYPE (stmt_info) | 4378 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); |
4207 = get_vectype_for_scalar_type (scalar_type); | 4379 if (!vectype) |
4208 if (!STMT_VINFO_VECTYPE (stmt_info)) | |
4209 { | 4380 { |
4210 if (dump_enabled_p ()) | 4381 if (dump_enabled_p ()) |
4211 { | 4382 { |
4212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 4383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4213 "not vectorized: no vectype for stmt: %G", | 4384 "not vectorized: no vectype for stmt: %G", |
4223 /* No vector type is fine, the ref can still participate | 4394 /* No vector type is fine, the ref can still participate |
4224 in dependence analysis, we just can't vectorize it. */ | 4395 in dependence analysis, we just can't vectorize it. */ |
4225 STMT_VINFO_VECTORIZABLE (stmt_info) = false; | 4396 STMT_VINFO_VECTORIZABLE (stmt_info) = false; |
4226 continue; | 4397 continue; |
4227 } | 4398 } |
4399 if (fatal) | |
4400 *fatal = false; | |
4228 return opt_result::failure_at (stmt_info->stmt, | 4401 return opt_result::failure_at (stmt_info->stmt, |
4229 "not vectorized:" | 4402 "not vectorized:" |
4230 " no vectype for stmt: %G" | 4403 " no vectype for stmt: %G" |
4231 " scalar_type: %T\n", | 4404 " scalar_type: %T\n", |
4232 stmt_info->stmt, scalar_type); | 4405 stmt_info->stmt, scalar_type); |
4234 else | 4407 else |
4235 { | 4408 { |
4236 if (dump_enabled_p ()) | 4409 if (dump_enabled_p ()) |
4237 dump_printf_loc (MSG_NOTE, vect_location, | 4410 dump_printf_loc (MSG_NOTE, vect_location, |
4238 "got vectype for stmt: %G%T\n", | 4411 "got vectype for stmt: %G%T\n", |
4239 stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info)); | 4412 stmt_info->stmt, vectype); |
4240 } | 4413 } |
4241 | 4414 |
4242 /* Adjust the minimal vectorization factor according to the | 4415 /* Adjust the minimal vectorization factor according to the |
4243 vector type. */ | 4416 vector type. */ |
4244 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); | 4417 vf = TYPE_VECTOR_SUBPARTS (vectype); |
4245 *min_vf = upper_bound (*min_vf, vf); | 4418 *min_vf = upper_bound (*min_vf, vf); |
4419 | |
4420 /* Leave the BB vectorizer to pick the vector type later, based on | |
4421 the final dataref group size and SLP node size. */ | |
4422 if (is_a <loop_vec_info> (vinfo)) | |
4423 STMT_VINFO_VECTYPE (stmt_info) = vectype; | |
4246 | 4424 |
4247 if (gatherscatter != SG_NONE) | 4425 if (gatherscatter != SG_NONE) |
4248 { | 4426 { |
4249 gather_scatter_info gs_info; | 4427 gather_scatter_info gs_info; |
4250 if (!vect_check_gather_scatter (stmt_info, | 4428 if (!vect_check_gather_scatter (stmt_info, |
4251 as_a <loop_vec_info> (vinfo), | 4429 as_a <loop_vec_info> (vinfo), |
4252 &gs_info) | 4430 &gs_info) |
4253 || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset))) | 4431 || !get_vectype_for_scalar_type (vinfo, |
4254 return opt_result::failure_at | 4432 TREE_TYPE (gs_info.offset))) |
4255 (stmt_info->stmt, | 4433 { |
4256 (gatherscatter == GATHER) ? | 4434 if (fatal) |
4257 "not vectorized: not suitable for gather load %G" : | 4435 *fatal = false; |
4258 "not vectorized: not suitable for scatter store %G", | 4436 return opt_result::failure_at |
4259 stmt_info->stmt); | 4437 (stmt_info->stmt, |
4438 (gatherscatter == GATHER) | |
4439 ? "not vectorized: not suitable for gather load %G" | |
4440 : "not vectorized: not suitable for scatter store %G", | |
4441 stmt_info->stmt); | |
4442 } | |
4260 STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter; | 4443 STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter; |
4261 } | 4444 } |
4262 } | 4445 } |
4263 | 4446 |
4264 /* We used to stop processing and prune the list here. Verify we no | 4447 /* We used to stop processing and prune the list here. Verify we no |
4356 int misalign = DR_MISALIGNMENT (dr_info); | 4539 int misalign = DR_MISALIGNMENT (dr_info); |
4357 if (misalign == DR_MISALIGNMENT_UNKNOWN) | 4540 if (misalign == DR_MISALIGNMENT_UNKNOWN) |
4358 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name)); | 4541 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name)); |
4359 else | 4542 else |
4360 set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), | 4543 set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), |
4361 DR_TARGET_ALIGNMENT (dr_info), misalign); | 4544 known_alignment (DR_TARGET_ALIGNMENT (dr_info)), |
4545 misalign); | |
4362 } | 4546 } |
4363 | 4547 |
4364 /* Function vect_create_addr_base_for_vector_ref. | 4548 /* Function vect_create_addr_base_for_vector_ref. |
4365 | 4549 |
4366 Create an expression that computes the address of the first memory location | 4550 Create an expression that computes the address of the first memory location |
4411 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); | 4595 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); |
4412 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 4596 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
4413 innermost_loop_behavior *drb = vect_dr_behavior (dr_info); | 4597 innermost_loop_behavior *drb = vect_dr_behavior (dr_info); |
4414 | 4598 |
4415 tree data_ref_base = unshare_expr (drb->base_address); | 4599 tree data_ref_base = unshare_expr (drb->base_address); |
4416 tree base_offset = unshare_expr (drb->offset); | 4600 tree base_offset = unshare_expr (get_dr_vinfo_offset (dr_info, true)); |
4417 tree init = unshare_expr (drb->init); | 4601 tree init = unshare_expr (drb->init); |
4418 | 4602 |
4419 if (loop_vinfo) | 4603 if (loop_vinfo) |
4420 base_name = get_name (data_ref_base); | 4604 base_name = get_name (data_ref_base); |
4421 else | 4605 else |
4529 | 4713 |
4530 3. Return the pointer. */ | 4714 3. Return the pointer. */ |
4531 | 4715 |
4532 tree | 4716 tree |
4533 vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type, | 4717 vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type, |
4534 struct loop *at_loop, tree offset, | 4718 class loop *at_loop, tree offset, |
4535 tree *initial_address, gimple_stmt_iterator *gsi, | 4719 tree *initial_address, gimple_stmt_iterator *gsi, |
4536 gimple **ptr_incr, bool only_init, | 4720 gimple **ptr_incr, bool only_init, |
4537 tree byte_offset, tree iv_step) | 4721 tree byte_offset, tree iv_step) |
4538 { | 4722 { |
4539 const char *base_name; | 4723 const char *base_name; |
4540 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 4724 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
4541 struct loop *loop = NULL; | 4725 class loop *loop = NULL; |
4542 bool nested_in_vect_loop = false; | 4726 bool nested_in_vect_loop = false; |
4543 struct loop *containing_loop = NULL; | 4727 class loop *containing_loop = NULL; |
4544 tree aggr_ptr_type; | 4728 tree aggr_ptr_type; |
4545 tree aggr_ptr; | 4729 tree aggr_ptr; |
4546 tree new_temp; | 4730 tree new_temp; |
4547 gimple_seq new_stmt_list = NULL; | 4731 gimple_seq new_stmt_list = NULL; |
4548 edge pe = NULL; | 4732 edge pe = NULL; |
5283 tree | 5467 tree |
5284 vect_setup_realignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, | 5468 vect_setup_realignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
5285 tree *realignment_token, | 5469 tree *realignment_token, |
5286 enum dr_alignment_support alignment_support_scheme, | 5470 enum dr_alignment_support alignment_support_scheme, |
5287 tree init_addr, | 5471 tree init_addr, |
5288 struct loop **at_loop) | 5472 class loop **at_loop) |
5289 { | 5473 { |
5290 tree vectype = STMT_VINFO_VECTYPE (stmt_info); | 5474 tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
5291 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 5475 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
5292 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); | 5476 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
5293 struct data_reference *dr = dr_info->dr; | 5477 struct data_reference *dr = dr_info->dr; |
5294 struct loop *loop = NULL; | 5478 class loop *loop = NULL; |
5295 edge pe = NULL; | 5479 edge pe = NULL; |
5296 tree scalar_dest = gimple_assign_lhs (stmt_info->stmt); | 5480 tree scalar_dest = gimple_assign_lhs (stmt_info->stmt); |
5297 tree vec_dest; | 5481 tree vec_dest; |
5298 gimple *inc; | 5482 gimple *inc; |
5299 tree ptr; | 5483 tree ptr; |
5304 gphi *phi_stmt; | 5488 gphi *phi_stmt; |
5305 tree msq = NULL_TREE; | 5489 tree msq = NULL_TREE; |
5306 gimple_seq stmts = NULL; | 5490 gimple_seq stmts = NULL; |
5307 bool compute_in_loop = false; | 5491 bool compute_in_loop = false; |
5308 bool nested_in_vect_loop = false; | 5492 bool nested_in_vect_loop = false; |
5309 struct loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father; | 5493 class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father; |
5310 struct loop *loop_for_initial_load = NULL; | 5494 class loop *loop_for_initial_load = NULL; |
5311 | 5495 |
5312 if (loop_vinfo) | 5496 if (loop_vinfo) |
5313 { | 5497 { |
5314 loop = LOOP_VINFO_LOOP (loop_vinfo); | 5498 loop = LOOP_VINFO_LOOP (loop_vinfo); |
5315 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); | 5499 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); |
5400 &init_addr, NULL, &inc, true); | 5584 &init_addr, NULL, &inc, true); |
5401 if (TREE_CODE (ptr) == SSA_NAME) | 5585 if (TREE_CODE (ptr) == SSA_NAME) |
5402 new_temp = copy_ssa_name (ptr); | 5586 new_temp = copy_ssa_name (ptr); |
5403 else | 5587 else |
5404 new_temp = make_ssa_name (TREE_TYPE (ptr)); | 5588 new_temp = make_ssa_name (TREE_TYPE (ptr)); |
5405 unsigned int align = DR_TARGET_ALIGNMENT (dr_info); | 5589 poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info); |
5590 tree type = TREE_TYPE (ptr); | |
5406 new_stmt = gimple_build_assign | 5591 new_stmt = gimple_build_assign |
5407 (new_temp, BIT_AND_EXPR, ptr, | 5592 (new_temp, BIT_AND_EXPR, ptr, |
5408 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); | 5593 fold_build2 (MINUS_EXPR, type, |
5594 build_int_cst (type, 0), | |
5595 build_int_cst (type, align))); | |
5409 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); | 5596 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); |
5410 gcc_assert (!new_bb); | 5597 gcc_assert (!new_bb); |
5411 data_ref | 5598 data_ref |
5412 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp, | 5599 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp, |
5413 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0)); | 5600 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0)); |
6238 DR_GROUP_GAP is the number of steps in elements from the previous | 6425 DR_GROUP_GAP is the number of steps in elements from the previous |
6239 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that | 6426 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that |
6240 correspond to the gaps. */ | 6427 correspond to the gaps. */ |
6241 if (next_stmt_info != first_stmt_info | 6428 if (next_stmt_info != first_stmt_info |
6242 && gap_count < DR_GROUP_GAP (next_stmt_info)) | 6429 && gap_count < DR_GROUP_GAP (next_stmt_info)) |
6243 { | 6430 { |
6244 gap_count++; | 6431 gap_count++; |
6245 continue; | 6432 continue; |
6246 } | 6433 } |
6247 | 6434 |
6248 while (next_stmt_info) | 6435 /* ??? The following needs cleanup after the removal of |
6436 DR_GROUP_SAME_DR_STMT. */ | |
6437 if (next_stmt_info) | |
6249 { | 6438 { |
6250 stmt_vec_info new_stmt_info = vinfo->lookup_def (tmp_data_ref); | 6439 stmt_vec_info new_stmt_info = vinfo->lookup_def (tmp_data_ref); |
6251 /* We assume that if VEC_STMT is not NULL, this is a case of multiple | 6440 /* We assume that if VEC_STMT is not NULL, this is a case of multiple |
6252 copies, and we put the new vector statement in the first available | 6441 copies, and we put the new vector statement in the first available |
6253 RELATED_STMT. */ | 6442 RELATED_STMT. */ |
6254 if (!STMT_VINFO_VEC_STMT (next_stmt_info)) | 6443 if (!STMT_VINFO_VEC_STMT (next_stmt_info)) |
6255 STMT_VINFO_VEC_STMT (next_stmt_info) = new_stmt_info; | 6444 STMT_VINFO_VEC_STMT (next_stmt_info) = new_stmt_info; |
6256 else | 6445 else |
6257 { | 6446 { |
6258 if (!DR_GROUP_SAME_DR_STMT (next_stmt_info)) | 6447 stmt_vec_info prev_stmt_info |
6259 { | 6448 = STMT_VINFO_VEC_STMT (next_stmt_info); |
6260 stmt_vec_info prev_stmt_info | 6449 stmt_vec_info rel_stmt_info |
6261 = STMT_VINFO_VEC_STMT (next_stmt_info); | 6450 = STMT_VINFO_RELATED_STMT (prev_stmt_info); |
6262 stmt_vec_info rel_stmt_info | 6451 while (rel_stmt_info) |
6263 = STMT_VINFO_RELATED_STMT (prev_stmt_info); | 6452 { |
6264 while (rel_stmt_info) | 6453 prev_stmt_info = rel_stmt_info; |
6265 { | 6454 rel_stmt_info = STMT_VINFO_RELATED_STMT (rel_stmt_info); |
6266 prev_stmt_info = rel_stmt_info; | 6455 } |
6267 rel_stmt_info = STMT_VINFO_RELATED_STMT (rel_stmt_info); | 6456 |
6268 } | 6457 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; |
6269 | |
6270 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; | |
6271 } | |
6272 } | 6458 } |
6273 | 6459 |
6274 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); | 6460 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
6275 gap_count = 1; | 6461 gap_count = 1; |
6276 /* If NEXT_STMT_INFO accesses the same DR as the previous statement, | |
6277 put the same TMP_DATA_REF as its vectorized statement; otherwise | |
6278 get the next data-ref from RESULT_CHAIN. */ | |
6279 if (!next_stmt_info || !DR_GROUP_SAME_DR_STMT (next_stmt_info)) | |
6280 break; | |
6281 } | 6462 } |
6282 } | 6463 } |
6283 } | 6464 } |
6284 | 6465 |
6285 /* Function vect_force_dr_alignment_p. | 6466 /* Function vect_force_dr_alignment_p. |
6286 | 6467 |
6287 Returns whether the alignment of a DECL can be forced to be aligned | 6468 Returns whether the alignment of a DECL can be forced to be aligned |
6288 on ALIGNMENT bit boundary. */ | 6469 on ALIGNMENT bit boundary. */ |
6289 | 6470 |
6290 bool | 6471 bool |
6291 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment) | 6472 vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment) |
6292 { | 6473 { |
6293 if (!VAR_P (decl)) | 6474 if (!VAR_P (decl)) |
6294 return false; | 6475 return false; |
6295 | 6476 |
6296 if (decl_in_symtab_p (decl) | 6477 if (decl_in_symtab_p (decl) |
6297 && !symtab_node::get (decl)->can_increase_alignment_p ()) | 6478 && !symtab_node::get (decl)->can_increase_alignment_p ()) |
6298 return false; | 6479 return false; |
6299 | 6480 |
6300 if (TREE_STATIC (decl)) | 6481 if (TREE_STATIC (decl)) |
6301 return (alignment <= MAX_OFILE_ALIGNMENT); | 6482 return (known_le (alignment, |
6483 (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT)); | |
6302 else | 6484 else |
6303 return (alignment <= MAX_STACK_ALIGNMENT); | 6485 return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT)); |
6304 } | 6486 } |
6305 | 6487 |
6306 | 6488 |
6307 /* Return whether the data reference DR_INFO is supported with respect to its | 6489 /* Return whether the data reference DR_INFO is supported with respect to its |
6308 alignment. | 6490 alignment. |
6317 data_reference *dr = dr_info->dr; | 6499 data_reference *dr = dr_info->dr; |
6318 stmt_vec_info stmt_info = dr_info->stmt; | 6500 stmt_vec_info stmt_info = dr_info->stmt; |
6319 tree vectype = STMT_VINFO_VECTYPE (stmt_info); | 6501 tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
6320 machine_mode mode = TYPE_MODE (vectype); | 6502 machine_mode mode = TYPE_MODE (vectype); |
6321 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 6503 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
6322 struct loop *vect_loop = NULL; | 6504 class loop *vect_loop = NULL; |
6323 bool nested_in_vect_loop = false; | 6505 bool nested_in_vect_loop = false; |
6324 | 6506 |
6325 if (aligned_access_p (dr_info) && !check_aligned_accesses) | 6507 if (aligned_access_p (dr_info) && !check_aligned_accesses) |
6326 return dr_aligned; | 6508 return dr_aligned; |
6327 | 6509 |