comparison gcc/tree-vect-slp.c @ 63:b7f97abdc517 gcc-4.6-20100522

update gcc from gcc-4.5.0 to gcc-4.6
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Mon, 24 May 2010 12:47:05 +0900
parents 77e2b8dfacca
children f6334be47118
comparison
equal deleted inserted replaced
56:3c8a44c06a95 63:b7f97abdc517
1 /* SLP - Basic Block Vectorization 1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. 2 Copyright (C) 2007, 2008, 2009, 2010
3 Foundation, Inc. 3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com> 4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com> 5 and Ira Rosen <irar@il.ibm.com>
6 6
7 This file is part of GCC. 7 This file is part of GCC.
8 8
27 #include "ggc.h" 27 #include "ggc.h"
28 #include "tree.h" 28 #include "tree.h"
29 #include "target.h" 29 #include "target.h"
30 #include "basic-block.h" 30 #include "basic-block.h"
31 #include "diagnostic.h" 31 #include "diagnostic.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
32 #include "tree-flow.h" 34 #include "tree-flow.h"
33 #include "tree-dump.h" 35 #include "tree-dump.h"
34 #include "cfgloop.h" 36 #include "cfgloop.h"
35 #include "cfglayout.h" 37 #include "cfglayout.h"
36 #include "expr.h" 38 #include "expr.h"
244 /* Not first stmt of the group, check that the def-stmt/s match 246 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */ 247 the def-stmt/s of the first stmt. */
246 if ((i == 0 248 if ((i == 0
247 && (*first_stmt_dt0 != dt[i] 249 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def 250 || (*first_stmt_def0_type && def
249 && *first_stmt_def0_type != TREE_TYPE (def)))) 251 && !types_compatible_p (*first_stmt_def0_type,
252 TREE_TYPE (def)))))
250 || (i == 1 253 || (i == 1
251 && (*first_stmt_dt1 != dt[i] 254 && (*first_stmt_dt1 != dt[i]
252 || (*first_stmt_def1_type && def 255 || (*first_stmt_def1_type && def
253 && *first_stmt_def1_type != TREE_TYPE (def)))) 256 && !types_compatible_p (*first_stmt_def1_type,
257 TREE_TYPE (def)))))
254 || (!def 258 || (!def
255 && TREE_TYPE (*first_stmt_const_oprnd) 259 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
256 != TREE_TYPE (oprnd))) 260 TREE_TYPE (oprnd))))
257 { 261 {
258 if (vect_print_dump_info (REPORT_SLP)) 262 if (vect_print_dump_info (REPORT_SLP))
259 fprintf (vect_dump, "Build SLP failed: different types "); 263 fprintf (vect_dump, "Build SLP failed: different types ");
260 264
261 return false; 265 return false;
269 case vect_constant_def: 273 case vect_constant_def:
270 case vect_external_def: 274 case vect_external_def:
271 break; 275 break;
272 276
273 case vect_internal_def: 277 case vect_internal_def:
278 case vect_reduction_def:
274 if (i == 0) 279 if (i == 0)
275 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); 280 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
276 else 281 else
277 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); 282 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
278 break; 283 break;
328 struct data_reference *first_dr; 333 struct data_reference *first_dr;
329 bool pattern0 = false, pattern1 = false; 334 bool pattern0 = false, pattern1 = false;
330 HOST_WIDE_INT dummy; 335 HOST_WIDE_INT dummy;
331 bool permutation = false; 336 bool permutation = false;
332 unsigned int load_place; 337 unsigned int load_place;
333 gimple first_load; 338 gimple first_load, prev_first_load = NULL;
334 339
335 /* For every stmt in NODE find its def stmt/s. */ 340 /* For every stmt in NODE find its def stmt/s. */
336 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++) 341 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
337 { 342 {
338 if (vect_print_dump_info (REPORT_SLP)) 343 if (vect_print_dump_info (REPORT_SLP))
339 { 344 {
340 fprintf (vect_dump, "Build SLP for "); 345 fprintf (vect_dump, "Build SLP for ");
341 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 346 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
342 } 347 }
348
349 /* Fail to vectorize statements marked as unvectorizable. */
350 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
351 {
352 if (vect_print_dump_info (REPORT_SLP))
353 {
354 fprintf (vect_dump,
355 "Build SLP failed: unvectorizable statement ");
356 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
357 }
358
359 return false;
360 }
343 361
344 lhs = gimple_get_lhs (stmt); 362 lhs = gimple_get_lhs (stmt);
345 if (lhs == NULL_TREE) 363 if (lhs == NULL_TREE)
346 { 364 {
347 if (vect_print_dump_info (REPORT_SLP)) 365 if (vect_print_dump_info (REPORT_SLP))
481 &first_stmt_const_oprnd, 499 &first_stmt_const_oprnd,
482 ncopies_for_cost, 500 ncopies_for_cost,
483 &pattern0, &pattern1)) 501 &pattern0, &pattern1))
484 return false; 502 return false;
485 } 503 }
486 else 504 else
487 { 505 {
488 /* Load. */ 506 /* Load. */
489 /* FORNOW: Check that there is no gap between the loads. */ 507 /* FORNOW: Check that there is no gap between the loads. */
490 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt 508 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
491 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0) 509 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
492 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt 510 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)) 511 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
494 { 512 {
495 if (vect_print_dump_info (REPORT_SLP)) 513 if (vect_print_dump_info (REPORT_SLP))
496 { 514 {
497 fprintf (vect_dump, "Build SLP failed: strided " 515 fprintf (vect_dump, "Build SLP failed: strided "
498 "loads have gaps "); 516 "loads have gaps ");
499 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 517 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
500 } 518 }
501 519
502 return false; 520 return false;
503 } 521 }
504 522
505 /* Check that the size of interleaved loads group is not 523 /* Check that the size of interleaved loads group is not
506 greater than the SLP group size. */ 524 greater than the SLP group size. */
507 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) 525 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
508 > ncopies * group_size) 526 {
509 { 527 if (vect_print_dump_info (REPORT_SLP))
510 if (vect_print_dump_info (REPORT_SLP)) 528 {
511 { 529 fprintf (vect_dump, "Build SLP failed: the number of "
512 fprintf (vect_dump, "Build SLP failed: the number of " 530 "interleaved loads is greater than"
513 "interleaved loads is greater than" 531 " the SLP group size ");
514 " the SLP group size "); 532 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 533 }
516 } 534
517 535 return false;
518 return false; 536 }
519 } 537
520 538 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
521 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); 539 if (prev_first_load)
540 {
541 /* Check that there are no loads from different interleaving
542 chains in the same node. The only exception is complex
543 numbers. */
544 if (prev_first_load != first_load
545 && rhs_code != REALPART_EXPR
546 && rhs_code != IMAGPART_EXPR)
547 {
548 if (vect_print_dump_info (REPORT_SLP))
549 {
550 fprintf (vect_dump, "Build SLP failed: different "
551 "interleaving chains in one node ");
552 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
553 }
554
555 return false;
556 }
557 }
558 else
559 prev_first_load = first_load;
522 560
523 if (first_load == stmt) 561 if (first_load == stmt)
524 { 562 {
525 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); 563 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
526 if (vect_supportable_dr_alignment (first_dr) 564 if (vect_supportable_dr_alignment (first_dr)
783 821
784 return true; 822 return true;
785 } 823 }
786 824
787 825
826 /* Rearrange the statements of NODE according to PERMUTATION. */
827
828 static void
829 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
830 VEC (int, heap) *permutation)
831 {
832 gimple stmt;
833 VEC (gimple, heap) *tmp_stmts;
834 unsigned int index, i;
835
836 if (!node)
837 return;
838
839 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
840 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
841
842 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
843 tmp_stmts = VEC_alloc (gimple, heap, group_size);
844
845 for (i = 0; i < group_size; i++)
846 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
847
848 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
849 {
850 index = VEC_index (int, permutation, i);
851 VEC_replace (gimple, tmp_stmts, index, stmt);
852 }
853
854 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
855 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
856 }
857
858
788 /* Check if the required load permutation is supported. 859 /* Check if the required load permutation is supported.
789 LOAD_PERMUTATION contains a list of indices of the loads. 860 LOAD_PERMUTATION contains a list of indices of the loads.
790 In SLP this permutation is relative to the order of strided stores that are 861 In SLP this permutation is relative to the order of strided stores that are
791 the base of the SLP instance. */ 862 the base of the SLP instance. */
792 863
793 static bool 864 static bool
794 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, 865 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
795 VEC (int, heap) *load_permutation) 866 VEC (int, heap) *load_permutation)
796 { 867 {
797 int i = 0, j, prev = -1, next, k; 868 int i = 0, j, prev = -1, next, k, number_of_groups;
798 bool supported; 869 bool supported, bad_permutation = false;
799 sbitmap load_index; 870 sbitmap load_index;
871 slp_tree node;
872 gimple stmt;
800 873
801 /* FORNOW: permutations are only supported in SLP. */ 874 /* FORNOW: permutations are only supported in SLP. */
802 if (!slp_instn) 875 if (!slp_instn)
803 return false; 876 return false;
804 877
807 fprintf (vect_dump, "Load permutation "); 880 fprintf (vect_dump, "Load permutation ");
808 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++) 881 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
809 fprintf (vect_dump, "%d ", next); 882 fprintf (vect_dump, "%d ", next);
810 } 883 }
811 884
885 /* In case of reduction every load permutation is allowed, since the order
886 of the reduction statements is not important (as opposed to the case of
887 strided stores). The only condition we need to check is that all the
888 load nodes are of the same size and have the same permutation (and then
889 rearrange all the nodes of the SLP instance according to this
890 permutation). */
891
892 /* Check that all the load nodes are of the same size. */
893 for (i = 0;
894 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
895 i++)
896 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
897 != (unsigned) group_size)
898 return false;
899
900 node = SLP_INSTANCE_TREE (slp_instn);
901 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
902 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
903 instance, not all the loads belong to the same node or interleaving
904 group. Hence, we need to divide them into groups according to
905 GROUP_SIZE. */
906 number_of_groups = VEC_length (int, load_permutation) / group_size;
907
908 /* Reduction (there are no data-refs in the root). */
909 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
910 {
911 int first_group_load_index;
912
913 /* Compare all the permutation sequences to the first one. */
914 for (i = 1; i < number_of_groups; i++)
915 {
916 k = 0;
917 for (j = i * group_size; j < i * group_size + group_size; j++)
918 {
919 next = VEC_index (int, load_permutation, j);
920 first_group_load_index = VEC_index (int, load_permutation, k);
921
922 if (next != first_group_load_index)
923 {
924 bad_permutation = true;
925 break;
926 }
927
928 k++;
929 }
930
931 if (bad_permutation)
932 break;
933 }
934
935 if (!bad_permutation)
936 {
 937 /* This permutation is valid for reduction. Since the order of the
938 statements in the nodes is not important unless they are memory
939 accesses, we can rearrange the statements in all the nodes
940 according to the order of the loads. */
941 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
942 load_permutation);
943 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
944 return true;
945 }
946 }
947
812 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to 948 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
813 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as 949 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
814 well. */ 950 well (unless it's reduction). */
815 if (VEC_length (int, load_permutation) 951 if (VEC_length (int, load_permutation)
816 != (unsigned int) (group_size * group_size)) 952 != (unsigned int) (group_size * group_size))
817 return false; 953 return false;
818 954
819 supported = true; 955 supported = true;
840 break; 976 break;
841 } 977 }
842 978
843 SET_BIT (load_index, prev); 979 SET_BIT (load_index, prev);
844 } 980 }
845 981
982 for (j = 0; j < group_size; j++)
983 if (!TEST_BIT (load_index, j))
984 return false;
985
846 sbitmap_free (load_index); 986 sbitmap_free (load_index);
847 987
848 if (supported && i == group_size * group_size 988 if (supported && i == group_size * group_size
849 && vect_supported_slp_permutation_p (slp_instn)) 989 && vect_supported_slp_permutation_p (slp_instn))
850 return true; 990 return true;
888 { 1028 {
889 slp_instance new_instance; 1029 slp_instance new_instance;
890 slp_tree node = XNEW (struct _slp_tree); 1030 slp_tree node = XNEW (struct _slp_tree);
891 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); 1031 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
892 unsigned int unrolling_factor = 1, nunits; 1032 unsigned int unrolling_factor = 1, nunits;
893 tree vectype, scalar_type; 1033 tree vectype, scalar_type = NULL_TREE;
894 gimple next; 1034 gimple next;
895 unsigned int vectorization_factor = 0; 1035 unsigned int vectorization_factor = 0;
896 int inside_cost = 0, outside_cost = 0, ncopies_for_cost; 1036 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
897 unsigned int max_nunits = 0; 1037 unsigned int max_nunits = 0;
898 VEC (int, heap) *load_permutation; 1038 VEC (int, heap) *load_permutation;
899 VEC (slp_tree, heap) *loads; 1039 VEC (slp_tree, heap) *loads;
900 1040 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
901 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF ( 1041
902 vinfo_for_stmt (stmt)))); 1042 if (dr)
903 vectype = get_vectype_for_scalar_type (scalar_type); 1043 {
1044 scalar_type = TREE_TYPE (DR_REF (dr));
1045 vectype = get_vectype_for_scalar_type (scalar_type);
1046 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1047 }
1048 else
1049 {
1050 gcc_assert (loop_vinfo);
1051 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1052 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1053 }
1054
904 if (!vectype) 1055 if (!vectype)
905 { 1056 {
906 if (vect_print_dump_info (REPORT_SLP)) 1057 if (vect_print_dump_info (REPORT_SLP))
907 { 1058 {
908 fprintf (vect_dump, "Build SLP failed: unsupported data-type "); 1059 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
909 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 1060 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
910 } 1061 }
1062
911 return false; 1063 return false;
912 } 1064 }
913 1065
914 nunits = TYPE_VECTOR_SUBPARTS (vectype); 1066 nunits = TYPE_VECTOR_SUBPARTS (vectype);
915 if (loop_vinfo) 1067 if (loop_vinfo)
930 } 1082 }
931 1083
932 /* Create a node (a root of the SLP tree) for the packed strided stores. */ 1084 /* Create a node (a root of the SLP tree) for the packed strided stores. */
933 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); 1085 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
934 next = stmt; 1086 next = stmt;
935 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ 1087 if (dr)
936 while (next) 1088 {
937 { 1089 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
938 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); 1090 while (next)
939 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); 1091 {
1092 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1093 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1094 }
1095 }
1096 else
1097 {
1098 /* Collect reduction statements. */
1099 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1100 next);
1101 i++)
1102 {
1103 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1104 if (vect_print_dump_info (REPORT_DETAILS))
1105 {
1106 fprintf (vect_dump, "pushing reduction into node: ");
1107 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
1108 }
1109 }
940 } 1110 }
941 1111
942 SLP_TREE_VEC_STMTS (node) = NULL; 1112 SLP_TREE_VEC_STMTS (node) = NULL;
943 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; 1113 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
944 SLP_TREE_LEFT (node) = NULL; 1114 SLP_TREE_LEFT (node) = NULL;
1027 1197
1028 bool 1198 bool
1029 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) 1199 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1030 { 1200 {
1031 unsigned int i; 1201 unsigned int i;
1032 VEC (gimple, heap) *strided_stores; 1202 VEC (gimple, heap) *strided_stores, *reductions = NULL;
1033 gimple store; 1203 gimple store;
1034 bool ok = false; 1204 bool ok = false;
1035 1205
1036 if (vect_print_dump_info (REPORT_SLP)) 1206 if (vect_print_dump_info (REPORT_SLP))
1037 fprintf (vect_dump, "=== vect_analyze_slp ==="); 1207 fprintf (vect_dump, "=== vect_analyze_slp ===");
1038 1208
1039 if (loop_vinfo) 1209 if (loop_vinfo)
1040 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo); 1210 {
1211 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1212 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1213 }
1041 else 1214 else
1042 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo); 1215 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1043 1216
1217 /* Find SLP sequences starting from groups of strided stores. */
1044 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++) 1218 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1045 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store)) 1219 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1046 ok = true; 1220 ok = true;
1047 1221
1048 if (bb_vinfo && !ok) 1222 if (bb_vinfo && !ok)
1050 if (vect_print_dump_info (REPORT_SLP)) 1224 if (vect_print_dump_info (REPORT_SLP))
1051 fprintf (vect_dump, "Failed to SLP the basic block."); 1225 fprintf (vect_dump, "Failed to SLP the basic block.");
1052 1226
1053 return false; 1227 return false;
1054 } 1228 }
1229
1230 /* Find SLP sequences starting from groups of reductions. */
1231 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1232 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1233 VEC_index (gimple, reductions, 0)))
1234 ok = true;
1055 1235
1056 return true; 1236 return true;
1057 } 1237 }
1058 1238
1059 1239
1100 { 1280 {
1101 int i; 1281 int i;
1102 gimple stmt; 1282 gimple stmt;
1103 imm_use_iterator imm_iter; 1283 imm_use_iterator imm_iter;
1104 gimple use_stmt; 1284 gimple use_stmt;
1285 stmt_vec_info stmt_vinfo;
1105 1286
1106 if (!node) 1287 if (!node)
1107 return; 1288 return;
1108 1289
1109 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++) 1290 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1110 if (PURE_SLP_STMT (vinfo_for_stmt (stmt)) 1291 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1111 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME) 1292 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1112 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0)) 1293 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1113 if (vinfo_for_stmt (use_stmt) 1294 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1114 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)) 1295 && !STMT_SLP_TYPE (stmt_vinfo)
1115 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt))) 1296 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1297 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1298 && !(gimple_code (use_stmt) == GIMPLE_PHI
1299 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1300 == vect_reduction_def))
1116 vect_mark_slp_stmts (node, hybrid, i); 1301 vect_mark_slp_stmts (node, hybrid, i);
1117 1302
1118 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); 1303 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1119 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); 1304 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1120 } 1305 }
1264 VEC (ddr_p, heap) *ddrs; 1449 VEC (ddr_p, heap) *ddrs;
1265 VEC (slp_instance, heap) *slp_instances; 1450 VEC (slp_instance, heap) *slp_instances;
1266 slp_instance instance; 1451 slp_instance instance;
1267 int i, insns = 0; 1452 int i, insns = 0;
1268 gimple_stmt_iterator gsi; 1453 gimple_stmt_iterator gsi;
1454 int min_vf = 2;
1455 int max_vf = MAX_VECTORIZATION_FACTOR;
1269 1456
1270 if (vect_print_dump_info (REPORT_DETAILS)) 1457 if (vect_print_dump_info (REPORT_DETAILS))
1271 fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); 1458 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1272 1459
1273 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1460 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1274 insns++; 1461 {
1462 gimple stmt = gsi_stmt (gsi);
1463 if (!is_gimple_debug (stmt)
1464 && !gimple_nop_p (stmt)
1465 && gimple_code (stmt) != GIMPLE_LABEL)
1466 insns++;
1467 }
1275 1468
1276 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) 1469 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1277 { 1470 {
1278 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 1471 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1279 fprintf (vect_dump, "not vectorized: too many instructions in basic " 1472 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1284 1477
1285 bb_vinfo = new_bb_vec_info (bb); 1478 bb_vinfo = new_bb_vec_info (bb);
1286 if (!bb_vinfo) 1479 if (!bb_vinfo)
1287 return NULL; 1480 return NULL;
1288 1481
1289 if (!vect_analyze_data_refs (NULL, bb_vinfo)) 1482 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1290 { 1483 {
1291 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 1484 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1292 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic " 1485 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1293 "block.\n"); 1486 "block.\n");
1294 1487
1305 1498
1306 destroy_bb_vec_info (bb_vinfo); 1499 destroy_bb_vec_info (bb_vinfo);
1307 return NULL; 1500 return NULL;
1308 } 1501 }
1309 1502
1503 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1504 || min_vf > max_vf)
1505 {
1506 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1507 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1508 "in basic block.\n");
1509
1510 destroy_bb_vec_info (bb_vinfo);
1511 return NULL;
1512 }
1513
1310 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo)) 1514 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1311 { 1515 {
1312 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 1516 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1313 fprintf (vect_dump, "not vectorized: bad data alignment in basic " 1517 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1314 "block.\n"); 1518 "block.\n");
1315
1316 destroy_bb_vec_info (bb_vinfo);
1317 return NULL;
1318 }
1319
1320 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
1321 {
1322 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1323 fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
1324 " block.\n");
1325 1519
1326 destroy_bb_vec_info (bb_vinfo); 1520 destroy_bb_vec_info (bb_vinfo);
1327 return NULL; 1521 return NULL;
1328 } 1522 }
1329 1523
1410 1604
1411 1605
1412 /* For constant and loop invariant defs of SLP_NODE this function returns 1606 /* For constant and loop invariant defs of SLP_NODE this function returns
1413 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. 1607 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1414 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar 1608 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1415 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */ 1609 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1610 REDUC_INDEX is the index of the reduction operand in the statements, unless
1611 it is -1. */
1416 1612
1417 static void 1613 static void
1418 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds, 1614 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1419 unsigned int op_num, unsigned int number_of_vectors) 1615 unsigned int op_num, unsigned int number_of_vectors,
1616 int reduc_index)
1420 { 1617 {
1421 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node); 1618 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1422 gimple stmt = VEC_index (gimple, stmts, 0); 1619 gimple stmt = VEC_index (gimple, stmts, 0);
1423 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 1620 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1424 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1425 int nunits; 1621 int nunits;
1426 tree vec_cst; 1622 tree vec_cst;
1427 tree t = NULL_TREE; 1623 tree t = NULL_TREE;
1428 int j, number_of_places_left_in_vector; 1624 int j, number_of_places_left_in_vector;
1429 tree vector_type; 1625 tree vector_type;
1431 int group_size = VEC_length (gimple, stmts); 1627 int group_size = VEC_length (gimple, stmts);
1432 unsigned int vec_num, i; 1628 unsigned int vec_num, i;
1433 int number_of_copies = 1; 1629 int number_of_copies = 1;
1434 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors); 1630 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1435 bool constant_p, is_store; 1631 bool constant_p, is_store;
1632 tree neutral_op = NULL;
1633
1634 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1635 {
1636 enum tree_code code = gimple_assign_rhs_code (stmt);
1637 if (reduc_index == -1)
1638 {
1639 VEC_free (tree, heap, *vec_oprnds);
1640 return;
1641 }
1642
1643 op_num = reduc_index - 1;
1644 op = gimple_op (stmt, op_num + 1);
1645 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1646 we need either neutral operands or the original operands. See
1647 get_initial_def_for_reduction() for details. */
1648 switch (code)
1649 {
1650 case WIDEN_SUM_EXPR:
1651 case DOT_PROD_EXPR:
1652 case PLUS_EXPR:
1653 case MINUS_EXPR:
1654 case BIT_IOR_EXPR:
1655 case BIT_XOR_EXPR:
1656 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1657 neutral_op = build_real (TREE_TYPE (op), dconst0);
1658 else
1659 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1660
1661 break;
1662
1663 case MULT_EXPR:
1664 case BIT_AND_EXPR:
1665 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1666 neutral_op = build_real (TREE_TYPE (op), dconst1);
1667 else
1668 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1669
1670 break;
1671
1672 default:
1673 neutral_op = NULL;
1674 }
1675 }
1436 1676
1437 if (STMT_VINFO_DATA_REF (stmt_vinfo)) 1677 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1438 { 1678 {
1439 is_store = true; 1679 is_store = true;
1440 op = gimple_assign_rhs1 (stmt); 1680 op = gimple_assign_rhs1 (stmt);
1444 is_store = false; 1684 is_store = false;
1445 op = gimple_op (stmt, op_num + 1); 1685 op = gimple_op (stmt, op_num + 1);
1446 } 1686 }
1447 1687
1448 if (CONSTANT_CLASS_P (op)) 1688 if (CONSTANT_CLASS_P (op))
1449 { 1689 constant_p = true;
1450 vector_type = vectype;
1451 constant_p = true;
1452 }
1453 else 1690 else
1454 { 1691 constant_p = false;
1455 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); 1692
1456 gcc_assert (vector_type); 1693 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1457 constant_p = false; 1694 gcc_assert (vector_type);
1458 }
1459 1695
1460 nunits = TYPE_VECTOR_SUBPARTS (vector_type); 1696 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1461 1697
1462 /* NUMBER_OF_COPIES is the number of times we need to use the same values in 1698 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1463 created vectors. It is greater than 1 if unrolling is performed. 1699 created vectors. It is greater than 1 if unrolling is performed.
1484 { 1720 {
1485 if (is_store) 1721 if (is_store)
1486 op = gimple_assign_rhs1 (stmt); 1722 op = gimple_assign_rhs1 (stmt);
1487 else 1723 else
1488 op = gimple_op (stmt, op_num + 1); 1724 op = gimple_op (stmt, op_num + 1);
1725
1726 if (reduc_index != -1)
1727 {
1728 struct loop *loop = (gimple_bb (stmt))->loop_father;
1729 gimple def_stmt = SSA_NAME_DEF_STMT (op);
1730
1731 gcc_assert (loop);
1732 /* Get the def before the loop. */
1733 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1734 loop_preheader_edge (loop));
1735 if (j != (number_of_copies - 1) && neutral_op)
1736 op = neutral_op;
1737 }
1489 1738
1490 /* Create 'vect_ = {op0,op1,...,opn}'. */ 1739 /* Create 'vect_ = {op0,op1,...,opn}'. */
1491 t = tree_cons (NULL_TREE, op, t); 1740 t = tree_cons (NULL_TREE, op, t);
1492 1741
1493 number_of_places_left_in_vector--; 1742 number_of_places_left_in_vector--;
1522 group of stmts, NUMBER_OF_VECTORS to be created is greater than 1771 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1523 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have 1772 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1524 to replicate the vectors. */ 1773 to replicate the vectors. */
1525 while (number_of_vectors > VEC_length (tree, *vec_oprnds)) 1774 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1526 { 1775 {
1527 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++) 1776 tree neutral_vec = NULL;
1528 VEC_quick_push (tree, *vec_oprnds, vop); 1777
1778 if (neutral_op)
1779 {
1780 if (!neutral_vec)
1781 {
1782 t = NULL;
1783 for (i = 0; i < (unsigned) nunits; i++)
1784 t = tree_cons (NULL_TREE, neutral_op, t);
1785 neutral_vec = build_vector (vector_type, t);
1786 }
1787
1788 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
1789 }
1790 else
1791 {
1792 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1793 VEC_quick_push (tree, *vec_oprnds, vop);
1794 }
1529 } 1795 }
1530 } 1796 }
1531 1797
1532 1798
1533 /* Get vectorized definitions from SLP_NODE that contains corresponding 1799 /* Get vectorized definitions from SLP_NODE that contains corresponding
1562 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from 1828 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1563 the right node. This is used when the second operand must remain scalar. */ 1829 the right node. This is used when the second operand must remain scalar. */
1564 1830
1565 void 1831 void
1566 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0, 1832 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1567 VEC (tree,heap) **vec_oprnds1) 1833 VEC (tree,heap) **vec_oprnds1, int reduc_index)
1568 { 1834 {
1569 gimple first_stmt; 1835 gimple first_stmt;
1570 enum tree_code code; 1836 enum tree_code code;
1571 int number_of_vects; 1837 int number_of_vects;
1572 HOST_WIDE_INT lhs_size_unit, rhs_size_unit; 1838 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1593 1859
1594 /* Allocate memory for vectorized defs. */ 1860 /* Allocate memory for vectorized defs. */
1595 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); 1861 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1596 1862
1597 /* SLP_NODE corresponds either to a group of stores or to a group of 1863 /* SLP_NODE corresponds either to a group of stores or to a group of
1598 unary/binary operations. We don't call this function for loads. */ 1864 unary/binary operations. We don't call this function for loads.
1599 if (SLP_TREE_LEFT (slp_node)) 1865 For reduction defs we call vect_get_constant_vectors(), since we are
1866 looking for initial loop invariant values. */
1867 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
1600 /* The defs are already vectorized. */ 1868 /* The defs are already vectorized. */
1601 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); 1869 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1602 else 1870 else
1603 /* Build vectors from scalar defs. */ 1871 /* Build vectors from scalar defs. */
1604 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects); 1872 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
1873 reduc_index);
1605 1874
1606 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) 1875 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1607 /* Since we don't call this function with loads, this is a group of 1876 /* Since we don't call this function with loads, this is a group of
1608 stores. */ 1877 stores. */
1878 return;
1879
1880 /* For reductions, we only need initial values. */
1881 if (reduc_index != -1)
1609 return; 1882 return;
1610 1883
1611 code = gimple_assign_rhs_code (first_stmt); 1884 code = gimple_assign_rhs_code (first_stmt);
1612 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) 1885 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1613 return; 1886 return;
1624 if (SLP_TREE_RIGHT (slp_node)) 1897 if (SLP_TREE_RIGHT (slp_node))
1625 /* The defs are already vectorized. */ 1898 /* The defs are already vectorized. */
1626 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); 1899 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1627 else 1900 else
1628 /* Build vectors from scalar defs. */ 1901 /* Build vectors from scalar defs. */
1629 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects); 1902 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
1630 } 1903 }
1631 1904
1632 1905
1633 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by 1906 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1634 building a vector of type MASK_TYPE from it) and two input vectors placed in 1907 building a vector of type MASK_TYPE from it) and two input vectors placed in
1964 2237
1965 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); 2238 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1966 stmt_info = vinfo_for_stmt (stmt); 2239 stmt_info = vinfo_for_stmt (stmt);
1967 2240
1968 /* VECTYPE is the type of the destination. */ 2241 /* VECTYPE is the type of the destination. */
1969 vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt))); 2242 vectype = STMT_VINFO_VECTYPE (stmt_info);
1970 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype); 2243 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1971 group_size = SLP_INSTANCE_GROUP_SIZE (instance); 2244 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1972 2245
1973 /* For each SLP instance calculate number of vector stmts to be created 2246 /* For each SLP instance calculate number of vector stmts to be created
1974 for the scalar stmts in each node of the SLP tree. Number of vector 2247 for the scalar stmts in each node of the SLP tree. Number of vector
2013 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); 2286 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2014 else 2287 else
2015 si = gsi_for_stmt (stmt); 2288 si = gsi_for_stmt (stmt);
2016 2289
2017 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance); 2290 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2018 if (is_store) 2291 return is_store;
2019 {
2020 if (DR_GROUP_FIRST_DR (stmt_info))
2021 /* If IS_STORE is TRUE, the vectorization of the
2022 interleaving chain was completed - free all the stores in
2023 the chain. */
2024 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2025 else
2026 /* FORNOW: SLP originates only from strided stores. */
2027 gcc_unreachable ();
2028
2029 return true;
2030 }
2031
2032 /* FORNOW: SLP originates only from strided stores. */
2033 return false;
2034 } 2292 }
2035 2293
2036 2294
2037 bool 2295 bool
2038 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) 2296 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2061 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS) 2319 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2062 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2320 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2063 fprintf (vect_dump, "vectorizing stmts using SLP."); 2321 fprintf (vect_dump, "vectorizing stmts using SLP.");
2064 } 2322 }
2065 2323
2324 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2325 {
2326 slp_tree root = SLP_INSTANCE_TREE (instance);
2327 gimple store;
2328 unsigned int j;
2329 gimple_stmt_iterator gsi;
2330
2331 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2332 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2333 {
2334 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2335 break;
2336
2337 /* Free the attached stmt_vec_info and remove the stmt. */
2338 gsi = gsi_for_stmt (store);
2339 gsi_remove (&gsi, true);
2340 free_stmt_vec_info (store);
2341 }
2342 }
2343
2066 return is_store; 2344 return is_store;
2067 } 2345 }
2068 2346
2069 2347
2070 /* Vectorize the basic block. */ 2348 /* Vectorize the basic block. */