Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-vect-slp.c @ 63:b7f97abdc517 gcc-4.6-20100522
update gcc from gcc-4.5.0 to gcc-4.6
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 24 May 2010 12:47:05 +0900 |
parents | 77e2b8dfacca |
children | f6334be47118 |
comparison
equal
deleted
inserted
replaced
56:3c8a44c06a95 | 63:b7f97abdc517 |
---|---|
1 /* SLP - Basic Block Vectorization | 1 /* SLP - Basic Block Vectorization |
2 Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. | 2 Copyright (C) 2007, 2008, 2009, 2010 |
3 Foundation, Inc. | 3 Free Software Foundation, Inc. |
4 Contributed by Dorit Naishlos <dorit@il.ibm.com> | 4 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
5 and Ira Rosen <irar@il.ibm.com> | 5 and Ira Rosen <irar@il.ibm.com> |
6 | 6 |
7 This file is part of GCC. | 7 This file is part of GCC. |
8 | 8 |
27 #include "ggc.h" | 27 #include "ggc.h" |
28 #include "tree.h" | 28 #include "tree.h" |
29 #include "target.h" | 29 #include "target.h" |
30 #include "basic-block.h" | 30 #include "basic-block.h" |
31 #include "diagnostic.h" | 31 #include "diagnostic.h" |
32 #include "tree-pretty-print.h" | |
33 #include "gimple-pretty-print.h" | |
32 #include "tree-flow.h" | 34 #include "tree-flow.h" |
33 #include "tree-dump.h" | 35 #include "tree-dump.h" |
34 #include "cfgloop.h" | 36 #include "cfgloop.h" |
35 #include "cfglayout.h" | 37 #include "cfglayout.h" |
36 #include "expr.h" | 38 #include "expr.h" |
244 /* Not first stmt of the group, check that the def-stmt/s match | 246 /* Not first stmt of the group, check that the def-stmt/s match |
245 the def-stmt/s of the first stmt. */ | 247 the def-stmt/s of the first stmt. */ |
246 if ((i == 0 | 248 if ((i == 0 |
247 && (*first_stmt_dt0 != dt[i] | 249 && (*first_stmt_dt0 != dt[i] |
248 || (*first_stmt_def0_type && def | 250 || (*first_stmt_def0_type && def |
249 && *first_stmt_def0_type != TREE_TYPE (def)))) | 251 && !types_compatible_p (*first_stmt_def0_type, |
252 TREE_TYPE (def))))) | |
250 || (i == 1 | 253 || (i == 1 |
251 && (*first_stmt_dt1 != dt[i] | 254 && (*first_stmt_dt1 != dt[i] |
252 || (*first_stmt_def1_type && def | 255 || (*first_stmt_def1_type && def |
253 && *first_stmt_def1_type != TREE_TYPE (def)))) | 256 && !types_compatible_p (*first_stmt_def1_type, |
257 TREE_TYPE (def))))) | |
254 || (!def | 258 || (!def |
255 && TREE_TYPE (*first_stmt_const_oprnd) | 259 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), |
256 != TREE_TYPE (oprnd))) | 260 TREE_TYPE (oprnd)))) |
257 { | 261 { |
258 if (vect_print_dump_info (REPORT_SLP)) | 262 if (vect_print_dump_info (REPORT_SLP)) |
259 fprintf (vect_dump, "Build SLP failed: different types "); | 263 fprintf (vect_dump, "Build SLP failed: different types "); |
260 | 264 |
261 return false; | 265 return false; |
269 case vect_constant_def: | 273 case vect_constant_def: |
270 case vect_external_def: | 274 case vect_external_def: |
271 break; | 275 break; |
272 | 276 |
273 case vect_internal_def: | 277 case vect_internal_def: |
278 case vect_reduction_def: | |
274 if (i == 0) | 279 if (i == 0) |
275 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); | 280 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); |
276 else | 281 else |
277 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); | 282 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); |
278 break; | 283 break; |
328 struct data_reference *first_dr; | 333 struct data_reference *first_dr; |
329 bool pattern0 = false, pattern1 = false; | 334 bool pattern0 = false, pattern1 = false; |
330 HOST_WIDE_INT dummy; | 335 HOST_WIDE_INT dummy; |
331 bool permutation = false; | 336 bool permutation = false; |
332 unsigned int load_place; | 337 unsigned int load_place; |
333 gimple first_load; | 338 gimple first_load, prev_first_load = NULL; |
334 | 339 |
335 /* For every stmt in NODE find its def stmt/s. */ | 340 /* For every stmt in NODE find its def stmt/s. */ |
336 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++) | 341 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++) |
337 { | 342 { |
338 if (vect_print_dump_info (REPORT_SLP)) | 343 if (vect_print_dump_info (REPORT_SLP)) |
339 { | 344 { |
340 fprintf (vect_dump, "Build SLP for "); | 345 fprintf (vect_dump, "Build SLP for "); |
341 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | 346 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); |
342 } | 347 } |
348 | |
349 /* Fail to vectorize statements marked as unvectorizable. */ | |
350 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) | |
351 { | |
352 if (vect_print_dump_info (REPORT_SLP)) | |
353 { | |
354 fprintf (vect_dump, | |
355 "Build SLP failed: unvectorizable statement "); | |
356 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | |
357 } | |
358 | |
359 return false; | |
360 } | |
343 | 361 |
344 lhs = gimple_get_lhs (stmt); | 362 lhs = gimple_get_lhs (stmt); |
345 if (lhs == NULL_TREE) | 363 if (lhs == NULL_TREE) |
346 { | 364 { |
347 if (vect_print_dump_info (REPORT_SLP)) | 365 if (vect_print_dump_info (REPORT_SLP)) |
481 &first_stmt_const_oprnd, | 499 &first_stmt_const_oprnd, |
482 ncopies_for_cost, | 500 ncopies_for_cost, |
483 &pattern0, &pattern1)) | 501 &pattern0, &pattern1)) |
484 return false; | 502 return false; |
485 } | 503 } |
486 else | 504 else |
487 { | 505 { |
488 /* Load. */ | 506 /* Load. */ |
489 /* FORNOW: Check that there is no gap between the loads. */ | 507 /* FORNOW: Check that there is no gap between the loads. */ |
490 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt | 508 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt |
491 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0) | 509 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0) |
492 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt | 510 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt |
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)) | 511 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)) |
494 { | 512 { |
495 if (vect_print_dump_info (REPORT_SLP)) | 513 if (vect_print_dump_info (REPORT_SLP)) |
496 { | 514 { |
497 fprintf (vect_dump, "Build SLP failed: strided " | 515 fprintf (vect_dump, "Build SLP failed: strided " |
498 "loads have gaps "); | 516 "loads have gaps "); |
499 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | 517 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); |
500 } | 518 } |
501 | 519 |
502 return false; | 520 return false; |
503 } | 521 } |
504 | 522 |
505 /* Check that the size of interleaved loads group is not | 523 /* Check that the size of interleaved loads group is not |
506 greater than the SLP group size. */ | 524 greater than the SLP group size. */ |
507 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) | 525 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) |
508 > ncopies * group_size) | 526 { |
509 { | 527 if (vect_print_dump_info (REPORT_SLP)) |
510 if (vect_print_dump_info (REPORT_SLP)) | 528 { |
511 { | 529 fprintf (vect_dump, "Build SLP failed: the number of " |
512 fprintf (vect_dump, "Build SLP failed: the number of " | 530 "interleaved loads is greater than" |
513 "interleaved loads is greater than" | 531 " the SLP group size "); |
514 " the SLP group size "); | 532 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); |
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | 533 } |
516 } | 534 |
517 | 535 return false; |
518 return false; | 536 } |
519 } | 537 |
520 | 538 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); |
521 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); | 539 if (prev_first_load) |
540 { | |
541 /* Check that there are no loads from different interleaving | |
542 chains in the same node. The only exception is complex | |
543 numbers. */ | |
544 if (prev_first_load != first_load | |
545 && rhs_code != REALPART_EXPR | |
546 && rhs_code != IMAGPART_EXPR) | |
547 { | |
548 if (vect_print_dump_info (REPORT_SLP)) | |
549 { | |
550 fprintf (vect_dump, "Build SLP failed: different " | |
551 "interleaving chains in one node "); | |
552 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | |
553 } | |
554 | |
555 return false; | |
556 } | |
557 } | |
558 else | |
559 prev_first_load = first_load; | |
522 | 560 |
523 if (first_load == stmt) | 561 if (first_load == stmt) |
524 { | 562 { |
525 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); | 563 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); |
526 if (vect_supportable_dr_alignment (first_dr) | 564 if (vect_supportable_dr_alignment (first_dr) |
783 | 821 |
784 return true; | 822 return true; |
785 } | 823 } |
786 | 824 |
787 | 825 |
826 /* Rearrange the statements of NODE according to PERMUTATION. */ | |
827 | |
828 static void | |
829 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, | |
830 VEC (int, heap) *permutation) | |
831 { | |
832 gimple stmt; | |
833 VEC (gimple, heap) *tmp_stmts; | |
834 unsigned int index, i; | |
835 | |
836 if (!node) | |
837 return; | |
838 | |
839 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation); | |
840 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation); | |
841 | |
842 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))); | |
843 tmp_stmts = VEC_alloc (gimple, heap, group_size); | |
844 | |
845 for (i = 0; i < group_size; i++) | |
846 VEC_safe_push (gimple, heap, tmp_stmts, NULL); | |
847 | |
848 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++) | |
849 { | |
850 index = VEC_index (int, permutation, i); | |
851 VEC_replace (gimple, tmp_stmts, index, stmt); | |
852 } | |
853 | |
854 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node)); | |
855 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts; | |
856 } | |
857 | |
858 | |
788 /* Check if the required load permutation is supported. | 859 /* Check if the required load permutation is supported. |
789 LOAD_PERMUTATION contains a list of indices of the loads. | 860 LOAD_PERMUTATION contains a list of indices of the loads. |
790 In SLP this permutation is relative to the order of strided stores that are | 861 In SLP this permutation is relative to the order of strided stores that are |
791 the base of the SLP instance. */ | 862 the base of the SLP instance. */ |
792 | 863 |
793 static bool | 864 static bool |
794 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, | 865 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, |
795 VEC (int, heap) *load_permutation) | 866 VEC (int, heap) *load_permutation) |
796 { | 867 { |
797 int i = 0, j, prev = -1, next, k; | 868 int i = 0, j, prev = -1, next, k, number_of_groups; |
798 bool supported; | 869 bool supported, bad_permutation = false; |
799 sbitmap load_index; | 870 sbitmap load_index; |
871 slp_tree node; | |
872 gimple stmt; | |
800 | 873 |
801 /* FORNOW: permutations are only supported in SLP. */ | 874 /* FORNOW: permutations are only supported in SLP. */ |
802 if (!slp_instn) | 875 if (!slp_instn) |
803 return false; | 876 return false; |
804 | 877 |
807 fprintf (vect_dump, "Load permutation "); | 880 fprintf (vect_dump, "Load permutation "); |
808 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++) | 881 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++) |
809 fprintf (vect_dump, "%d ", next); | 882 fprintf (vect_dump, "%d ", next); |
810 } | 883 } |
811 | 884 |
885 /* In case of reduction every load permutation is allowed, since the order | |
886 of the reduction statements is not important (as opposed to the case of | |
887 strided stores). The only condition we need to check is that all the | |
888 load nodes are of the same size and have the same permutation (and then | |
889 rearrange all the nodes of the SLP instance according to this | |
890 permutation). */ | |
891 | |
892 /* Check that all the load nodes are of the same size. */ | |
893 for (i = 0; | |
894 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node); | |
895 i++) | |
896 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)) | |
897 != (unsigned) group_size) | |
898 return false; | |
899 | |
900 node = SLP_INSTANCE_TREE (slp_instn); | |
901 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); | |
902 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP | |
903 instance, not all the loads belong to the same node or interleaving | |
904 group. Hence, we need to divide them into groups according to | |
905 GROUP_SIZE. */ | |
906 number_of_groups = VEC_length (int, load_permutation) / group_size; | |
907 | |
908 /* Reduction (there are no data-refs in the root). */ | |
909 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))) | |
910 { | |
911 int first_group_load_index; | |
912 | |
913 /* Compare all the permutation sequences to the first one. */ | |
914 for (i = 1; i < number_of_groups; i++) | |
915 { | |
916 k = 0; | |
917 for (j = i * group_size; j < i * group_size + group_size; j++) | |
918 { | |
919 next = VEC_index (int, load_permutation, j); | |
920 first_group_load_index = VEC_index (int, load_permutation, k); | |
921 | |
922 if (next != first_group_load_index) | |
923 { | |
924 bad_permutation = true; | |
925 break; | |
926 } | |
927 | |
928 k++; | |
929 } | |
930 | |
931 if (bad_permutation) | |
932 break; | |
933 } | |
934 | |
935 if (!bad_permutation) | |
936 { | |
937 /* This permutaion is valid for reduction. Since the order of the | |
938 statements in the nodes is not important unless they are memory | |
939 accesses, we can rearrange the statements in all the nodes | |
940 according to the order of the loads. */ | |
941 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size, | |
942 load_permutation); | |
943 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); | |
944 return true; | |
945 } | |
946 } | |
947 | |
812 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to | 948 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to |
813 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as | 949 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as |
814 well. */ | 950 well (unless it's reduction). */ |
815 if (VEC_length (int, load_permutation) | 951 if (VEC_length (int, load_permutation) |
816 != (unsigned int) (group_size * group_size)) | 952 != (unsigned int) (group_size * group_size)) |
817 return false; | 953 return false; |
818 | 954 |
819 supported = true; | 955 supported = true; |
840 break; | 976 break; |
841 } | 977 } |
842 | 978 |
843 SET_BIT (load_index, prev); | 979 SET_BIT (load_index, prev); |
844 } | 980 } |
845 | 981 |
982 for (j = 0; j < group_size; j++) | |
983 if (!TEST_BIT (load_index, j)) | |
984 return false; | |
985 | |
846 sbitmap_free (load_index); | 986 sbitmap_free (load_index); |
847 | 987 |
848 if (supported && i == group_size * group_size | 988 if (supported && i == group_size * group_size |
849 && vect_supported_slp_permutation_p (slp_instn)) | 989 && vect_supported_slp_permutation_p (slp_instn)) |
850 return true; | 990 return true; |
888 { | 1028 { |
889 slp_instance new_instance; | 1029 slp_instance new_instance; |
890 slp_tree node = XNEW (struct _slp_tree); | 1030 slp_tree node = XNEW (struct _slp_tree); |
891 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); | 1031 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); |
892 unsigned int unrolling_factor = 1, nunits; | 1032 unsigned int unrolling_factor = 1, nunits; |
893 tree vectype, scalar_type; | 1033 tree vectype, scalar_type = NULL_TREE; |
894 gimple next; | 1034 gimple next; |
895 unsigned int vectorization_factor = 0; | 1035 unsigned int vectorization_factor = 0; |
896 int inside_cost = 0, outside_cost = 0, ncopies_for_cost; | 1036 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i; |
897 unsigned int max_nunits = 0; | 1037 unsigned int max_nunits = 0; |
898 VEC (int, heap) *load_permutation; | 1038 VEC (int, heap) *load_permutation; |
899 VEC (slp_tree, heap) *loads; | 1039 VEC (slp_tree, heap) *loads; |
900 | 1040 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); |
901 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF ( | 1041 |
902 vinfo_for_stmt (stmt)))); | 1042 if (dr) |
903 vectype = get_vectype_for_scalar_type (scalar_type); | 1043 { |
1044 scalar_type = TREE_TYPE (DR_REF (dr)); | |
1045 vectype = get_vectype_for_scalar_type (scalar_type); | |
1046 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); | |
1047 } | |
1048 else | |
1049 { | |
1050 gcc_assert (loop_vinfo); | |
1051 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); | |
1052 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)); | |
1053 } | |
1054 | |
904 if (!vectype) | 1055 if (!vectype) |
905 { | 1056 { |
906 if (vect_print_dump_info (REPORT_SLP)) | 1057 if (vect_print_dump_info (REPORT_SLP)) |
907 { | 1058 { |
908 fprintf (vect_dump, "Build SLP failed: unsupported data-type "); | 1059 fprintf (vect_dump, "Build SLP failed: unsupported data-type "); |
909 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); | 1060 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); |
910 } | 1061 } |
1062 | |
911 return false; | 1063 return false; |
912 } | 1064 } |
913 | 1065 |
914 nunits = TYPE_VECTOR_SUBPARTS (vectype); | 1066 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
915 if (loop_vinfo) | 1067 if (loop_vinfo) |
930 } | 1082 } |
931 | 1083 |
932 /* Create a node (a root of the SLP tree) for the packed strided stores. */ | 1084 /* Create a node (a root of the SLP tree) for the packed strided stores. */ |
933 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); | 1085 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); |
934 next = stmt; | 1086 next = stmt; |
935 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ | 1087 if (dr) |
936 while (next) | 1088 { |
937 { | 1089 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ |
938 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); | 1090 while (next) |
939 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); | 1091 { |
1092 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); | |
1093 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); | |
1094 } | |
1095 } | |
1096 else | |
1097 { | |
1098 /* Collect reduction statements. */ | |
1099 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, | |
1100 next); | |
1101 i++) | |
1102 { | |
1103 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); | |
1104 if (vect_print_dump_info (REPORT_DETAILS)) | |
1105 { | |
1106 fprintf (vect_dump, "pushing reduction into node: "); | |
1107 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM); | |
1108 } | |
1109 } | |
940 } | 1110 } |
941 | 1111 |
942 SLP_TREE_VEC_STMTS (node) = NULL; | 1112 SLP_TREE_VEC_STMTS (node) = NULL; |
943 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; | 1113 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; |
944 SLP_TREE_LEFT (node) = NULL; | 1114 SLP_TREE_LEFT (node) = NULL; |
1027 | 1197 |
1028 bool | 1198 bool |
1029 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) | 1199 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) |
1030 { | 1200 { |
1031 unsigned int i; | 1201 unsigned int i; |
1032 VEC (gimple, heap) *strided_stores; | 1202 VEC (gimple, heap) *strided_stores, *reductions = NULL; |
1033 gimple store; | 1203 gimple store; |
1034 bool ok = false; | 1204 bool ok = false; |
1035 | 1205 |
1036 if (vect_print_dump_info (REPORT_SLP)) | 1206 if (vect_print_dump_info (REPORT_SLP)) |
1037 fprintf (vect_dump, "=== vect_analyze_slp ==="); | 1207 fprintf (vect_dump, "=== vect_analyze_slp ==="); |
1038 | 1208 |
1039 if (loop_vinfo) | 1209 if (loop_vinfo) |
1040 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo); | 1210 { |
1211 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo); | |
1212 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); | |
1213 } | |
1041 else | 1214 else |
1042 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo); | 1215 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo); |
1043 | 1216 |
1217 /* Find SLP sequences starting from groups of strided stores. */ | |
1044 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++) | 1218 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++) |
1045 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store)) | 1219 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store)) |
1046 ok = true; | 1220 ok = true; |
1047 | 1221 |
1048 if (bb_vinfo && !ok) | 1222 if (bb_vinfo && !ok) |
1050 if (vect_print_dump_info (REPORT_SLP)) | 1224 if (vect_print_dump_info (REPORT_SLP)) |
1051 fprintf (vect_dump, "Failed to SLP the basic block."); | 1225 fprintf (vect_dump, "Failed to SLP the basic block."); |
1052 | 1226 |
1053 return false; | 1227 return false; |
1054 } | 1228 } |
1229 | |
1230 /* Find SLP sequences starting from groups of reductions. */ | |
1231 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1 | |
1232 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, | |
1233 VEC_index (gimple, reductions, 0))) | |
1234 ok = true; | |
1055 | 1235 |
1056 return true; | 1236 return true; |
1057 } | 1237 } |
1058 | 1238 |
1059 | 1239 |
1100 { | 1280 { |
1101 int i; | 1281 int i; |
1102 gimple stmt; | 1282 gimple stmt; |
1103 imm_use_iterator imm_iter; | 1283 imm_use_iterator imm_iter; |
1104 gimple use_stmt; | 1284 gimple use_stmt; |
1285 stmt_vec_info stmt_vinfo; | |
1105 | 1286 |
1106 if (!node) | 1287 if (!node) |
1107 return; | 1288 return; |
1108 | 1289 |
1109 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++) | 1290 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++) |
1110 if (PURE_SLP_STMT (vinfo_for_stmt (stmt)) | 1291 if (PURE_SLP_STMT (vinfo_for_stmt (stmt)) |
1111 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME) | 1292 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME) |
1112 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0)) | 1293 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0)) |
1113 if (vinfo_for_stmt (use_stmt) | 1294 if ((stmt_vinfo = vinfo_for_stmt (use_stmt)) |
1114 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)) | 1295 && !STMT_SLP_TYPE (stmt_vinfo) |
1115 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt))) | 1296 && (STMT_VINFO_RELEVANT (stmt_vinfo) |
1297 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))) | |
1298 && !(gimple_code (use_stmt) == GIMPLE_PHI | |
1299 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt)) | |
1300 == vect_reduction_def)) | |
1116 vect_mark_slp_stmts (node, hybrid, i); | 1301 vect_mark_slp_stmts (node, hybrid, i); |
1117 | 1302 |
1118 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); | 1303 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); |
1119 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); | 1304 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); |
1120 } | 1305 } |
1264 VEC (ddr_p, heap) *ddrs; | 1449 VEC (ddr_p, heap) *ddrs; |
1265 VEC (slp_instance, heap) *slp_instances; | 1450 VEC (slp_instance, heap) *slp_instances; |
1266 slp_instance instance; | 1451 slp_instance instance; |
1267 int i, insns = 0; | 1452 int i, insns = 0; |
1268 gimple_stmt_iterator gsi; | 1453 gimple_stmt_iterator gsi; |
1454 int min_vf = 2; | |
1455 int max_vf = MAX_VECTORIZATION_FACTOR; | |
1269 | 1456 |
1270 if (vect_print_dump_info (REPORT_DETAILS)) | 1457 if (vect_print_dump_info (REPORT_DETAILS)) |
1271 fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); | 1458 fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); |
1272 | 1459 |
1273 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | 1460 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
1274 insns++; | 1461 { |
1462 gimple stmt = gsi_stmt (gsi); | |
1463 if (!is_gimple_debug (stmt) | |
1464 && !gimple_nop_p (stmt) | |
1465 && gimple_code (stmt) != GIMPLE_LABEL) | |
1466 insns++; | |
1467 } | |
1275 | 1468 |
1276 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) | 1469 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) |
1277 { | 1470 { |
1278 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | 1471 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) |
1279 fprintf (vect_dump, "not vectorized: too many instructions in basic " | 1472 fprintf (vect_dump, "not vectorized: too many instructions in basic " |
1284 | 1477 |
1285 bb_vinfo = new_bb_vec_info (bb); | 1478 bb_vinfo = new_bb_vec_info (bb); |
1286 if (!bb_vinfo) | 1479 if (!bb_vinfo) |
1287 return NULL; | 1480 return NULL; |
1288 | 1481 |
1289 if (!vect_analyze_data_refs (NULL, bb_vinfo)) | 1482 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf)) |
1290 { | 1483 { |
1291 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | 1484 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) |
1292 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic " | 1485 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic " |
1293 "block.\n"); | 1486 "block.\n"); |
1294 | 1487 |
1305 | 1498 |
1306 destroy_bb_vec_info (bb_vinfo); | 1499 destroy_bb_vec_info (bb_vinfo); |
1307 return NULL; | 1500 return NULL; |
1308 } | 1501 } |
1309 | 1502 |
1503 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf) | |
1504 || min_vf > max_vf) | |
1505 { | |
1506 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | |
1507 fprintf (vect_dump, "not vectorized: unhandled data dependence " | |
1508 "in basic block.\n"); | |
1509 | |
1510 destroy_bb_vec_info (bb_vinfo); | |
1511 return NULL; | |
1512 } | |
1513 | |
1310 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo)) | 1514 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo)) |
1311 { | 1515 { |
1312 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | 1516 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) |
1313 fprintf (vect_dump, "not vectorized: bad data alignment in basic " | 1517 fprintf (vect_dump, "not vectorized: bad data alignment in basic " |
1314 "block.\n"); | 1518 "block.\n"); |
1315 | |
1316 destroy_bb_vec_info (bb_vinfo); | |
1317 return NULL; | |
1318 } | |
1319 | |
1320 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo)) | |
1321 { | |
1322 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | |
1323 fprintf (vect_dump, "not vectorized: unhandled data dependence in basic" | |
1324 " block.\n"); | |
1325 | 1519 |
1326 destroy_bb_vec_info (bb_vinfo); | 1520 destroy_bb_vec_info (bb_vinfo); |
1327 return NULL; | 1521 return NULL; |
1328 } | 1522 } |
1329 | 1523 |
1410 | 1604 |
1411 | 1605 |
1412 /* For constant and loop invariant defs of SLP_NODE this function returns | 1606 /* For constant and loop invariant defs of SLP_NODE this function returns |
1413 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. | 1607 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. |
1414 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar | 1608 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar |
1415 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */ | 1609 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. |
1610 REDUC_INDEX is the index of the reduction operand in the statements, unless | |
1611 it is -1. */ | |
1416 | 1612 |
1417 static void | 1613 static void |
1418 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds, | 1614 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds, |
1419 unsigned int op_num, unsigned int number_of_vectors) | 1615 unsigned int op_num, unsigned int number_of_vectors, |
1616 int reduc_index) | |
1420 { | 1617 { |
1421 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node); | 1618 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
1422 gimple stmt = VEC_index (gimple, stmts, 0); | 1619 gimple stmt = VEC_index (gimple, stmts, 0); |
1423 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | 1620 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); |
1424 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
1425 int nunits; | 1621 int nunits; |
1426 tree vec_cst; | 1622 tree vec_cst; |
1427 tree t = NULL_TREE; | 1623 tree t = NULL_TREE; |
1428 int j, number_of_places_left_in_vector; | 1624 int j, number_of_places_left_in_vector; |
1429 tree vector_type; | 1625 tree vector_type; |
1431 int group_size = VEC_length (gimple, stmts); | 1627 int group_size = VEC_length (gimple, stmts); |
1432 unsigned int vec_num, i; | 1628 unsigned int vec_num, i; |
1433 int number_of_copies = 1; | 1629 int number_of_copies = 1; |
1434 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors); | 1630 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors); |
1435 bool constant_p, is_store; | 1631 bool constant_p, is_store; |
1632 tree neutral_op = NULL; | |
1633 | |
1634 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) | |
1635 { | |
1636 enum tree_code code = gimple_assign_rhs_code (stmt); | |
1637 if (reduc_index == -1) | |
1638 { | |
1639 VEC_free (tree, heap, *vec_oprnds); | |
1640 return; | |
1641 } | |
1642 | |
1643 op_num = reduc_index - 1; | |
1644 op = gimple_op (stmt, op_num + 1); | |
1645 /* For additional copies (see the explanation of NUMBER_OF_COPIES below) | |
1646 we need either neutral operands or the original operands. See | |
1647 get_initial_def_for_reduction() for details. */ | |
1648 switch (code) | |
1649 { | |
1650 case WIDEN_SUM_EXPR: | |
1651 case DOT_PROD_EXPR: | |
1652 case PLUS_EXPR: | |
1653 case MINUS_EXPR: | |
1654 case BIT_IOR_EXPR: | |
1655 case BIT_XOR_EXPR: | |
1656 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op))) | |
1657 neutral_op = build_real (TREE_TYPE (op), dconst0); | |
1658 else | |
1659 neutral_op = build_int_cst (TREE_TYPE (op), 0); | |
1660 | |
1661 break; | |
1662 | |
1663 case MULT_EXPR: | |
1664 case BIT_AND_EXPR: | |
1665 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op))) | |
1666 neutral_op = build_real (TREE_TYPE (op), dconst1); | |
1667 else | |
1668 neutral_op = build_int_cst (TREE_TYPE (op), 1); | |
1669 | |
1670 break; | |
1671 | |
1672 default: | |
1673 neutral_op = NULL; | |
1674 } | |
1675 } | |
1436 | 1676 |
1437 if (STMT_VINFO_DATA_REF (stmt_vinfo)) | 1677 if (STMT_VINFO_DATA_REF (stmt_vinfo)) |
1438 { | 1678 { |
1439 is_store = true; | 1679 is_store = true; |
1440 op = gimple_assign_rhs1 (stmt); | 1680 op = gimple_assign_rhs1 (stmt); |
1444 is_store = false; | 1684 is_store = false; |
1445 op = gimple_op (stmt, op_num + 1); | 1685 op = gimple_op (stmt, op_num + 1); |
1446 } | 1686 } |
1447 | 1687 |
1448 if (CONSTANT_CLASS_P (op)) | 1688 if (CONSTANT_CLASS_P (op)) |
1449 { | 1689 constant_p = true; |
1450 vector_type = vectype; | |
1451 constant_p = true; | |
1452 } | |
1453 else | 1690 else |
1454 { | 1691 constant_p = false; |
1455 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); | 1692 |
1456 gcc_assert (vector_type); | 1693 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); |
1457 constant_p = false; | 1694 gcc_assert (vector_type); |
1458 } | |
1459 | 1695 |
1460 nunits = TYPE_VECTOR_SUBPARTS (vector_type); | 1696 nunits = TYPE_VECTOR_SUBPARTS (vector_type); |
1461 | 1697 |
1462 /* NUMBER_OF_COPIES is the number of times we need to use the same values in | 1698 /* NUMBER_OF_COPIES is the number of times we need to use the same values in |
1463 created vectors. It is greater than 1 if unrolling is performed. | 1699 created vectors. It is greater than 1 if unrolling is performed. |
1484 { | 1720 { |
1485 if (is_store) | 1721 if (is_store) |
1486 op = gimple_assign_rhs1 (stmt); | 1722 op = gimple_assign_rhs1 (stmt); |
1487 else | 1723 else |
1488 op = gimple_op (stmt, op_num + 1); | 1724 op = gimple_op (stmt, op_num + 1); |
1725 | |
1726 if (reduc_index != -1) | |
1727 { | |
1728 struct loop *loop = (gimple_bb (stmt))->loop_father; | |
1729 gimple def_stmt = SSA_NAME_DEF_STMT (op); | |
1730 | |
1731 gcc_assert (loop); | |
1732 /* Get the def before the loop. */ | |
1733 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, | |
1734 loop_preheader_edge (loop)); | |
1735 if (j != (number_of_copies - 1) && neutral_op) | |
1736 op = neutral_op; | |
1737 } | |
1489 | 1738 |
1490 /* Create 'vect_ = {op0,op1,...,opn}'. */ | 1739 /* Create 'vect_ = {op0,op1,...,opn}'. */ |
1491 t = tree_cons (NULL_TREE, op, t); | 1740 t = tree_cons (NULL_TREE, op, t); |
1492 | 1741 |
1493 number_of_places_left_in_vector--; | 1742 number_of_places_left_in_vector--; |
1522 group of stmts, NUMBER_OF_VECTORS to be created is greater than | 1771 group of stmts, NUMBER_OF_VECTORS to be created is greater than |
1523 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have | 1772 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have |
1524 to replicate the vectors. */ | 1773 to replicate the vectors. */ |
1525 while (number_of_vectors > VEC_length (tree, *vec_oprnds)) | 1774 while (number_of_vectors > VEC_length (tree, *vec_oprnds)) |
1526 { | 1775 { |
1527 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++) | 1776 tree neutral_vec = NULL; |
1528 VEC_quick_push (tree, *vec_oprnds, vop); | 1777 |
1778 if (neutral_op) | |
1779 { | |
1780 if (!neutral_vec) | |
1781 { | |
1782 t = NULL; | |
1783 for (i = 0; i < (unsigned) nunits; i++) | |
1784 t = tree_cons (NULL_TREE, neutral_op, t); | |
1785 neutral_vec = build_vector (vector_type, t); | |
1786 } | |
1787 | |
1788 VEC_quick_push (tree, *vec_oprnds, neutral_vec); | |
1789 } | |
1790 else | |
1791 { | |
1792 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++) | |
1793 VEC_quick_push (tree, *vec_oprnds, vop); | |
1794 } | |
1529 } | 1795 } |
1530 } | 1796 } |
1531 | 1797 |
1532 | 1798 |
1533 /* Get vectorized definitions from SLP_NODE that contains corresponding | 1799 /* Get vectorized definitions from SLP_NODE that contains corresponding |
1562 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from | 1828 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from |
1563 the right node. This is used when the second operand must remain scalar. */ | 1829 the right node. This is used when the second operand must remain scalar. */ |
1564 | 1830 |
1565 void | 1831 void |
1566 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0, | 1832 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0, |
1567 VEC (tree,heap) **vec_oprnds1) | 1833 VEC (tree,heap) **vec_oprnds1, int reduc_index) |
1568 { | 1834 { |
1569 gimple first_stmt; | 1835 gimple first_stmt; |
1570 enum tree_code code; | 1836 enum tree_code code; |
1571 int number_of_vects; | 1837 int number_of_vects; |
1572 HOST_WIDE_INT lhs_size_unit, rhs_size_unit; | 1838 HOST_WIDE_INT lhs_size_unit, rhs_size_unit; |
1593 | 1859 |
1594 /* Allocate memory for vectorized defs. */ | 1860 /* Allocate memory for vectorized defs. */ |
1595 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); | 1861 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); |
1596 | 1862 |
1597 /* SLP_NODE corresponds either to a group of stores or to a group of | 1863 /* SLP_NODE corresponds either to a group of stores or to a group of |
1598 unary/binary operations. We don't call this function for loads. */ | 1864 unary/binary operations. We don't call this function for loads. |
1599 if (SLP_TREE_LEFT (slp_node)) | 1865 For reduction defs we call vect_get_constant_vectors(), since we are |
1866 looking for initial loop invariant values. */ | |
1867 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1) | |
1600 /* The defs are already vectorized. */ | 1868 /* The defs are already vectorized. */ |
1601 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); | 1869 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); |
1602 else | 1870 else |
1603 /* Build vectors from scalar defs. */ | 1871 /* Build vectors from scalar defs. */ |
1604 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects); | 1872 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects, |
1873 reduc_index); | |
1605 | 1874 |
1606 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) | 1875 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) |
1607 /* Since we don't call this function with loads, this is a group of | 1876 /* Since we don't call this function with loads, this is a group of |
1608 stores. */ | 1877 stores. */ |
1878 return; | |
1879 | |
1880 /* For reductions, we only need initial values. */ | |
1881 if (reduc_index != -1) | |
1609 return; | 1882 return; |
1610 | 1883 |
1611 code = gimple_assign_rhs_code (first_stmt); | 1884 code = gimple_assign_rhs_code (first_stmt); |
1612 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) | 1885 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) |
1613 return; | 1886 return; |
1624 if (SLP_TREE_RIGHT (slp_node)) | 1897 if (SLP_TREE_RIGHT (slp_node)) |
1625 /* The defs are already vectorized. */ | 1898 /* The defs are already vectorized. */ |
1626 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); | 1899 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); |
1627 else | 1900 else |
1628 /* Build vectors from scalar defs. */ | 1901 /* Build vectors from scalar defs. */ |
1629 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects); | 1902 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1); |
1630 } | 1903 } |
1631 | 1904 |
1632 | 1905 |
1633 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by | 1906 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by |
1634 building a vector of type MASK_TYPE from it) and two input vectors placed in | 1907 building a vector of type MASK_TYPE from it) and two input vectors placed in |
1964 | 2237 |
1965 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); | 2238 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); |
1966 stmt_info = vinfo_for_stmt (stmt); | 2239 stmt_info = vinfo_for_stmt (stmt); |
1967 | 2240 |
1968 /* VECTYPE is the type of the destination. */ | 2241 /* VECTYPE is the type of the destination. */ |
1969 vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt))); | 2242 vectype = STMT_VINFO_VECTYPE (stmt_info); |
1970 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype); | 2243 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype); |
1971 group_size = SLP_INSTANCE_GROUP_SIZE (instance); | 2244 group_size = SLP_INSTANCE_GROUP_SIZE (instance); |
1972 | 2245 |
1973 /* For each SLP instance calculate number of vector stmts to be created | 2246 /* For each SLP instance calculate number of vector stmts to be created |
1974 for the scalar stmts in each node of the SLP tree. Number of vector | 2247 for the scalar stmts in each node of the SLP tree. Number of vector |
2013 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); | 2286 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); |
2014 else | 2287 else |
2015 si = gsi_for_stmt (stmt); | 2288 si = gsi_for_stmt (stmt); |
2016 | 2289 |
2017 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance); | 2290 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance); |
2018 if (is_store) | 2291 return is_store; |
2019 { | |
2020 if (DR_GROUP_FIRST_DR (stmt_info)) | |
2021 /* If IS_STORE is TRUE, the vectorization of the | |
2022 interleaving chain was completed - free all the stores in | |
2023 the chain. */ | |
2024 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); | |
2025 else | |
2026 /* FORNOW: SLP originates only from strided stores. */ | |
2027 gcc_unreachable (); | |
2028 | |
2029 return true; | |
2030 } | |
2031 | |
2032 /* FORNOW: SLP originates only from strided stores. */ | |
2033 return false; | |
2034 } | 2292 } |
2035 | 2293 |
2036 | 2294 |
2037 bool | 2295 bool |
2038 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) | 2296 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) |
2061 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS) | 2319 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS) |
2062 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | 2320 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) |
2063 fprintf (vect_dump, "vectorizing stmts using SLP."); | 2321 fprintf (vect_dump, "vectorizing stmts using SLP."); |
2064 } | 2322 } |
2065 | 2323 |
2324 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++) | |
2325 { | |
2326 slp_tree root = SLP_INSTANCE_TREE (instance); | |
2327 gimple store; | |
2328 unsigned int j; | |
2329 gimple_stmt_iterator gsi; | |
2330 | |
2331 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store) | |
2332 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++) | |
2333 { | |
2334 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store))) | |
2335 break; | |
2336 | |
2337 /* Free the attached stmt_vec_info and remove the stmt. */ | |
2338 gsi = gsi_for_stmt (store); | |
2339 gsi_remove (&gsi, true); | |
2340 free_stmt_vec_info (store); | |
2341 } | |
2342 } | |
2343 | |
2066 return is_store; | 2344 return is_store; |
2067 } | 2345 } |
2068 | 2346 |
2069 | 2347 |
2070 /* Vectorize the basic block. */ | 2348 /* Vectorize the basic block. */ |