comparison gcc/tree-vect-stmts.c @ 67:f6334be47118

update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Mar 2011 17:18:12 +0900
parents b7f97abdc517
children 04ced10e8804
comparing 65:65488c3d617d with 67:f6334be47118
26 #include "tm.h" 26 #include "tm.h"
27 #include "ggc.h" 27 #include "ggc.h"
28 #include "tree.h" 28 #include "tree.h"
29 #include "target.h" 29 #include "target.h"
30 #include "basic-block.h" 30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-pretty-print.h" 31 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h" 32 #include "gimple-pretty-print.h"
34 #include "tree-flow.h" 33 #include "tree-flow.h"
35 #include "tree-dump.h" 34 #include "tree-dump.h"
36 #include "cfgloop.h" 35 #include "cfgloop.h"
37 #include "cfglayout.h" 36 #include "cfglayout.h"
38 #include "expr.h" 37 #include "expr.h"
39 #include "recog.h" 38 #include "recog.h"
40 #include "optabs.h" 39 #include "optabs.h"
41 #include "toplev.h" 40 #include "diagnostic-core.h"
42 #include "tree-vectorizer.h" 41 #include "tree-vectorizer.h"
43 #include "langhooks.h" 42 #include "langhooks.h"
44 43
45 44
46 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ 45 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
164 } 163 }
165 164
166 165
167 /* Function exist_non_indexing_operands_for_use_p 166 /* Function exist_non_indexing_operands_for_use_p
168 167
169 USE is one of the uses attached to STMT. Check if USE is 168 USE is one of the uses attached to STMT. Check if USE is
170 used in STMT for anything other than indexing an array. */ 169 used in STMT for anything other than indexing an array. */
171 170
172 static bool 171 static bool
173 exist_non_indexing_operands_for_use_p (tree use, gimple stmt) 172 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
174 { 173 {
175 tree operand; 174 tree operand;
176 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
177 176
178 /* USE corresponds to some operand in STMT. If there is no data 177 /* USE corresponds to some operand in STMT. If there is no data
179 reference in STMT, then any operand that corresponds to USE 178 reference in STMT, then any operand that corresponds to USE
180 is not indexing an array. */ 179 is not indexing an array. */
181 if (!STMT_VINFO_DATA_REF (stmt_info)) 180 if (!STMT_VINFO_DATA_REF (stmt_info))
182 return true; 181 return true;
183 182
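Note on exist_non_indexing_operands_for_use_p above: in a statement such as a[i] = b, the use of i serves only to index the data reference, so the predicate returns false for it, while the stored value b is an operand in its own right and yields true; when the statement has no data reference at all, every use trivially counts as non-indexing and the function returns true.
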
213 Function process_use. 212 Function process_use.
214 213
215 Inputs: 214 Inputs:
216 - a USE in STMT in a loop represented by LOOP_VINFO 215 - a USE in STMT in a loop represented by LOOP_VINFO
217 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt 216 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
218 that defined USE. This is done by calling mark_relevant and passing it 217 that defined USE. This is done by calling mark_relevant and passing it
219 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant). 218 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
220 219
221 Outputs: 220 Outputs:
222 Generally, LIVE_P and RELEVANT are used to define the liveness and 221 Generally, LIVE_P and RELEVANT are used to define the liveness and
223 relevance info of the DEF_STMT of this USE: 222 relevance info of the DEF_STMT of this USE:
464 variable; in this case we set the liveness/relevance as follows: 463 variable; in this case we set the liveness/relevance as follows:
465 live_p = false 464 live_p = false
466 relevant = vect_used_by_reduction 465 relevant = vect_used_by_reduction
467 This is because we distinguish between two kinds of relevant stmts - 466 This is because we distinguish between two kinds of relevant stmts -
468 those that are used by a reduction computation, and those that are 467 those that are used by a reduction computation, and those that are
469 (also) used by a regular computation. This allows us later on to 468 (also) used by a regular computation. This allows us later on to
470 identify stmts that are used solely by a reduction, and therefore the 469 identify stmts that are used solely by a reduction, and therefore the
471 order of the results that they produce does not have to be kept. */ 470 order of the results that they produce does not have to be kept. */
472 471
473 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo); 472 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
474 tmp_relevant = relevant; 473 tmp_relevant = relevant;
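
For instance, in for (i = 0; i < n; i++) sum += a[i] * b[i]; the multiply feeds only the reduction of sum, so its definition is marked relevant = vect_used_by_reduction with live_p = false: nothing outside the reduction consumes the partial products, so the order of the results it produces does not have to be kept, which is exactly the property the comment above relies on.
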
544 VEC_free (gimple, heap, worklist); 543 VEC_free (gimple, heap, worklist);
545 return true; 544 return true;
546 } 545 }
547 546
548 547
548 /* Get cost by calling cost target builtin. */
549
550 static inline
551 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
552 {
553 tree dummy_type = NULL;
554 int dummy = 0;
555
556 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
557 dummy_type, dummy);
558 }
559
560
561 /* Get cost for STMT. */
562
549 int 563 int
550 cost_for_stmt (gimple stmt) 564 cost_for_stmt (gimple stmt)
551 { 565 {
552 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
553 567
554 switch (STMT_VINFO_TYPE (stmt_info)) 568 switch (STMT_VINFO_TYPE (stmt_info))
555 { 569 {
556 case load_vec_info_type: 570 case load_vec_info_type:
557 return TARG_SCALAR_LOAD_COST; 571 return vect_get_stmt_cost (scalar_load);
558 case store_vec_info_type: 572 case store_vec_info_type:
559 return TARG_SCALAR_STORE_COST; 573 return vect_get_stmt_cost (scalar_store);
560 case op_vec_info_type: 574 case op_vec_info_type:
561 case condition_vec_info_type: 575 case condition_vec_info_type:
562 case assignment_vec_info_type: 576 case assignment_vec_info_type:
563 case reduc_vec_info_type: 577 case reduc_vec_info_type:
564 case induc_vec_info_type: 578 case induc_vec_info_type:
565 case type_promotion_vec_info_type: 579 case type_promotion_vec_info_type:
566 case type_demotion_vec_info_type: 580 case type_demotion_vec_info_type:
567 case type_conversion_vec_info_type: 581 case type_conversion_vec_info_type:
568 case call_vec_info_type: 582 case call_vec_info_type:
569 return TARG_SCALAR_STMT_COST; 583 return vect_get_stmt_cost (scalar_stmt);
570 case undef_vec_info_type: 584 case undef_vec_info_type:
571 default: 585 default:
572 gcc_unreachable (); 586 gcc_unreachable ();
573 } 587 }
574 } 588 }
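
The two functions above are the pivot of this update: vect_get_stmt_cost funnels every generic cost query through the targetm.vectorize.builtin_vectorization_cost hook (passing a NULL vectype and zero misalignment), and cost_for_stmt maps each scalar statement kind to the scalar_load / scalar_store / scalar_stmt cost enums, replacing the fixed TARG_* macros of the old snapshot. A minimal sketch of a target-side hook, with made-up cost values (the function name and the numbers are illustrative assumptions, not part of this changeset):

    /* Hypothetical implementation of the vectorization cost hook.
       Real targets tune these numbers per CPU.  */
    static int
    example_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                        tree vectype ATTRIBUTE_UNUSED,
                                        int misalign ATTRIBUTE_UNUSED)
    {
      switch (type_of_cost)
        {
        case unaligned_load:
        case unaligned_store:
          return 2;   /* Charge unaligned accesses double.  */
        default:
          return 1;   /* Everything else: one unit.  */
        }
    }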
588 602
589 /* The SLP costs were already calculated during SLP tree build. */ 603 /* The SLP costs were already calculated during SLP tree build. */
590 if (PURE_SLP_STMT (stmt_info)) 604 if (PURE_SLP_STMT (stmt_info))
591 return; 605 return;
592 606
593 inside_cost = ncopies * TARG_VEC_STMT_COST; 607 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
594 608
595 /* FORNOW: Assuming maximum 2 args per stmts. */ 609 /* FORNOW: Assuming maximum 2 args per stmts. */
596 for (i = 0; i < 2; i++) 610 for (i = 0; i < 2; i++)
597 { 611 {
598 if (dt[i] == vect_constant_def || dt[i] == vect_external_def) 612 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
599 outside_cost += TARG_SCALAR_TO_VEC_COST; 613 outside_cost += vect_get_stmt_cost (vector_stmt);
600 } 614 }
601 615
602 if (vect_print_dump_info (REPORT_COST)) 616 if (vect_print_dump_info (REPORT_COST))
603 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, " 617 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
604 "outside_cost = %d .", inside_cost, outside_cost); 618 "outside_cost = %d .", inside_cost, outside_cost);
635 void 649 void
636 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, 650 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
637 enum vect_def_type dt, slp_tree slp_node) 651 enum vect_def_type dt, slp_tree slp_node)
638 { 652 {
639 int group_size; 653 int group_size;
640 int inside_cost = 0, outside_cost = 0; 654 unsigned int inside_cost = 0, outside_cost = 0;
655 struct data_reference *first_dr;
656 gimple first_stmt;
641 657
642 /* The SLP costs were already calculated during SLP tree build. */ 658 /* The SLP costs were already calculated during SLP tree build. */
643 if (PURE_SLP_STMT (stmt_info)) 659 if (PURE_SLP_STMT (stmt_info))
644 return; 660 return;
645 661
646 if (dt == vect_constant_def || dt == vect_external_def) 662 if (dt == vect_constant_def || dt == vect_external_def)
647 outside_cost = TARG_SCALAR_TO_VEC_COST; 663 outside_cost = vect_get_stmt_cost (scalar_to_vec);
648 664
649 /* Strided access? */ 665 /* Strided access? */
650 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node) 666 if (DR_GROUP_FIRST_DR (stmt_info))
651 group_size = vect_cost_strided_group_size (stmt_info); 667 {
668 if (slp_node)
669 {
670 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
671 group_size = 1;
672 }
673 else
674 {
675 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
676 group_size = vect_cost_strided_group_size (stmt_info);
677 }
678
679 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
680 }
652 /* Not a strided access. */ 681 /* Not a strided access. */
653 else 682 else
654 group_size = 1; 683 {
684 group_size = 1;
685 first_dr = STMT_VINFO_DATA_REF (stmt_info);
686 }
655 687
656 /* Is this an access in a group of stores, which provide strided access? 688 /* Is this an access in a group of stores, which provide strided access?
657 If so, add in the cost of the permutes. */ 689 If so, add in the cost of the permutes. */
658 if (group_size > 1) 690 if (group_size > 1)
659 { 691 {
660 /* Uses a high and low interleave operation for each needed permute. */ 692 /* Uses a high and low interleave operation for each needed permute. */
661 inside_cost = ncopies * exact_log2(group_size) * group_size 693 inside_cost = ncopies * exact_log2(group_size) * group_size
662 * TARG_VEC_STMT_COST; 694 * vect_get_stmt_cost (vector_stmt);
663 695
664 if (vect_print_dump_info (REPORT_COST)) 696 if (vect_print_dump_info (REPORT_COST))
665 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .", 697 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
666 group_size); 698 group_size);
667 699
668 } 700 }
669 701
670 /* Costs of the stores. */ 702 /* Costs of the stores. */
671 inside_cost += ncopies * TARG_VEC_STORE_COST; 703 vect_get_store_cost (first_dr, ncopies, &inside_cost);
672 704
673 if (vect_print_dump_info (REPORT_COST)) 705 if (vect_print_dump_info (REPORT_COST))
674 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, " 706 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
675 "outside_cost = %d .", inside_cost, outside_cost); 707 "outside_cost = %d .", inside_cost, outside_cost);
676 708
678 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost); 710 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
679 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost); 711 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
680 } 712 }
681 713
682 714
715 /* Calculate cost of DR's memory access. */
716 void
717 vect_get_store_cost (struct data_reference *dr, int ncopies,
718 unsigned int *inside_cost)
719 {
720 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
721
722 switch (alignment_support_scheme)
723 {
724 case dr_aligned:
725 {
726 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
727
728 if (vect_print_dump_info (REPORT_COST))
729 fprintf (vect_dump, "vect_model_store_cost: aligned.");
730
731 break;
732 }
733
734 case dr_unaligned_supported:
735 {
736 gimple stmt = DR_STMT (dr);
737 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
738 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
739
740 /* Here, we assign an additional cost for the unaligned store. */
741 *inside_cost += ncopies
742 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
743 vectype, DR_MISALIGNMENT (dr));
744
745 if (vect_print_dump_info (REPORT_COST))
746 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
747 "hardware.");
748
749 break;
750 }
751
752 default:
753 gcc_unreachable ();
754 }
755 }
756
757
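Worked example of the strided store cost above: for an interleaved group of group_size = 4 stores with ncopies = 1, the permute term is 1 * exact_log2 (4) * 4 * vect_get_stmt_cost (vector_stmt) = 8, i.e. two rounds of high/low interleaves across the four vectors; vect_get_store_cost then adds the stores themselves, e.g. ncopies * vect_get_stmt_cost (vector_store) in the dr_aligned case, or the misalignment-dependent unaligned_store hook cost otherwise.
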
683 /* Function vect_model_load_cost 758 /* Function vect_model_load_cost
684 759
685 Models cost for loads. In the case of strided accesses, the last access 760 Models cost for loads. In the case of strided accesses, the last access
686 has the overhead of the strided access attributed to it. Since unaligned 761 has the overhead of the strided access attributed to it. Since unaligned
687 accesses are supported for loads, we also account for the costs of the 762 accesses are supported for loads, we also account for the costs of the
690 void 765 void
691 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) 766 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
692 767
693 { 768 {
694 int group_size; 769 int group_size;
695 int alignment_support_cheme;
696 gimple first_stmt; 770 gimple first_stmt;
697 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; 771 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
698 int inside_cost = 0, outside_cost = 0; 772 unsigned int inside_cost = 0, outside_cost = 0;
699 773
700 /* The SLP costs were already calculated during SLP tree build. */ 774 /* The SLP costs were already calculated during SLP tree build. */
701 if (PURE_SLP_STMT (stmt_info)) 775 if (PURE_SLP_STMT (stmt_info))
702 return; 776 return;
703 777
713 { 787 {
714 group_size = 1; 788 group_size = 1;
715 first_dr = dr; 789 first_dr = dr;
716 } 790 }
717 791
718 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
719
720 /* Is this an access in a group of loads providing strided access? 792 /* Is this an access in a group of loads providing strided access?
721 If so, add in the cost of the permutes. */ 793 If so, add in the cost of the permutes. */
722 if (group_size > 1) 794 if (group_size > 1)
723 { 795 {
724 /* Uses an even and odd extract operations for each needed permute. */ 796 /* Uses an even and odd extract operations for each needed permute. */
725 inside_cost = ncopies * exact_log2(group_size) * group_size 797 inside_cost = ncopies * exact_log2(group_size) * group_size
726 * TARG_VEC_STMT_COST; 798 * vect_get_stmt_cost (vector_stmt);
727 799
728 if (vect_print_dump_info (REPORT_COST)) 800 if (vect_print_dump_info (REPORT_COST))
729 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .", 801 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
730 group_size); 802 group_size);
731
732 } 803 }
733 804
734 /* The loads themselves. */ 805 /* The loads themselves. */
735 switch (alignment_support_cheme) 806 vect_get_load_cost (first_dr, ncopies,
807 ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node),
808 &inside_cost, &outside_cost);
809
810 if (vect_print_dump_info (REPORT_COST))
811 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
812 "outside_cost = %d .", inside_cost, outside_cost);
813
814 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
815 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
816 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
817 }
818
819
820 /* Calculate cost of DR's memory access. */
821 void
822 vect_get_load_cost (struct data_reference *dr, int ncopies,
823 bool add_realign_cost, unsigned int *inside_cost,
824 unsigned int *outside_cost)
825 {
826 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
827
828 switch (alignment_support_scheme)
736 { 829 {
737 case dr_aligned: 830 case dr_aligned:
738 { 831 {
739 inside_cost += ncopies * TARG_VEC_LOAD_COST; 832 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
740 833
741 if (vect_print_dump_info (REPORT_COST)) 834 if (vect_print_dump_info (REPORT_COST))
742 fprintf (vect_dump, "vect_model_load_cost: aligned."); 835 fprintf (vect_dump, "vect_model_load_cost: aligned.");
743 836
744 break; 837 break;
745 } 838 }
746 case dr_unaligned_supported: 839 case dr_unaligned_supported:
747 { 840 {
841 gimple stmt = DR_STMT (dr);
842 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
843 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
844
748 /* Here, we assign an additional cost for the unaligned load. */ 845 /* Here, we assign an additional cost for the unaligned load. */
749 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST; 846 *inside_cost += ncopies
750 847 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
848 vectype, DR_MISALIGNMENT (dr));
751 if (vect_print_dump_info (REPORT_COST)) 849 if (vect_print_dump_info (REPORT_COST))
752 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by " 850 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
753 "hardware."); 851 "hardware.");
754 852
755 break; 853 break;
756 } 854 }
757 case dr_explicit_realign: 855 case dr_explicit_realign:
758 { 856 {
759 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST); 857 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
858 + vect_get_stmt_cost (vector_stmt));
760 859
761 /* FIXME: If the misalignment remains fixed across the iterations of 860 /* FIXME: If the misalignment remains fixed across the iterations of
762 the containing loop, the following cost should be added to the 861 the containing loop, the following cost should be added to the
763 outside costs. */ 862 outside costs. */
764 if (targetm.vectorize.builtin_mask_for_load) 863 if (targetm.vectorize.builtin_mask_for_load)
765 inside_cost += TARG_VEC_STMT_COST; 864 *inside_cost += vect_get_stmt_cost (vector_stmt);
766 865
767 break; 866 break;
768 } 867 }
769 case dr_explicit_realign_optimized: 868 case dr_explicit_realign_optimized:
770 { 869 {
771 if (vect_print_dump_info (REPORT_COST)) 870 if (vect_print_dump_info (REPORT_COST))
772 fprintf (vect_dump, "vect_model_load_cost: unaligned software " 871 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
773 "pipelined."); 872 "pipelined.");
774 873
775 /* Unaligned software pipeline has a load of an address, an initial 874 /* Unaligned software pipeline has a load of an address, an initial
776 load, and possibly a mask operation to "prime" the loop. However, 875 load, and possibly a mask operation to "prime" the loop. However,
777 if this is an access in a group of loads, which provide strided 876 if this is an access in a group of loads, which provide strided
778 access, then the above cost should only be considered for one 877 access, then the above cost should only be considered for one
779 access in the group. Inside the loop, there is a load op 878 access in the group. Inside the loop, there is a load op
780 and a realignment op. */ 879 and a realignment op. */
781 880
782 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node) 881 if (add_realign_cost)
783 { 882 {
784 outside_cost = 2*TARG_VEC_STMT_COST; 883 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
785 if (targetm.vectorize.builtin_mask_for_load) 884 if (targetm.vectorize.builtin_mask_for_load)
786 outside_cost += TARG_VEC_STMT_COST; 885 *outside_cost += vect_get_stmt_cost (vector_stmt);
787 } 886 }
788 887
789 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST); 888 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
790 889 + vect_get_stmt_cost (vector_stmt));
791 break; 890 break;
792 } 891 }
793 892
794 default: 893 default:
795 gcc_unreachable (); 894 gcc_unreachable ();
796 } 895 }
797
798 if (vect_print_dump_info (REPORT_COST))
799 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
800 "outside_cost = %d .", inside_cost, outside_cost);
801
802 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
803 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
804 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
805 } 896 }
806 897
807 898
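Worked example of the dr_explicit_realign_optimized case above: with ncopies = 2, add_realign_cost set, and a target providing builtin_mask_for_load, the one-time setup hoisted out of the loop costs outside_cost = 2 * vector_stmt + 1 * vector_stmt = 3 units (address load, initial load, mask priming), while each copy pays one load and one realignment op inside the loop, giving inside_cost = 2 * (vector_load + vector_stmt) = 4 units, assuming the hook returns 1 for every kind.
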
808 /* Function vect_init_vector. 899 /* Function vect_init_vector.
809 900
810 Insert a new stmt (INIT_STMT) that initializes a new vector variable with 901 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
811 the vector elements of VECTOR_VAR. Place the initialization at BSI if it 902 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
812 is not NULL. Otherwise, place the initialization at the loop preheader. 903 is not NULL. Otherwise, place the initialization at the loop preheader.
813 Return the DEF of INIT_STMT. 904 Return the DEF of INIT_STMT.
814 It will be used in the vectorization of STMT. */ 905 It will be used in the vectorization of STMT. */
815 906
816 tree 907 tree
817 vect_init_vector (gimple stmt, tree vector_var, tree vector_type, 908 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
872 } 963 }
873 964
874 965
875 /* Function vect_get_vec_def_for_operand. 966 /* Function vect_get_vec_def_for_operand.
876 967
877 OP is an operand in STMT. This function returns a (vector) def that will be 968 OP is an operand in STMT. This function returns a (vector) def that will be
878 used in the vectorized stmt for STMT. 969 used in the vectorized stmt for STMT.
879 970
880 In the case that OP is an SSA_NAME which is defined in the loop, then 971 In the case that OP is an SSA_NAME which is defined in the loop, then
881 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. 972 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
882 973
889 tree vec_oprnd; 980 tree vec_oprnd;
890 gimple vec_stmt; 981 gimple vec_stmt;
891 gimple def_stmt; 982 gimple def_stmt;
892 stmt_vec_info def_stmt_info = NULL; 983 stmt_vec_info def_stmt_info = NULL;
893 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 984 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
894 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 985 unsigned int nunits;
895 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 986 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
897 tree vec_inv; 987 tree vec_inv;
898 tree vec_cst; 988 tree vec_cst;
899 tree t = NULL_TREE; 989 tree t = NULL_TREE;
900 tree def; 990 tree def;
931 /* Case 1: operand is a constant. */ 1021 /* Case 1: operand is a constant. */
932 case vect_constant_def: 1022 case vect_constant_def:
933 { 1023 {
934 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); 1024 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
935 gcc_assert (vector_type); 1025 gcc_assert (vector_type);
1026 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
936 1027
937 if (scalar_def) 1028 if (scalar_def)
938 *scalar_def = op; 1029 *scalar_def = op;
939 1030
940 /* Create 'vect_cst_ = {cst,cst,...,cst}' */ 1031 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
941 if (vect_print_dump_info (REPORT_DETAILS)) 1032 if (vect_print_dump_info (REPORT_DETAILS))
942 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); 1033 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
943 1034
944 for (i = nunits - 1; i >= 0; --i) 1035 vec_cst = build_vector_from_val (vector_type, op);
945 {
946 t = tree_cons (NULL_TREE, op, t);
947 }
948 vec_cst = build_vector (vector_type, t);
949 return vect_init_vector (stmt, vec_cst, vector_type, NULL); 1036 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
950 } 1037 }
951 1038
952 /* Case 2: operand is defined outside the loop - loop invariant. */ 1039 /* Case 2: operand is defined outside the loop - loop invariant. */
953 case vect_external_def: 1040 case vect_external_def:
1013 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI); 1100 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1014 1101
1015 /* Get the def from the vectorized stmt. */ 1102 /* Get the def from the vectorized stmt. */
1016 def_stmt_info = vinfo_for_stmt (def_stmt); 1103 def_stmt_info = vinfo_for_stmt (def_stmt);
1017 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 1104 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1018 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI); 1105 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1019 vec_oprnd = PHI_RESULT (vec_stmt); 1106 vec_oprnd = PHI_RESULT (vec_stmt);
1107 else
1108 vec_oprnd = gimple_get_lhs (vec_stmt);
1020 return vec_oprnd; 1109 return vec_oprnd;
1021 } 1110 }
1022 1111
1023 default: 1112 default:
1024 gcc_unreachable (); 1113 gcc_unreachable ();
1026 } 1115 }
1027 1116
1028 1117
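The constant-operand case above trades the hand-rolled TREE_LIST construction for build_vector_from_val. A side-by-side sketch of the equivalence for a uniform vector constant (the old idiom is exactly the code removed above, using the surrounding declarations):

    /* Old idiom: cons up nunits copies of OP, then build the vector.  */
    for (i = nunits - 1; i >= 0; --i)
      t = tree_cons (NULL_TREE, op, t);
    vec_cst = build_vector (vector_type, t);

    /* New idiom: one helper call builds {op, op, ..., op} directly.  */
    vec_cst = build_vector_from_val (vector_type, op);
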
1029 /* Function vect_get_vec_def_for_stmt_copy 1118 /* Function vect_get_vec_def_for_stmt_copy
1030 1119
1031 Return a vector-def for an operand. This function is used when the 1120 Return a vector-def for an operand. This function is used when the
1032 vectorized stmt to be created (by the caller to this function) is a "copy" 1121 vectorized stmt to be created (by the caller to this function) is a "copy"
1033 created in case the vectorized result cannot fit in one vector, and several 1122 created in case the vectorized result cannot fit in one vector, and several
1034 copies of the vector-stmt are required. In this case the vector-def is 1123 copies of the vector-stmt are required. In this case the vector-def is
1035 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field 1124 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1036 of the stmt that defines VEC_OPRND. 1125 of the stmt that defines VEC_OPRND.
1037 DT is the type of the vector def VEC_OPRND. 1126 DT is the type of the vector def VEC_OPRND.
1038 1127
1039 Context: 1128 Context:
1040 In case the vectorization factor (VF) is bigger than the number 1129 In case the vectorization factor (VF) is bigger than the number
1041 of elements that can fit in a vectype (nunits), we have to generate 1130 of elements that can fit in a vectype (nunits), we have to generate
1042 more than one vector stmt to vectorize the scalar stmt. This situation 1131 more than one vector stmt to vectorize the scalar stmt. This situation
1043 arises when there are multiple data-types operated upon in the loop; the 1132 arises when there are multiple data-types operated upon in the loop; the
1044 smallest data-type determines the VF, and as a result, when vectorizing 1133 smallest data-type determines the VF, and as a result, when vectorizing
1045 stmts operating on wider types we need to create 'VF/nunits' "copies" of the 1134 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1046 vector stmt (each computing a vector of 'nunits' results, and together 1135 vector stmt (each computing a vector of 'nunits' results, and together
1047 computing 'VF' results in each iteration). This function is called when 1136 computing 'VF' results in each iteration). This function is called when
1062 1151
1063 The vectorization of S1 is explained in vectorizable_load. 1152 The vectorization of S1 is explained in vectorizable_load.
1064 The vectorization of S2: 1153 The vectorization of S2:
1065 To create the first vector-stmt out of the 4 copies - VSnew.0 - 1154 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1066 the function 'vect_get_vec_def_for_operand' is called to 1155 the function 'vect_get_vec_def_for_operand' is called to
1067 get the relevant vector-def for each operand of S2. For operand x it 1156 get the relevant vector-def for each operand of S2. For operand x it
1068 returns the vector-def 'vx.0'. 1157 returns the vector-def 'vx.0'.
1069 1158
1070 To create the remaining copies of the vector-stmt (VSnew.j), this 1159 To create the remaining copies of the vector-stmt (VSnew.j), this
1071 function is called to get the relevant vector-def for each operand. It is 1160 function is called to get the relevant vector-def for each operand. It is
1072 obtained from the respective VS1.j stmt, which is recorded in the 1161 obtained from the respective VS1.j stmt, which is recorded in the
1105 return vec_oprnd; 1194 return vec_oprnd;
1106 } 1195 }
1107 1196
1108 1197
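Concrete instance of the VF/nunits arithmetic in the comment above: on a 16-byte vector target, a loop mixing chars and ints gets VF = 16 from the narrowest type, while a statement computing ints has nunits = 4, so it needs VF/nunits = 16/4 = 4 copies VSnew.0 .. VSnew.3 per iteration of the vectorized loop, which is the four-copy S2 scenario the comment walks through.
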
1109 /* Get vectorized definitions for the operands to create a copy of an original 1198 /* Get vectorized definitions for the operands to create a copy of an original
1110 stmt. See vect_get_vec_def_for_stmt_copy() for details. */ 1199 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1111 1200
1112 static void 1201 static void
1113 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt, 1202 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1114 VEC(tree,heap) **vec_oprnds0, 1203 VEC(tree,heap) **vec_oprnds0,
1115 VEC(tree,heap) **vec_oprnds1) 1204 VEC(tree,heap) **vec_oprnds1)
1126 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd); 1215 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1127 } 1216 }
1128 } 1217 }
1129 1218
1130 1219
1131 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */ 1220 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
1221 NULL. */
1132 1222
1133 static void 1223 static void
1134 vect_get_vec_defs (tree op0, tree op1, gimple stmt, 1224 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1135 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1, 1225 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1136 slp_tree slp_node) 1226 slp_tree slp_node)
1137 { 1227 {
1138 if (slp_node) 1228 if (slp_node)
1139 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1, -1); 1229 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1140 else 1230 else
1141 { 1231 {
1142 tree vec_oprnd; 1232 tree vec_oprnd;
1143 1233
1144 *vec_oprnds0 = VEC_alloc (tree, heap, 1); 1234 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1225 int nunits_in; 1315 int nunits_in;
1226 int nunits_out; 1316 int nunits_out;
1227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1317 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1228 tree fndecl, new_temp, def, rhs_type; 1318 tree fndecl, new_temp, def, rhs_type;
1229 gimple def_stmt; 1319 gimple def_stmt;
1230 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 1320 enum vect_def_type dt[3]
1321 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1231 gimple new_stmt = NULL; 1322 gimple new_stmt = NULL;
1232 int ncopies, j; 1323 int ncopies, j;
1233 VEC(tree, heap) *vargs = NULL; 1324 VEC(tree, heap) *vargs = NULL;
1234 enum { NARROW, NONE, WIDEN } modifier; 1325 enum { NARROW, NONE, WIDEN } modifier;
1235 size_t i, nargs; 1326 size_t i, nargs;
1252 return false; 1343 return false;
1253 1344
1254 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 1345 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1255 return false; 1346 return false;
1256 1347
1348 if (stmt_can_throw_internal (stmt))
1349 return false;
1350
1257 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 1351 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1258 1352
1259 /* Process function arguments. */ 1353 /* Process function arguments. */
1260 rhs_type = NULL_TREE; 1354 rhs_type = NULL_TREE;
1261 vectype_in = NULL_TREE; 1355 vectype_in = NULL_TREE;
1262 nargs = gimple_call_num_args (stmt); 1356 nargs = gimple_call_num_args (stmt);
1263 1357
1264 /* Bail out if the function has more than two arguments, we 1358 /* Bail out if the function has more than three arguments, we do not have
1265 do not have interesting builtin functions to vectorize with 1359 interesting builtin functions to vectorize with more than two arguments
1266 more than two arguments. No arguments is also not good. */ 1360 except for fma. No arguments is also not good. */
1267 if (nargs == 0 || nargs > 2) 1361 if (nargs == 0 || nargs > 3)
1268 return false; 1362 return false;
1269 1363
1270 for (i = 0; i < nargs; i++) 1364 for (i = 0; i < nargs; i++)
1271 { 1365 {
1272 tree opvectype; 1366 tree opvectype;
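
The relaxed argument check above (nargs up to 3, dt widened to three entries, plus the new stmt_can_throw_internal bail-out) exists so that three-operand builtins, fma in particular, can pass through vectorizable_call. A loop of the shape this now admits, assuming the target provides a vectorized fma (fma is the C99 function from <math.h>):

    #include <math.h>

    void
    fma_loop (int n, const double *a, const double *b, const double *c,
              double *r)
    {
      int i;
      for (i = 0; i < n; i++)
        r[i] = fma (a[i], b[i], c[i]);   /* three-argument call  */
    }
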
1485 it defines is mapped to the new definition. So just replace 1579 it defines is mapped to the new definition. So just replace
1486 rhs of the statement with something harmless. */ 1580 rhs of the statement with something harmless. */
1487 1581
1488 type = TREE_TYPE (scalar_dest); 1582 type = TREE_TYPE (scalar_dest);
1489 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), 1583 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1490 fold_convert (type, integer_zero_node)); 1584 build_zero_cst (type));
1491 set_vinfo_for_stmt (new_stmt, stmt_info); 1585 set_vinfo_for_stmt (new_stmt, stmt_info);
1492 set_vinfo_for_stmt (stmt, NULL); 1586 set_vinfo_for_stmt (stmt, NULL);
1493 STMT_VINFO_STMT (stmt_info) = new_stmt; 1587 STMT_VINFO_STMT (stmt_info) = new_stmt;
1494 gsi_replace (gsi, new_stmt, false); 1588 gsi_replace (gsi, new_stmt, false);
1495 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; 1589 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
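
build_zero_cst (type) used above is the newer one-stop helper for a zero constant of an arbitrary type; it replaces the fold_convert (type, integer_zero_node) idiom of the old snapshot and, for this use, produces the same harmless zero for the dead lhs being patched out.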
1500 1594
1501 /* Function vect_gen_widened_results_half 1595 /* Function vect_gen_widened_results_half
1502 1596
1503 Create a vector stmt whose code, type, number of arguments, and result 1597 Create a vector stmt whose code, type, number of arguments, and result
1504 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are 1598 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1505 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI. 1599 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1506 In the case that CODE is a CALL_EXPR, this means that a call to DECL 1600 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1507 needs to be created (DECL is a function-decl of a target-builtin). 1601 needs to be created (DECL is a function-decl of a target-builtin).
1508 STMT is the original scalar stmt that we are vectorizing. */ 1602 STMT is the original scalar stmt that we are vectorizing. */
1509 1603
1510 static gimple 1604 static gimple
1648 if (modifier == NARROW) 1742 if (modifier == NARROW)
1649 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 1743 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1650 else 1744 else
1651 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 1745 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1652 1746
1653 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies 1747 /* Multiple types in SLP are handled by creating the appropriate number of
1654 this, so we can safely override NCOPIES with 1 here. */ 1748 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1749 case of SLP. */
1655 if (slp_node) 1750 if (slp_node)
1656 ncopies = 1; 1751 ncopies = 1;
1657 1752
1658 /* Sanity check: make sure that at least one copy of the vectorized stmt 1753 /* Sanity check: make sure that at least one copy of the vectorized stmt
1659 needs to be generated. */ 1754 needs to be generated. */
1712 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); 1807 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1713 1808
1714 builtin_decl = 1809 builtin_decl =
1715 targetm.vectorize.builtin_conversion (code, 1810 targetm.vectorize.builtin_conversion (code,
1716 vectype_out, vectype_in); 1811 vectype_out, vectype_in);
1717 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++) 1812 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1718 { 1813 {
1719 /* Arguments are ready. create the new vector stmt. */ 1814 /* Arguments are ready. create the new vector stmt. */
1720 new_stmt = gimple_build_call (builtin_decl, 1, vop0); 1815 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1721 new_temp = make_ssa_name (vec_dest, new_stmt); 1816 new_temp = make_ssa_name (vec_dest, new_stmt);
1722 gimple_call_set_lhs (new_stmt, new_temp); 1817 gimple_call_set_lhs (new_stmt, new_temp);
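
FOR_EACH_VEC_ELT is the iteration macro this update substitutes for open-coded VEC_iterate loops throughout the file; the old side of the hunk above shows exactly the loop it stands for:

    /* FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0) is shorthand for:  */
    for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
      { /* loop body */ }
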
1783 { 1878 {
1784 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1); 1879 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1785 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); 1880 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1786 } 1881 }
1787 1882
1788 /* Arguments are ready. Create the new vector stmt. */ 1883 /* Arguments are ready. Create the new vector stmt. */
1789 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0, 1884 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1790 vec_oprnd1); 1885 vec_oprnd1);
1791 new_temp = make_ssa_name (vec_dest, new_stmt); 1886 new_temp = make_ssa_name (vec_dest, new_stmt);
1792 gimple_assign_set_lhs (new_stmt, new_temp); 1887 gimple_assign_set_lhs (new_stmt, new_temp);
1793 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1888 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1806 if (vec_oprnds0) 1901 if (vec_oprnds0)
1807 VEC_free (tree, heap, vec_oprnds0); 1902 VEC_free (tree, heap, vec_oprnds0);
1808 1903
1809 return true; 1904 return true;
1810 } 1905 }
1906
1907
1811 /* Function vectorizable_assignment. 1908 /* Function vectorizable_assignment.
1812 1909
1813 Check if STMT performs an assignment (copy) that can be vectorized. 1910 Check if STMT performs an assignment (copy) that can be vectorized.
1814 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1911 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1815 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1912 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1827 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1924 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1828 tree new_temp; 1925 tree new_temp;
1829 tree def; 1926 tree def;
1830 gimple def_stmt; 1927 gimple def_stmt;
1831 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 1928 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1832 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 1929 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1833 int ncopies; 1930 int ncopies;
1834 int i, j; 1931 int i, j;
1835 VEC(tree,heap) *vec_oprnds = NULL; 1932 VEC(tree,heap) *vec_oprnds = NULL;
1836 tree vop; 1933 tree vop;
1837 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 1934 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1838 gimple new_stmt = NULL; 1935 gimple new_stmt = NULL;
1839 stmt_vec_info prev_stmt_info = NULL; 1936 stmt_vec_info prev_stmt_info = NULL;
1937 enum tree_code code;
1938 tree vectype_in;
1840 1939
1841 /* Multiple types in SLP are handled by creating the appropriate number of 1940 /* Multiple types in SLP are handled by creating the appropriate number of
1842 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 1941 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1843 case of SLP. */ 1942 case of SLP. */
1844 if (slp_node) 1943 if (slp_node)
1860 1959
1861 scalar_dest = gimple_assign_lhs (stmt); 1960 scalar_dest = gimple_assign_lhs (stmt);
1862 if (TREE_CODE (scalar_dest) != SSA_NAME) 1961 if (TREE_CODE (scalar_dest) != SSA_NAME)
1863 return false; 1962 return false;
1864 1963
1964 code = gimple_assign_rhs_code (stmt);
1865 if (gimple_assign_single_p (stmt) 1965 if (gimple_assign_single_p (stmt)
1866 || gimple_assign_rhs_code (stmt) == PAREN_EXPR) 1966 || code == PAREN_EXPR
1967 || CONVERT_EXPR_CODE_P (code))
1867 op = gimple_assign_rhs1 (stmt); 1968 op = gimple_assign_rhs1 (stmt);
1868 else 1969 else
1869 return false; 1970 return false;
1870 1971
1871 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0])) 1972 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
1973 &def_stmt, &def, &dt[0], &vectype_in))
1872 { 1974 {
1873 if (vect_print_dump_info (REPORT_DETAILS)) 1975 if (vect_print_dump_info (REPORT_DETAILS))
1874 fprintf (vect_dump, "use not simple."); 1976 fprintf (vect_dump, "use not simple.");
1875 return false; 1977 return false;
1876 } 1978 }
1979
1980 /* We can handle NOP_EXPR conversions that do not change the number
1981 of elements or the vector size. */
1982 if (CONVERT_EXPR_CODE_P (code)
1983 && (!vectype_in
1984 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
1985 || (GET_MODE_SIZE (TYPE_MODE (vectype))
1986 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
1987 return false;
1877 1988
1878 if (!vec_stmt) /* transformation not required. */ 1989 if (!vec_stmt) /* transformation not required. */
1879 { 1990 {
1880 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 1991 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1881 if (vect_print_dump_info (REPORT_DETAILS)) 1992 if (vect_print_dump_info (REPORT_DETAILS))
1899 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); 2010 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1900 else 2011 else
1901 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); 2012 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
1902 2013
1903 /* Arguments are ready. create the new vector stmt. */ 2014 /* Arguments are ready. create the new vector stmt. */
1904 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++) 2015 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
1905 { 2016 {
2017 if (CONVERT_EXPR_CODE_P (code))
2018 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
1906 new_stmt = gimple_build_assign (vec_dest, vop); 2019 new_stmt = gimple_build_assign (vec_dest, vop);
1907 new_temp = make_ssa_name (vec_dest, new_stmt); 2020 new_temp = make_ssa_name (vec_dest, new_stmt);
1908 gimple_assign_set_lhs (new_stmt, new_temp); 2021 gimple_assign_set_lhs (new_stmt, new_temp);
1909 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1910 if (slp_node) 2023 if (slp_node)
1924 2037
1925 VEC_free (tree, heap, vec_oprnds); 2038 VEC_free (tree, heap, vec_oprnds);
1926 return true; 2039 return true;
1927 } 2040 }
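
The CONVERT_EXPR_CODE_P additions above extend vectorizable_assignment to no-op conversions: a NOP_EXPR whose input vector type has the same element count (TYPE_VECTOR_SUBPARTS) and the same mode size as the output is performed by wrapping the operand in a VIEW_CONVERT_EXPR, a pure reinterpretation. A loop of the shape this admits (int and unsigned int have equal width, so the conversion changes no bits):

    void
    to_unsigned (int n, const int *src, unsigned int *dst)
    {
      int i;
      for (i = 0; i < n; i++)
        dst[i] = (unsigned int) src[i];   /* same-size conversion  */
    }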
1928 2041
1929 /* Function vectorizable_operation. 2042
1930 2043 /* Function vectorizable_shift.
1931 Check if STMT performs a binary or unary operation that can be vectorized. 2044
2045 Check if STMT performs a shift operation that can be vectorized.
1932 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2046 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1933 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 2047 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1934 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 2048 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1935 2049
1936 static bool 2050 static bool
1937 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, 2051 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
1938 gimple *vec_stmt, slp_tree slp_node) 2052 gimple *vec_stmt, slp_tree slp_node)
1939 { 2053 {
1940 tree vec_dest; 2054 tree vec_dest;
1941 tree scalar_dest; 2055 tree scalar_dest;
1942 tree op0, op1 = NULL; 2056 tree op0, op1 = NULL;
1943 tree vec_oprnd1 = NULL_TREE; 2057 tree vec_oprnd1 = NULL_TREE;
1945 tree vectype; 2059 tree vectype;
1946 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2060 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1947 enum tree_code code; 2061 enum tree_code code;
1948 enum machine_mode vec_mode; 2062 enum machine_mode vec_mode;
1949 tree new_temp; 2063 tree new_temp;
1950 int op_type;
1951 optab optab; 2064 optab optab;
1952 int icode; 2065 int icode;
1953 enum machine_mode optab_op2_mode; 2066 enum machine_mode optab_op2_mode;
1954 tree def; 2067 tree def;
1955 gimple def_stmt; 2068 gimple def_stmt;
1959 int nunits_in; 2072 int nunits_in;
1960 int nunits_out; 2073 int nunits_out;
1961 tree vectype_out; 2074 tree vectype_out;
1962 int ncopies; 2075 int ncopies;
1963 int j, i; 2076 int j, i;
1964 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; 2077 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1965 tree vop0, vop1; 2078 tree vop0, vop1;
1966 unsigned int k; 2079 unsigned int k;
1967 bool scalar_shift_arg = false; 2080 bool scalar_shift_arg = false;
1968 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 2081 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1969 int vf; 2082 int vf;
1981 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 2094 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1982 return false; 2095 return false;
1983 2096
1984 code = gimple_assign_rhs_code (stmt); 2097 code = gimple_assign_rhs_code (stmt);
1985 2098
1986 /* For pointer addition, we should use the normal plus for 2099 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
1987 the vector addition. */ 2100 || code == RROTATE_EXPR))
1988 if (code == POINTER_PLUS_EXPR) 2101 return false;
1989 code = PLUS_EXPR;
1990
1991 /* Support only unary or binary operations. */
1992 op_type = TREE_CODE_LENGTH (code);
1993 if (op_type != unary_op && op_type != binary_op)
1994 {
1995 if (vect_print_dump_info (REPORT_DETAILS))
1996 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1997 return false;
1998 }
1999 2102
2000 scalar_dest = gimple_assign_lhs (stmt); 2103 scalar_dest = gimple_assign_lhs (stmt);
2001 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 2104 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2002 2105
2003 op0 = gimple_assign_rhs1 (stmt); 2106 op0 = gimple_assign_rhs1 (stmt);
2004 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, 2107 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2005 &def_stmt, &def, &dt[0], &vectype)) 2108 &def_stmt, &def, &dt[0], &vectype))
2006 { 2109 {
2007 if (vect_print_dump_info (REPORT_DETAILS)) 2110 if (vect_print_dump_info (REPORT_DETAILS))
2008 fprintf (vect_dump, "use not simple."); 2111 fprintf (vect_dump, "use not simple.");
2009 return false; 2112 return false;
2010 } 2113 }
2028 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 2131 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2029 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 2132 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2030 if (nunits_out != nunits_in) 2133 if (nunits_out != nunits_in)
2031 return false; 2134 return false;
2032 2135
2033 if (op_type == binary_op) 2136 op1 = gimple_assign_rhs2 (stmt);
2137 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2138 {
2139 if (vect_print_dump_info (REPORT_DETAILS))
2140 fprintf (vect_dump, "use not simple.");
2141 return false;
2142 }
2143
2144 if (loop_vinfo)
2145 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2146 else
2147 vf = 1;
2148
2149 /* Multiple types in SLP are handled by creating the appropriate number of
2150 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2151 case of SLP. */
2152 if (slp_node)
2153 ncopies = 1;
2154 else
2155 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2156
2157 gcc_assert (ncopies >= 1);
2158
2159 /* Determine whether the shift amount is a vector, or scalar. If the
2160 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2161
2162 /* Vector shifted by vector. */
2163 if (dt[1] == vect_internal_def)
2164 {
2165 optab = optab_for_tree_code (code, vectype, optab_vector);
2166 if (vect_print_dump_info (REPORT_DETAILS))
2167 fprintf (vect_dump, "vector/vector shift/rotate found.");
2168 }
2169 /* See if the machine has a vector shifted by scalar insn and if not
2170 then see if it has a vector shifted by vector insn. */
2171 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2172 {
2173 optab = optab_for_tree_code (code, vectype, optab_scalar);
2174 if (optab
2175 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2176 {
2177 scalar_shift_arg = true;
2178 if (vect_print_dump_info (REPORT_DETAILS))
2179 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2180 }
2181 else
2182 {
2183 optab = optab_for_tree_code (code, vectype, optab_vector);
2184 if (optab
2185 && (optab_handler (optab, TYPE_MODE (vectype))
2186 != CODE_FOR_nothing))
2187 {
2188 if (vect_print_dump_info (REPORT_DETAILS))
2189 fprintf (vect_dump, "vector/vector shift/rotate found.");
2190
2191 /* Unlike the other binary operators, shifts/rotates have
2192 the rhs being int, instead of the same type as the lhs,
2193 so make sure the scalar is the right type if we are
2194 dealing with vectors of short/char. */
2195 if (dt[1] == vect_constant_def)
2196 op1 = fold_convert (TREE_TYPE (vectype), op1);
2197 }
2198 }
2199 }
2200 else
2201 {
2202 if (vect_print_dump_info (REPORT_DETAILS))
2203 fprintf (vect_dump, "operand mode requires invariant argument.");
2204 return false;
2205 }
2206
2207 /* Supportable by target? */
2208 if (!optab)
2209 {
2210 if (vect_print_dump_info (REPORT_DETAILS))
2211 fprintf (vect_dump, "no optab.");
2212 return false;
2213 }
2214 vec_mode = TYPE_MODE (vectype);
2215 icode = (int) optab_handler (optab, vec_mode);
2216 if (icode == CODE_FOR_nothing)
2217 {
2218 if (vect_print_dump_info (REPORT_DETAILS))
2219 fprintf (vect_dump, "op not supported by target.");
2220 /* Check only during analysis. */
2221 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2222 || (vf < vect_min_worthwhile_factor (code)
2223 && !vec_stmt))
2224 return false;
2225 if (vect_print_dump_info (REPORT_DETAILS))
2226 fprintf (vect_dump, "proceeding using word mode.");
2227 }
2228
2229 /* Worthwhile without SIMD support? Check only during analysis. */
2230 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2231 && vf < vect_min_worthwhile_factor (code)
2232 && !vec_stmt)
2233 {
2234 if (vect_print_dump_info (REPORT_DETAILS))
2235 fprintf (vect_dump, "not worthwhile without SIMD support.");
2236 return false;
2237 }
2238
2239 if (!vec_stmt) /* transformation not required. */
2240 {
2241 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2242 if (vect_print_dump_info (REPORT_DETAILS))
2243 fprintf (vect_dump, "=== vectorizable_shift ===");
2244 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2245 return true;
2246 }
2247
2248 /** Transform. **/
2249
2250 if (vect_print_dump_info (REPORT_DETAILS))
2251 fprintf (vect_dump, "transform binary/unary operation.");
2252
2253 /* Handle def. */
2254 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2255
2256 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2257 created in the previous stages of the recursion, so no allocation is
2258 needed, except for the case of shift with scalar shift argument. In that
2259 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2260 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2261 In case of loop-based vectorization we allocate VECs of size 1. We
2262 allocate VEC_OPRNDS1 only in case of binary operation. */
2263 if (!slp_node)
2264 {
2265 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2266 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2267 }
2268 else if (scalar_shift_arg)
2269 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2270
2271 prev_stmt_info = NULL;
2272 for (j = 0; j < ncopies; j++)
2273 {
2274 /* Handle uses. */
2275 if (j == 0)
2276 {
2277 if (scalar_shift_arg)
2278 {
2279 /* Vector shl and shr insn patterns can be defined with scalar
2280 operand 2 (shift operand). In this case, use constant or loop
2281 invariant op1 directly, without extending it to vector mode
2282 first. */
2283 optab_op2_mode = insn_data[icode].operand[2].mode;
2284 if (!VECTOR_MODE_P (optab_op2_mode))
2285 {
2286 if (vect_print_dump_info (REPORT_DETAILS))
2287 fprintf (vect_dump, "operand 1 using scalar mode.");
2288 vec_oprnd1 = op1;
2289 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2290 if (slp_node)
2291 {
2292 /* Store vec_oprnd1 for every vector stmt to be created
2293 for SLP_NODE. We check during the analysis that all
2294 the shift arguments are the same.
2295 TODO: Allow different constants for different vector
2296 stmts generated for an SLP instance. */
2297 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2298 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2299 }
2300 }
2301 }
2302
2303 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2304 (a special case for certain kind of vector shifts); otherwise,
2305 operand 1 should be of a vector type (the usual case). */
2306 if (vec_oprnd1)
2307 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2308 slp_node);
2309 else
2310 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2311 slp_node);
2312 }
2313 else
2314 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2315
2316 /* Arguments are ready. Create the new vector stmt. */
2317 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2318 {
2319 vop1 = VEC_index (tree, vec_oprnds1, i);
2320 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2321 new_temp = make_ssa_name (vec_dest, new_stmt);
2322 gimple_assign_set_lhs (new_stmt, new_temp);
2323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2324 if (slp_node)
2325 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2326 }
2327
2328 if (slp_node)
2329 continue;
2330
2331 if (j == 0)
2332 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2333 else
2334 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2335 prev_stmt_info = vinfo_for_stmt (new_stmt);
2336 }
2337
2338 VEC_free (tree, heap, vec_oprnds0);
2339 VEC_free (tree, heap, vec_oprnds1);
2340
2341 return true;
2342 }
2343
2344
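vectorizable_shift above is split out of vectorizable_operation, which from this revision on simply rejects shift and rotate codes (see below). The optab_scalar / optab_vector probing it performs corresponds to the two source-level shapes (sketch):

    void
    shifts (int n, int *a, const int *s)
    {
      int i;
      for (i = 0; i < n; i++)
        a[i] <<= 3;        /* invariant amount: vector/scalar shift optab  */
      for (i = 0; i < n; i++)
        a[i] <<= s[i];     /* per-element amount: vector/vector shift optab  */
    }
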
2345 /* Function vectorizable_operation.
2346
2347 Check if STMT performs a binary, unary or ternary operation that can
2348 be vectorized.
2349 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2350 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2351 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2352
2353 static bool
2354 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2355 gimple *vec_stmt, slp_tree slp_node)
2356 {
2357 tree vec_dest;
2358 tree scalar_dest;
2359 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2360 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2361 tree vectype;
2362 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2363 enum tree_code code;
2364 enum machine_mode vec_mode;
2365 tree new_temp;
2366 int op_type;
2367 optab optab;
2368 int icode;
2369 tree def;
2370 gimple def_stmt;
2371 enum vect_def_type dt[3]
2372 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2373 gimple new_stmt = NULL;
2374 stmt_vec_info prev_stmt_info;
2375 int nunits_in;
2376 int nunits_out;
2377 tree vectype_out;
2378 int ncopies;
2379 int j, i;
2380 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2381 tree vop0, vop1, vop2;
2382 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2383 int vf;
2384
2385 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2386 return false;
2387
2388 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2389 return false;
2390
2391 /* Is STMT a vectorizable binary/unary operation? */
2392 if (!is_gimple_assign (stmt))
2393 return false;
2394
2395 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2396 return false;
2397
2398 code = gimple_assign_rhs_code (stmt);
2399
2400 /* For pointer addition, we should use the normal plus for
2401 the vector addition. */
2402 if (code == POINTER_PLUS_EXPR)
2403 code = PLUS_EXPR;
2404
2405 /* Support only unary or binary operations. */
2406 op_type = TREE_CODE_LENGTH (code);
2407 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2408 {
2409 if (vect_print_dump_info (REPORT_DETAILS))
2410 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2411 op_type);
2412 return false;
2413 }
2414
2415 scalar_dest = gimple_assign_lhs (stmt);
2416 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2417
2418 op0 = gimple_assign_rhs1 (stmt);
2419 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2420 &def_stmt, &def, &dt[0], &vectype))
2421 {
2422 if (vect_print_dump_info (REPORT_DETAILS))
2423 fprintf (vect_dump, "use not simple.");
2424 return false;
2425 }
2426 /* If op0 is an external or constant def use a vector type with
2427 the same size as the output vector type. */
2428 if (!vectype)
2429 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2430 if (vec_stmt)
2431 gcc_assert (vectype);
2432 if (!vectype)
2433 {
2434 if (vect_print_dump_info (REPORT_DETAILS))
2435 {
2436 fprintf (vect_dump, "no vectype for scalar type ");
2437 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2438 }
2439
2440 return false;
2441 }
2442
2443 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2444 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2445 if (nunits_out != nunits_in)
2446 return false;
2447
2448 if (op_type == binary_op || op_type == ternary_op)
2034 { 2449 {
2035 op1 = gimple_assign_rhs2 (stmt); 2450 op1 = gimple_assign_rhs2 (stmt);
2036 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, 2451 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2037 &dt[1])) 2452 &dt[1]))
2038 { 2453 {
2039 if (vect_print_dump_info (REPORT_DETAILS)) 2454 if (vect_print_dump_info (REPORT_DETAILS))
2040 fprintf (vect_dump, "use not simple."); 2455 fprintf (vect_dump, "use not simple.");
2041 return false; 2456 return false;
2042 } 2457 }
2043 } 2458 }
2459 if (op_type == ternary_op)
2460 {
2461 op2 = gimple_assign_rhs3 (stmt);
2462 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2463 &dt[2]))
2464 {
2465 if (vect_print_dump_info (REPORT_DETAILS))
2466 fprintf (vect_dump, "use not simple.");
2467 return false;
2468 }
2469 }
2044 2470
2045 if (loop_vinfo) 2471 if (loop_vinfo)
2046 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2472 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2047 else 2473 else
2048 vf = 1; 2474 vf = 1;
2049 2475
2050 /* Multiple types in SLP are handled by creating the appropriate number of 2476 /* Multiple types in SLP are handled by creating the appropriate number of
2051 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 2477 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2052 case of SLP. */ 2478 case of SLP. */
2053 if (slp_node) 2479 if (slp_node)
2054 ncopies = 1; 2480 ncopies = 1;
2055 else 2481 else
2056 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 2482 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2057 2483
2058 gcc_assert (ncopies >= 1); 2484 gcc_assert (ncopies >= 1);
2059 2485
2060 /* If this is a shift/rotate, determine whether the shift amount is a vector, 2486 /* Shifts are handled in vectorizable_shift (). */
2061 or scalar. If the shift/rotate amount is a vector, use the vector/vector
2062 shift optabs. */
2063 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 2487 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2064 || code == RROTATE_EXPR) 2488 || code == RROTATE_EXPR)
2065 { 2489 return false;
2066 /* vector shifted by vector */ 2490
2067 if (dt[1] == vect_internal_def) 2491 optab = optab_for_tree_code (code, vectype, optab_default);
2068 {
2069 optab = optab_for_tree_code (code, vectype, optab_vector);
2070 if (vect_print_dump_info (REPORT_DETAILS))
2071 fprintf (vect_dump, "vector/vector shift/rotate found.");
2072 }
2073
2074 /* See if the machine has a vector shifted by scalar insn and if not
2075 then see if it has a vector shifted by vector insn */
2076 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2077 {
2078 optab = optab_for_tree_code (code, vectype, optab_scalar);
2079 if (optab
2080 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2081 != CODE_FOR_nothing))
2082 {
2083 scalar_shift_arg = true;
2084 if (vect_print_dump_info (REPORT_DETAILS))
2085 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2086 }
2087 else
2088 {
2089 optab = optab_for_tree_code (code, vectype, optab_vector);
2090 if (optab
2091 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2092 != CODE_FOR_nothing))
2093 {
2094 if (vect_print_dump_info (REPORT_DETAILS))
2095 fprintf (vect_dump, "vector/vector shift/rotate found.");
2096
2097 /* Unlike the other binary operators, shifts/rotates have
2098 the rhs being int, instead of the same type as the lhs,
2099 so make sure the scalar is the right type if we are
2100 dealing with vectors of short/char. */
2101 if (dt[1] == vect_constant_def)
2102 op1 = fold_convert (TREE_TYPE (vectype), op1);
2103 }
2104 }
2105 }
2106
2107 else
2108 {
2109 if (vect_print_dump_info (REPORT_DETAILS))
2110 fprintf (vect_dump, "operand mode requires invariant argument.");
2111 return false;
2112 }
2113 }
2114 else
2115 optab = optab_for_tree_code (code, vectype, optab_default);
2116 2492
2117 /* Supportable by target? */ 2493 /* Supportable by target? */
2118 if (!optab) 2494 if (!optab)
2119 { 2495 {
2120 if (vect_print_dump_info (REPORT_DETAILS)) 2496 if (vect_print_dump_info (REPORT_DETAILS))
2121 fprintf (vect_dump, "no optab."); 2497 fprintf (vect_dump, "no optab.");
2122 return false; 2498 return false;
2123 } 2499 }
2124 vec_mode = TYPE_MODE (vectype); 2500 vec_mode = TYPE_MODE (vectype);
2125 icode = (int) optab_handler (optab, vec_mode)->insn_code; 2501 icode = (int) optab_handler (optab, vec_mode);
2126 if (icode == CODE_FOR_nothing) 2502 if (icode == CODE_FOR_nothing)
2127 { 2503 {
2128 if (vect_print_dump_info (REPORT_DETAILS)) 2504 if (vect_print_dump_info (REPORT_DETAILS))
2129 fprintf (vect_dump, "op not supported by target."); 2505 fprintf (vect_dump, "op not supported by target.");
2130 /* Check only during analysis. */ 2506 /* Check only during analysis. */
2134 return false; 2510 return false;
2135 if (vect_print_dump_info (REPORT_DETAILS)) 2511 if (vect_print_dump_info (REPORT_DETAILS))
2136 fprintf (vect_dump, "proceeding using word mode."); 2512 fprintf (vect_dump, "proceeding using word mode.");
2137 } 2513 }
2138 2514
2139 /* Worthwhile without SIMD support? Check only during analysis. */ 2515 /* Worthwhile without SIMD support? Check only during analysis. */
2140 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 2516 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2141 && vf < vect_min_worthwhile_factor (code) 2517 && vf < vect_min_worthwhile_factor (code)
2142 && !vec_stmt) 2518 && !vec_stmt)
2143 { 2519 {
2144 if (vect_print_dump_info (REPORT_DETAILS)) 2520 if (vect_print_dump_info (REPORT_DETAILS))
2161 fprintf (vect_dump, "transform binary/unary operation."); 2537 fprintf (vect_dump, "transform binary/unary operation.");
2162 2538
2163 /* Handle def. */ 2539 /* Handle def. */
2164 vec_dest = vect_create_destination_var (scalar_dest, vectype); 2540 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2165 2541
2166 /* Allocate VECs for vector operands. In case of SLP, vector operands are 2542 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2167 created in the previous stages of the recursion, so no allocation is 2543 created in the previous stages of the recursion, so no allocation is
2168 needed, except for the case of shift with scalar shift argument. In that 2544 needed, except for the case of shift with scalar shift argument. In that
2169 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to 2545 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2170 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE. 2546 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2171 In case of loop-based vectorization we allocate VECs of size 1. We 2547 In case of loop-based vectorization we allocate VECs of size 1. We
2172 allocate VEC_OPRNDS1 only in case of binary operation. */ 2548 allocate VEC_OPRNDS1 only in case of binary operation. */
2173 if (!slp_node) 2549 if (!slp_node)
2174 { 2550 {
2175 vec_oprnds0 = VEC_alloc (tree, heap, 1); 2551 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2176 if (op_type == binary_op) 2552 if (op_type == binary_op || op_type == ternary_op)
2177 vec_oprnds1 = VEC_alloc (tree, heap, 1); 2553 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2178 } 2554 if (op_type == ternary_op)
2179 else if (scalar_shift_arg) 2555 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2180 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); 2556 }
2181 2557
2182 /* In case the vectorization factor (VF) is bigger than the number 2558 /* In case the vectorization factor (VF) is bigger than the number
2183 of elements that we can fit in a vectype (nunits), we have to generate 2559 of elements that we can fit in a vectype (nunits), we have to generate
2184 more than one vector stmt - i.e - we need to "unroll" the 2560 more than one vector stmt - i.e - we need to "unroll" the
2185 vector stmt by a factor VF/nunits. In doing so, we record a pointer 2561 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2186 from one copy of the vector stmt to the next, in the field 2562 from one copy of the vector stmt to the next, in the field
2187 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 2563 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2188 stages to find the correct vector defs to be used when vectorizing 2564 stages to find the correct vector defs to be used when vectorizing
2189 stmts that use the defs of the current stmt. The example below illustrates 2565 stmts that use the defs of the current stmt. The example below
2190 the vectorization process when VF=16 and nunits=4 (i.e - we need to create 2566 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2191 4 vectorized stmts): 2567 we need to create 4 vectorized stmts):
2192 2568
2193 before vectorization: 2569 before vectorization:
2194 RELATED_STMT VEC_STMT 2570 RELATED_STMT VEC_STMT
2195 S1: x = memref - - 2571 S1: x = memref - -
2196 S2: z = x + 1 - - 2572 S2: z = x + 1 - -
2205 S1: x = load - VS1_0 2581 S1: x = load - VS1_0
2206 S2: z = x + 1 - - 2582 S2: z = x + 1 - -
2207 2583
2208 step2: vectorize stmt S2 (done here): 2584 step2: vectorize stmt S2 (done here):
2209 To vectorize stmt S2 we first need to find the relevant vector 2585 To vectorize stmt S2 we first need to find the relevant vector
2210 def for the first operand 'x'. This is, as usual, obtained from 2586 def for the first operand 'x'. This is, as usual, obtained from
2211 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt 2587 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2212 that defines 'x' (S1). This way we find the stmt VS1_0, and the 2588 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2213 relevant vector def 'vx0'. Having found 'vx0' we can generate 2589 relevant vector def 'vx0'. Having found 'vx0' we can generate
2214 the vector stmt VS2_0, and as usual, record it in the 2590 the vector stmt VS2_0, and as usual, record it in the
2215 STMT_VINFO_VEC_STMT of stmt S2. 2591 STMT_VINFO_VEC_STMT of stmt S2.
2216 When creating the second copy (VS2_1), we obtain the relevant vector 2592 When creating the second copy (VS2_1), we obtain the relevant vector
2217 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of 2593 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2218 stmt VS1_0. This way we find the stmt VS1_1 and the relevant 2594 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2219 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a 2595 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2220 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. 2596 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2221 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting 2597 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2222 chain of stmts and pointers: 2598 chain of stmts and pointers:
2223 RELATED_STMT VEC_STMT 2599 RELATED_STMT VEC_STMT
2224 VS1_0: vx0 = memref0 VS1_1 - 2600 VS1_0: vx0 = memref0 VS1_1 -
2225 VS1_1: vx1 = memref1 VS1_2 - 2601 VS1_1: vx1 = memref1 VS1_2 -
2226 VS1_2: vx2 = memref2 VS1_3 - 2602 VS1_2: vx2 = memref2 VS1_3 -
2236 for (j = 0; j < ncopies; j++) 2612 for (j = 0; j < ncopies; j++)
2237 { 2613 {
2238 /* Handle uses. */ 2614 /* Handle uses. */
2239 if (j == 0) 2615 if (j == 0)
2240 { 2616 {
2241 if (op_type == binary_op && scalar_shift_arg) 2617 if (op_type == binary_op || op_type == ternary_op)
2242 {
2243 /* Vector shl and shr insn patterns can be defined with scalar
2244 operand 2 (shift operand). In this case, use constant or loop
2245 invariant op1 directly, without extending it to vector mode
2246 first. */
2247 optab_op2_mode = insn_data[icode].operand[2].mode;
2248 if (!VECTOR_MODE_P (optab_op2_mode))
2249 {
2250 if (vect_print_dump_info (REPORT_DETAILS))
2251 fprintf (vect_dump, "operand 1 using scalar mode.");
2252 vec_oprnd1 = op1;
2253 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2254 if (slp_node)
2255 {
2256 /* Store vec_oprnd1 for every vector stmt to be created
2257 for SLP_NODE. We check during the analysis that all the
2258 shift arguments are the same.
2259 TODO: Allow different constants for different vector
2260 stmts generated for an SLP instance. */
2261 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2262 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2263 }
2264 }
2265 }
2266
2267 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2268 (a special case for certain kind of vector shifts); otherwise,
2269 operand 1 should be of a vector type (the usual case). */
2270 if (op_type == binary_op && !vec_oprnd1)
2271 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 2618 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2272 slp_node); 2619 slp_node);
2273 else 2620 else
2274 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 2621 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2275 slp_node); 2622 slp_node);
2623 if (op_type == ternary_op)
2624 {
2625 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2626 VEC_quick_push (tree, vec_oprnds2,
2627 vect_get_vec_def_for_operand (op2, stmt, NULL));
2628 }
2276 } 2629 }
2277 else 2630 else
2278 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 2631 {
2279 2632 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2280 /* Arguments are ready. Create the new vector stmt. */ 2633 if (op_type == ternary_op)
2281 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++) 2634 {
2635 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2636 VEC_quick_push (tree, vec_oprnds2,
2637 vect_get_vec_def_for_stmt_copy (dt[2],
2638 vec_oprnd));
2639 }
2640 }
2641
2642 /* Arguments are ready. Create the new vector stmt. */
2643 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2282 { 2644 {
2283 vop1 = ((op_type == binary_op) 2645 vop1 = ((op_type == binary_op || op_type == ternary_op)
2284 ? VEC_index (tree, vec_oprnds1, i) : NULL); 2646 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2285 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1); 2647 vop2 = ((op_type == ternary_op)
2648 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2649 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2650 vop0, vop1, vop2);
2286 new_temp = make_ssa_name (vec_dest, new_stmt); 2651 new_temp = make_ssa_name (vec_dest, new_stmt);
2287 gimple_assign_set_lhs (new_stmt, new_temp); 2652 gimple_assign_set_lhs (new_stmt, new_temp);
2288 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2653 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2289 if (slp_node) 2654 if (slp_node)
2290 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 2655 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2301 } 2666 }
2302 2667
2303 VEC_free (tree, heap, vec_oprnds0); 2668 VEC_free (tree, heap, vec_oprnds0);
2304 if (vec_oprnds1) 2669 if (vec_oprnds1)
2305 VEC_free (tree, heap, vec_oprnds1); 2670 VEC_free (tree, heap, vec_oprnds1);
2671 if (vec_oprnds2)
2672 VEC_free (tree, heap, vec_oprnds2);
2306 2673
2307 return true; 2674 return true;
2308 } 2675 }
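
The hunk above generalizes vectorizable_operation from unary/binary to ternary assignments (note gimple_build_assign_with_ops3 and the new VEC_OPRNDS2), while shifts and rotates are punted to vectorizable_shift. A minimal sketch of the transformation this function performs, written with GCC's generic vector extension rather than the internal GIMPLE API; VF = 8, nunits = 4 and the pointer alignment are assumptions for illustration, not anything the patch fixes:

    typedef int v4si __attribute__ ((vector_size (16)));

    /* Scalar loop: one binary_op statement, code = PLUS_EXPR.  */
    void
    scalar_add (int *restrict a, int *restrict b, int *restrict c, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = b[i] + c[i];
    }

    /* What the emitted GIMPLE amounts to with VF = 8, nunits = 4:
       ncopies = VF / nunits_in = 2 vector statements per scalar
       statement, chained through STMT_VINFO_RELATED_STMT.
       Assumes n % 8 == 0 and 16-byte aligned pointers.  */
    void
    vector_add (int *restrict a, int *restrict b, int *restrict c, int n)
    {
      for (int i = 0; i < n; i += 8)
        {
          *(v4si *) &a[i]     = *(v4si *) &b[i]     + *(v4si *) &c[i];
          *(v4si *) &a[i + 4] = *(v4si *) &b[i + 4] + *(v4si *) &c[i + 4];
        }
    }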
2309 2676
2310 2677
2311 /* Get vectorized definitions for loop-based vectorization. For the first 2678 /* Get vectorized definitions for loop-based vectorization. For the first
2312 operand we call vect_get_vec_def_for_operand() (with OPRND containing 2679 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2313 scalar operand), and for the rest we get a copy with 2680 scalar operand), and for the rest we get a copy with
2314 vect_get_vec_def_for_stmt_copy() using the previous vector definition 2681 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2315 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details. 2682 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2316 The vectors are collected into VEC_OPRNDS. */ 2683 The vectors are collected into VEC_OPRNDS. */
2503 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 2870 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2504 if (nunits_in >= nunits_out) 2871 if (nunits_in >= nunits_out)
2505 return false; 2872 return false;
2506 2873
2507 /* Multiple types in SLP are handled by creating the appropriate number of 2874 /* Multiple types in SLP are handled by creating the appropriate number of
2508 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 2875 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2509 case of SLP. */ 2876 case of SLP. */
2510 if (slp_node) 2877 if (slp_node)
2511 ncopies = 1; 2878 ncopies = 1;
2512 else 2879 else
2513 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 2880 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2564 prev_stmt_info = NULL; 2931 prev_stmt_info = NULL;
2565 for (j = 0; j < ncopies; j++) 2932 for (j = 0; j < ncopies; j++)
2566 { 2933 {
2567 /* Handle uses. */ 2934 /* Handle uses. */
2568 if (slp_node) 2935 if (slp_node)
2569 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL, -1); 2936 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
2570 else 2937 else
2571 { 2938 {
2572 VEC_free (tree, heap, vec_oprnds0); 2939 VEC_free (tree, heap, vec_oprnds0);
2573 vec_oprnds0 = VEC_alloc (tree, heap, 2940 vec_oprnds0 = VEC_alloc (tree, heap,
2574 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2)); 2941 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2575 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0, 2942 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2576 vect_pow2 (multi_step_cvt) - 1); 2943 vect_pow2 (multi_step_cvt) - 1);
2577 } 2944 }
2578 2945
2579 /* Arguments are ready. Create the new vector stmts. */ 2946 /* Arguments are ready. Create the new vector stmts. */
2580 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts); 2947 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2581 vect_create_vectorized_demotion_stmts (&vec_oprnds0, 2948 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2582 multi_step_cvt, stmt, tmp_vec_dsts, 2949 multi_step_cvt, stmt, tmp_vec_dsts,
2583 gsi, slp_node, code1, 2950 gsi, slp_node, code1,
2584 &prev_stmt_info); 2951 &prev_stmt_info);
2593 return true; 2960 return true;
2594 } 2961 }
2595 2962
2596 2963
2597 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 2964 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2598 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store 2965 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2599 the resulting vectors and call the function recursively. */ 2966 the resulting vectors and call the function recursively. */
2600 2967
2601 static void 2968 static void
2602 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0, 2969 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2603 VEC (tree, heap) **vec_oprnds1, 2970 VEC (tree, heap) **vec_oprnds1,
2616 VEC (tree, heap) *vec_tmp; 2983 VEC (tree, heap) *vec_tmp;
2617 2984
2618 vec_dest = VEC_pop (tree, vec_dsts); 2985 vec_dest = VEC_pop (tree, vec_dsts);
2619 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2); 2986 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2620 2987
2621 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++) 2988 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2622 { 2989 {
2623 if (op_type == binary_op) 2990 if (op_type == binary_op)
2624 vop1 = VEC_index (tree, *vec_oprnds1, i); 2991 vop1 = VEC_index (tree, *vec_oprnds1, i);
2625 else 2992 else
2626 vop1 = NULL_TREE; 2993 vop1 = NULL_TREE;
2670 } 3037 }
2671 3038
2672 if (multi_step_cvt) 3039 if (multi_step_cvt)
2673 { 3040 {
2674 /* For a multi-step promotion operation we call the 3041 /* For a multi-step promotion operation we call the
2675 function recursively for every stage. We start from the input type, 3042 function recursively for every stage. We start from the input type,
2676 create promotion operations to the intermediate types, and then 3043 create promotion operations to the intermediate types, and then
2677 create promotions to the output type. */ 3044 create promotions to the output type. */
2678 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp); 3045 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2679 VEC_free (tree, heap, vec_tmp);
2680 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1, 3046 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2681 multi_step_cvt - 1, stmt, 3047 multi_step_cvt - 1, stmt,
2682 vec_dsts, gsi, slp_node, code1, 3048 vec_dsts, gsi, slp_node, code1,
2683 code2, decl2, decl2, op_type, 3049 code2, decl2, decl2, op_type,
2684 prev_stmt_info); 3050 prev_stmt_info);
2685 } 3051 }
3052
3053 VEC_free (tree, heap, vec_tmp);
2686 } 3054 }
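
The hunk also moves the VEC_free of vec_tmp out of the multi_step_cvt branch, so the temporary is released in the non-recursive (multi_step_cvt == 0) case as well. As a hedged, plain-C model of the multi-step promotion itself (no internal APIs; the element counts are illustrative): each stage doubles the number of result vectors, which is why vec_tmp is sized VEC_length (tree, *vec_oprnds0) * 2 and the recursion runs with multi_step_cvt - 1:

    #include <stdio.h>

    int
    main (void)
    {
      /* With 16-byte vectors one v16qi widens to two v8hi and then to
         four v4si; modelled here on 8 elements with scalar code.  */
      signed char in[8] = { 1, -2, 3, -4, 5, -6, 7, -8 };
      short mid[8];
      int out[8];
      int i;

      for (i = 0; i < 8; i++)   /* stage 1: conceptually VEC_UNPACK_LO/HI_EXPR */
        mid[i] = (short) in[i];
      for (i = 0; i < 8; i++)   /* stage 2: built after the recursive call */
        out[i] = (int) mid[i];

      for (i = 0; i < 8; i++)
        printf ("%d ", out[i]);
      printf ("\n");
      return 0;
    }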
2687 3055
2688 3056
2689 /* Function vectorizable_type_promotion 3057 /* Function vectorizable_type_promotion
2690 3058
2782 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 3150 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2783 if (nunits_in <= nunits_out) 3151 if (nunits_in <= nunits_out)
2784 return false; 3152 return false;
2785 3153
2786 /* Multiple types in SLP are handled by creating the appropriate number of 3154 /* Multiple types in SLP are handled by creating the appropriate number of
2787 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3155 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2788 case of SLP. */ 3156 case of SLP. */
2789 if (slp_node) 3157 if (slp_node)
2790 ncopies = 1; 3158 ncopies = 1;
2791 else 3159 else
2792 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 3160 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2874 { 3242 {
2875 /* Handle uses. */ 3243 /* Handle uses. */
2876 if (j == 0) 3244 if (j == 0)
2877 { 3245 {
2878 if (slp_node) 3246 if (slp_node)
2879 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1, -1); 3247 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3248 &vec_oprnds1, -1);
2880 else 3249 else
2881 { 3250 {
2882 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 3251 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2883 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); 3252 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2884 if (op_type == binary_op) 3253 if (op_type == binary_op)
2897 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); 3266 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2898 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1); 3267 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2899 } 3268 }
2900 } 3269 }
2901 3270
2902 /* Arguments are ready. Create the new vector stmts. */ 3271 /* Arguments are ready. Create the new vector stmts. */
2903 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts); 3272 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2904 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1, 3273 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2905 multi_step_cvt, stmt, 3274 multi_step_cvt, stmt,
2906 tmp_vec_dsts, 3275 tmp_vec_dsts,
2907 gsi, slp_node, code1, code2, 3276 gsi, slp_node, code1, code2,
2997 scalar_dest = gimple_assign_lhs (stmt); 3366 scalar_dest = gimple_assign_lhs (stmt);
2998 if (TREE_CODE (scalar_dest) != ARRAY_REF 3367 if (TREE_CODE (scalar_dest) != ARRAY_REF
2999 && TREE_CODE (scalar_dest) != INDIRECT_REF 3368 && TREE_CODE (scalar_dest) != INDIRECT_REF
3000 && TREE_CODE (scalar_dest) != COMPONENT_REF 3369 && TREE_CODE (scalar_dest) != COMPONENT_REF
3001 && TREE_CODE (scalar_dest) != IMAGPART_EXPR 3370 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3002 && TREE_CODE (scalar_dest) != REALPART_EXPR) 3371 && TREE_CODE (scalar_dest) != REALPART_EXPR
3372 && TREE_CODE (scalar_dest) != MEM_REF)
3003 return false; 3373 return false;
3004 3374
3005 gcc_assert (gimple_assign_single_p (stmt)); 3375 gcc_assert (gimple_assign_single_p (stmt));
3006 op = gimple_assign_rhs1 (stmt); 3376 op = gimple_assign_rhs1 (stmt);
3007 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt)) 3377 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3021 } 3391 }
3022 3392
3023 vec_mode = TYPE_MODE (vectype); 3393 vec_mode = TYPE_MODE (vectype);
3024 /* FORNOW. In some cases can vectorize even if data-type not supported 3394 /* FORNOW. In some cases can vectorize even if data-type not supported
3025 (e.g. - array initialization with 0). */ 3395 (e.g. - array initialization with 0). */
3026 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing) 3396 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3027 return false; 3397 return false;
3028 3398
3029 if (!STMT_VINFO_DATA_REF (stmt_info)) 3399 if (!STMT_VINFO_DATA_REF (stmt_info))
3030 return false; 3400 return false;
3401
3402 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3403 {
3404 if (vect_print_dump_info (REPORT_DETAILS))
3405 fprintf (vect_dump, "negative step for store.");
3406 return false;
3407 }
3031 3408
3032 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 3409 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3033 { 3410 {
3034 strided_store = true; 3411 strided_store = true;
3035 first_stmt = DR_GROUP_FIRST_DR (stmt_info); 3412 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3112 fprintf (vect_dump, "transform store. ncopies = %d",ncopies); 3489 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3113 3490
3114 dr_chain = VEC_alloc (tree, heap, group_size); 3491 dr_chain = VEC_alloc (tree, heap, group_size);
3115 oprnds = VEC_alloc (tree, heap, group_size); 3492 oprnds = VEC_alloc (tree, heap, group_size);
3116 3493
3117 alignment_support_scheme = vect_supportable_dr_alignment (first_dr); 3494 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3118 gcc_assert (alignment_support_scheme); 3495 gcc_assert (alignment_support_scheme);
3119 3496
3120 /* In case the vectorization factor (VF) is bigger than the number 3497 /* In case the vectorization factor (VF) is bigger than the number
3121 of elements that we can fit in a vectype (nunits), we have to generate 3498 of elements that we can fit in a vectype (nunits), we have to generate
3122 more than one vector stmt - i.e - we need to "unroll" the 3499 more than one vector stmt - i.e - we need to "unroll" the
3149 (the order of the data-refs in the output of vect_permute_store_chain 3526 (the order of the data-refs in the output of vect_permute_store_chain
3150 corresponds to the order of scalar stmts in the interleaving chain - see 3527 corresponds to the order of scalar stmts in the interleaving chain - see
3151 the documentation of vect_permute_store_chain()). 3528 the documentation of vect_permute_store_chain()).
3152 3529
3153 In case of both multiple types and interleaving, above vector stores and 3530 In case of both multiple types and interleaving, above vector stores and
3154 permutation stmts are created for every copy. The result vector stmts are 3531 permutation stmts are created for every copy. The result vector stmts are
3155 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding 3532 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3156 STMT_VINFO_RELATED_STMT for the next copies. 3533 STMT_VINFO_RELATED_STMT for the next copies.
3157 */ 3534 */
3158 3535
3159 prev_stmt_info = NULL; 3536 prev_stmt_info = NULL;
3165 if (j == 0) 3542 if (j == 0)
3166 { 3543 {
3167 if (slp) 3544 if (slp)
3168 { 3545 {
3169 /* Get vectorized arguments for SLP_NODE. */ 3546 /* Get vectorized arguments for SLP_NODE. */
3170 vect_get_slp_defs (slp_node, &vec_oprnds, NULL, -1); 3547 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3548 NULL, -1);
3171 3549
3172 vec_oprnd = VEC_index (tree, vec_oprnds, 0); 3550 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3173 } 3551 }
3174 else 3552 else
3175 { 3553 {
3240 } 3618 }
3241 3619
3242 next_stmt = first_stmt; 3620 next_stmt = first_stmt;
3243 for (i = 0; i < vec_num; i++) 3621 for (i = 0; i < vec_num; i++)
3244 { 3622 {
3623 struct ptr_info_def *pi;
3624
3245 if (i > 0) 3625 if (i > 0)
3246 /* Bump the vector pointer. */ 3626 /* Bump the vector pointer. */
3247 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 3627 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3248 NULL_TREE); 3628 NULL_TREE);
3249 3629
3252 else if (strided_store) 3632 else if (strided_store)
3253 /* For strided stores vectorized defs are interleaved in 3633 /* For strided stores vectorized defs are interleaved in
3254 vect_permute_store_chain(). */ 3634 vect_permute_store_chain(). */
3255 vec_oprnd = VEC_index (tree, result_chain, i); 3635 vec_oprnd = VEC_index (tree, result_chain, i);
3256 3636
3637 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3638 build_int_cst (reference_alias_ptr_type
3639 (DR_REF (first_dr)), 0));
3640 pi = get_ptr_info (dataref_ptr);
3641 pi->align = TYPE_ALIGN_UNIT (vectype);
3257 if (aligned_access_p (first_dr)) 3642 if (aligned_access_p (first_dr))
3258 data_ref = build_fold_indirect_ref (dataref_ptr); 3643 pi->misalign = 0;
3259 else 3644 else if (DR_MISALIGNMENT (first_dr) == -1)
3260 { 3645 {
3261 int mis = DR_MISALIGNMENT (first_dr); 3646 TREE_TYPE (data_ref)
3262 tree tmis = (mis == -1 ? size_zero_node : size_int (mis)); 3647 = build_aligned_type (TREE_TYPE (data_ref),
3263 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT)); 3648 TYPE_ALIGN (TREE_TYPE (vectype)));
3264 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis); 3649 pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
3265 } 3650 pi->misalign = 0;
3266 3651 }
3267 /* If accesses through a pointer to vectype do not alias the original 3652 else
3268 memory reference we have a problem. This should never happen. */ 3653 {
3269 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref), 3654 TREE_TYPE (data_ref)
3270 get_alias_set (gimple_assign_lhs (stmt)))); 3655 = build_aligned_type (TREE_TYPE (data_ref),
3271 3656 TYPE_ALIGN (TREE_TYPE (vectype)));
3272 /* Arguments are ready. Create the new vector stmt. */ 3657 pi->misalign = DR_MISALIGNMENT (first_dr);
3658 }
3659
3660 /* Arguments are ready. Create the new vector stmt. */
3273 new_stmt = gimple_build_assign (data_ref, vec_oprnd); 3661 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3274 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3662 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3275 mark_symbols_for_renaming (new_stmt); 3663 mark_symbols_for_renaming (new_stmt);
3276 3664
3277 if (slp) 3665 if (slp)
3291 3679
3292 VEC_free (tree, heap, dr_chain); 3680 VEC_free (tree, heap, dr_chain);
3293 VEC_free (tree, heap, oprnds); 3681 VEC_free (tree, heap, oprnds);
3294 if (result_chain) 3682 if (result_chain)
3295 VEC_free (tree, heap, result_chain); 3683 VEC_free (tree, heap, result_chain);
3684 if (vec_oprnds)
3685 VEC_free (tree, heap, vec_oprnds);
3296 3686
3297 return true; 3687 return true;
3688 }
3689
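Two behavioural changes in the store path above: a store whose DR_STEP is negative is now rejected during analysis (only loads grow a reverse-permutation path, below), and the vector reference is built as a MEM_REF whose SSA pointer carries the alignment facts through ptr_info (pi->align / pi->misalign) instead of a MISALIGNED_INDIRECT_REF plus alias assertion. Hedged examples of the step check; the loop shapes are illustrative only:

    /* DR_STEP of the store is +4 bytes: still vectorized here.  */
    void
    forward_store (int *a, int *b, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        a[i] = b[i] + 1;
    }

    /* DR_STEP of the store is -4 bytes: tree_int_cst_compare (DR_STEP (dr),
       size_zero_node) < 0 now makes vectorizable_store bail out early.  */
    void
    backward_store (int *a, int *b, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        a[n - 1 - i] = b[i] + 1;
    }
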
3690 /* Given a vector type VECTYPE returns a builtin DECL to be used
3691 for vector permutation and stores a mask into *MASK that implements
3692 reversal of the vector elements. If that is impossible to do,
3693 returns NULL (and *MASK is unchanged). */
3694
3695 static tree
3696 perm_mask_for_reverse (tree vectype, tree *mask)
3697 {
3698 tree builtin_decl;
3699 tree mask_element_type, mask_type;
3700 tree mask_vec = NULL;
3701 int i;
3702 int nunits;
3703 if (!targetm.vectorize.builtin_vec_perm)
3704 return NULL;
3705
3706 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
3707 &mask_element_type);
3708 if (!builtin_decl || !mask_element_type)
3709 return NULL;
3710
3711 mask_type = get_vectype_for_scalar_type (mask_element_type);
3712 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3713 if (!mask_type
3714 || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
3715 return NULL;
3716
3717 for (i = 0; i < nunits; i++)
3718 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
3719 mask_vec = build_vector (mask_type, mask_vec);
3720
3721 if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
3722 return NULL;
3723 if (mask)
3724 *mask = mask_vec;
3725 return builtin_decl;
3726 }
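
Because tree_cons prepends, the loop in perm_mask_for_reverse that pushes 0 .. nunits-1 ends up building the constant { nunits-1, ..., 1, 0 }: element i of the result selects element nunits-1-i of the input. A hedged scalar model of the permutation that the target's builtin_vec_perm then applies (NUNITS = 4 is an assumption for illustration):

    #include <stdio.h>

    int
    main (void)
    {
      enum { NUNITS = 4 };
      int in[NUNITS]   = { 10, 20, 30, 40 };
      int mask[NUNITS] = { 3, 2, 1, 0 };   /* { NUNITS-1, ..., 1, 0 } */
      int out[NUNITS];
      int i;

      for (i = 0; i < NUNITS; i++)         /* what builtin_vec_perm applies */
        out[i] = in[mask[i]];

      for (i = 0; i < NUNITS; i++)
        printf ("%d ", out[i]);            /* prints: 40 30 20 10 */
      printf ("\n");
      return 0;
    }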
3727
3728 /* Given a vector variable X, that was generated for the scalar LHS of
3729 STMT, generate instructions to reverse the vector elements of X,
3730 insert them at *GSI and return the permuted vector variable. */
3731
3732 static tree
3733 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
3734 {
3735 tree vectype = TREE_TYPE (x);
3736 tree mask_vec, builtin_decl;
3737 tree perm_dest, data_ref;
3738 gimple perm_stmt;
3739
3740 builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
3741
3742 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3743
3744 /* Generate the permute statement. */
3745 perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
3746 if (!useless_type_conversion_p (vectype,
3747 TREE_TYPE (TREE_TYPE (builtin_decl))))
3748 {
3749 tree tem = create_tmp_reg (TREE_TYPE (TREE_TYPE (builtin_decl)), NULL);
3750 tem = make_ssa_name (tem, perm_stmt);
3751 gimple_call_set_lhs (perm_stmt, tem);
3752 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3753 perm_stmt = gimple_build_assign (NULL_TREE,
3754 build1 (VIEW_CONVERT_EXPR,
3755 vectype, tem));
3756 }
3757 data_ref = make_ssa_name (perm_dest, perm_stmt);
3758 gimple_set_lhs (perm_stmt, data_ref);
3759 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3760
3761 return data_ref;
3298 } 3762 }
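
reverse_vec_elements is used by the negative-step load path added below, together with an offset of -TYPE_VECTOR_SUBPARTS (vectype) + 1 applied to the data-ref. A hedged by-hand rendering with nunits = 4; the function and variable names are made up for illustration:

    /* dst[i] = src[n - 1 - i], vectorized by hand with nunits = 4.
       Assumes n is a multiple of 4.  */
    void
    reversed_copy (int *restrict dst, const int *restrict src, int n)
    {
      int i;
      for (i = 0; i < n; i += 4)
        {
          const int *p = &src[n - 1 - i - 3];   /* data-ref bumped by -nunits + 1 */
          dst[i]     = p[3];                    /* reverse_vec_elements */
          dst[i + 1] = p[2];
          dst[i + 2] = p[1];
          dst[i + 3] = p[0];
        }
    }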
3299 3763
3300 /* vectorizable_load. 3764 /* vectorizable_load.
3301 3765
3302 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that 3766 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3319 struct loop *containing_loop = (gimple_bb (stmt))->loop_father; 3783 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3320 bool nested_in_vect_loop = false; 3784 bool nested_in_vect_loop = false;
3321 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; 3785 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3322 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3786 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3323 tree new_temp; 3787 tree new_temp;
3324 int mode; 3788 enum machine_mode mode;
3325 gimple new_stmt = NULL; 3789 gimple new_stmt = NULL;
3326 tree dummy; 3790 tree dummy;
3327 enum dr_alignment_support alignment_support_scheme; 3791 enum dr_alignment_support alignment_support_scheme;
3328 tree dataref_ptr = NULL_TREE; 3792 tree dataref_ptr = NULL_TREE;
3329 gimple ptr_incr; 3793 gimple ptr_incr;
3337 VEC(tree,heap) *dr_chain = NULL; 3801 VEC(tree,heap) *dr_chain = NULL;
3338 bool strided_load = false; 3802 bool strided_load = false;
3339 gimple first_stmt; 3803 gimple first_stmt;
3340 tree scalar_type; 3804 tree scalar_type;
3341 bool inv_p; 3805 bool inv_p;
3806 bool negative;
3342 bool compute_in_loop = false; 3807 bool compute_in_loop = false;
3343 struct loop *at_loop; 3808 struct loop *at_loop;
3344 int vec_num; 3809 int vec_num;
3345 bool slp = (slp_node != NULL); 3810 bool slp = (slp_node != NULL);
3346 bool slp_perm = false; 3811 bool slp_perm = false;
3356 } 3821 }
3357 else 3822 else
3358 vf = 1; 3823 vf = 1;
3359 3824
3360 /* Multiple types in SLP are handled by creating the appropriate number of 3825 /* Multiple types in SLP are handled by creating the appropriate number of
3361 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3826 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3362 case of SLP. */ 3827 case of SLP. */
3363 if (slp) 3828 if (slp)
3364 ncopies = 1; 3829 ncopies = 1;
3365 else 3830 else
3366 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 3831 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3392 code = gimple_assign_rhs_code (stmt); 3857 code = gimple_assign_rhs_code (stmt);
3393 if (code != ARRAY_REF 3858 if (code != ARRAY_REF
3394 && code != INDIRECT_REF 3859 && code != INDIRECT_REF
3395 && code != COMPONENT_REF 3860 && code != COMPONENT_REF
3396 && code != IMAGPART_EXPR 3861 && code != IMAGPART_EXPR
3397 && code != REALPART_EXPR) 3862 && code != REALPART_EXPR
3863 && code != MEM_REF)
3398 return false; 3864 return false;
3399 3865
3400 if (!STMT_VINFO_DATA_REF (stmt_info)) 3866 if (!STMT_VINFO_DATA_REF (stmt_info))
3401 return false; 3867 return false;
3402 3868
3869 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
3870 if (negative && ncopies > 1)
3871 {
3872 if (vect_print_dump_info (REPORT_DETAILS))
3873 fprintf (vect_dump, "multiple types with negative step.");
3874 return false;
3875 }
3876
3403 scalar_type = TREE_TYPE (DR_REF (dr)); 3877 scalar_type = TREE_TYPE (DR_REF (dr));
3404 mode = (int) TYPE_MODE (vectype); 3878 mode = TYPE_MODE (vectype);
3405 3879
3406 /* FORNOW. In some cases can vectorize even if data-type not supported 3880 /* FORNOW. In some cases can vectorize even if data-type not supported
3407 (e.g. - data copies). */ 3881 (e.g. - data copies). */
3408 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing) 3882 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
3409 { 3883 {
3410 if (vect_print_dump_info (REPORT_DETAILS)) 3884 if (vect_print_dump_info (REPORT_DETAILS))
3411 fprintf (vect_dump, "Aligned load, but unsupported type."); 3885 fprintf (vect_dump, "Aligned load, but unsupported type.");
3412 return false; 3886 return false;
3413 } 3887 }
3430 3904
3431 /* Check if interleaving is supported. */ 3905 /* Check if interleaving is supported. */
3432 if (!vect_strided_load_supported (vectype) 3906 if (!vect_strided_load_supported (vectype)
3433 && !PURE_SLP_STMT (stmt_info) && !slp) 3907 && !PURE_SLP_STMT (stmt_info) && !slp)
3434 return false; 3908 return false;
3909 }
3910
3911 if (negative)
3912 {
3913 gcc_assert (!strided_load);
3914 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
3915 if (alignment_support_scheme != dr_aligned
3916 && alignment_support_scheme != dr_unaligned_supported)
3917 {
3918 if (vect_print_dump_info (REPORT_DETAILS))
3919 fprintf (vect_dump, "negative step but alignment required.");
3920 return false;
3921 }
3922 if (!perm_mask_for_reverse (vectype, NULL))
3923 {
3924 if (vect_print_dump_info (REPORT_DETAILS))
3925 fprintf (vect_dump, "negative step and reversing not supported.");
3926 return false;
3927 }
3435 } 3928 }
3436 3929
3437 if (!vec_stmt) /* transformation not required. */ 3930 if (!vec_stmt) /* transformation not required. */
3438 { 3931 {
3439 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 3932 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3476 first_stmt = stmt; 3969 first_stmt = stmt;
3477 first_dr = dr; 3970 first_dr = dr;
3478 group_size = vec_num = 1; 3971 group_size = vec_num = 1;
3479 } 3972 }
3480 3973
3481 alignment_support_scheme = vect_supportable_dr_alignment (first_dr); 3974 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3482 gcc_assert (alignment_support_scheme); 3975 gcc_assert (alignment_support_scheme);
3483 3976
3484 /* In case the vectorization factor (VF) is bigger than the number 3977 /* In case the vectorization factor (VF) is bigger than the number
3485 of elements that we can fit in a vectype (nunits), we have to generate 3978 of elements that we can fit in a vectype (nunits), we have to generate
3486 more than one vector stmt - i.e - we need to "unroll" the 3979 more than one vector stmt - i.e - we need to "unroll" the
3487 vector stmt by a factor VF/nunits. In doing so, we record a pointer 3980 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3488 from one copy of the vector stmt to the next, in the field 3981 from one copy of the vector stmt to the next, in the field
3489 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 3982 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3490 stages to find the correct vector defs to be used when vectorizing 3983 stages to find the correct vector defs to be used when vectorizing
3491 stmts that use the defs of the current stmt. The example below illustrates 3984 stmts that use the defs of the current stmt. The example below
3492 the vectorization process when VF=16 and nunits=4 (i.e - we need to create 3985 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
3493 4 vectorized stmts): 3986 need to create 4 vectorized stmts):
3494 3987
3495 before vectorization: 3988 before vectorization:
3496 RELATED_STMT VEC_STMT 3989 RELATED_STMT VEC_STMT
3497 S1: x = memref - - 3990 S1: x = memref - -
3498 S2: z = x + 1 - - 3991 S2: z = x + 1 - -
3500 step 1: vectorize stmt S1: 3993 step 1: vectorize stmt S1:
3501 We first create the vector stmt VS1_0, and, as usual, record a 3994 We first create the vector stmt VS1_0, and, as usual, record a
3502 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. 3995 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3503 Next, we create the vector stmt VS1_1, and record a pointer to 3996 Next, we create the vector stmt VS1_1, and record a pointer to
3504 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. 3997 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3505 Similarly, for VS1_2 and VS1_3. This is the resulting chain of 3998 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3506 stmts and pointers: 3999 stmts and pointers:
3507 RELATED_STMT VEC_STMT 4000 RELATED_STMT VEC_STMT
3508 VS1_0: vx0 = memref0 VS1_1 - 4001 VS1_0: vx0 = memref0 VS1_1 -
3509 VS1_1: vx1 = memref1 VS1_2 - 4002 VS1_1: vx1 = memref1 VS1_2 -
3510 VS1_2: vx2 = memref2 VS1_3 - 4003 VS1_2: vx2 = memref2 VS1_3 -
3543 the documentation of vect_permute_load_chain()). 4036 the documentation of vect_permute_load_chain()).
3544 The generation of permutation stmts and recording them in 4037 The generation of permutation stmts and recording them in
3545 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load(). 4038 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3546 4039
3547 In case of both multiple types and interleaving, the vector loads and 4040 In case of both multiple types and interleaving, the vector loads and
3548 permutation stmts above are created for every copy. The result vector stmts 4041 permutation stmts above are created for every copy. The result vector
3549 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding 4042 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
3550 STMT_VINFO_RELATED_STMT for the next copies. */ 4043 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
3551 4044
3552 /* If the data reference is aligned (dr_aligned) or potentially unaligned 4045 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3553 on a target that supports unaligned accesses (dr_unaligned_supported) 4046 on a target that supports unaligned accesses (dr_unaligned_supported)
3554 we generate the following code: 4047 we generate the following code:
3555 p = initial_addr; 4048 p = initial_addr;
3578 msq = lsq; 4071 msq = lsq;
3579 } */ 4072 } */
3580 4073
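A hedged, byte-level C model of the realignment scheme in the pseudo-code above: msq and lsq are the two aligned loads straddling each misaligned vector, and the realignment token stands for the target shuffle that extracts the VS bytes starting at the original address. Endianness and the actual target builtin are abstracted away; all names here are illustrative:

    #include <stdint.h>
    #include <string.h>

    enum { VS = 16 };                /* vector size in bytes (an assumption) */

    /* floor (p): round P down to a VS-byte boundary, as in the pseudo-code.  */
    static const uint8_t *
    floor_align (const uint8_t *p)
    {
      return (const uint8_t *) ((uintptr_t) p & ~(uintptr_t) (VS - 1));
    }

    /* realign_load (msq, lsq, token): concatenate the two aligned vectors
       and take the VS bytes at the offset the realignment token encodes.  */
    static void
    realign_load (uint8_t *dest, const uint8_t *msq, const uint8_t *lsq,
                  unsigned misalign)
    {
      uint8_t both[2 * VS];
      memcpy (both, msq, VS);
      memcpy (both + VS, lsq, VS);
      memcpy (dest, both + misalign, VS);
    }

    /* Load one misaligned vector at P from two aligned loads.  */
    static void
    load_unaligned (uint8_t *dest, const uint8_t *p)
    {
      const uint8_t *base = floor_align (p);
      realign_load (dest, base, base + VS, (unsigned) (p - base));
    }
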
3581 /* If the misalignment remains the same throughout the execution of the 4074 /* If the misalignment remains the same throughout the execution of the
3582 loop, we can create the init_addr and permutation mask at the loop 4075 loop, we can create the init_addr and permutation mask at the loop
3583 preheader. Otherwise, it needs to be created inside the loop. 4076 preheader. Otherwise, it needs to be created inside the loop.
3584 This can only occur when vectorizing memory accesses in the inner-loop 4077 This can only occur when vectorizing memory accesses in the inner-loop
3585 nested within an outer-loop that is being vectorized. */ 4078 nested within an outer-loop that is being vectorized. */
3586 4079
3587 if (loop && nested_in_vect_loop_p (loop, stmt) 4080 if (loop && nested_in_vect_loop_p (loop, stmt)
3588 && (TREE_INT_CST_LOW (DR_STEP (dr)) 4081 && (TREE_INT_CST_LOW (DR_STEP (dr))
3605 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 4098 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3606 } 4099 }
3607 } 4100 }
3608 else 4101 else
3609 at_loop = loop; 4102 at_loop = loop;
4103
4104 if (negative)
4105 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
3610 4106
3611 prev_stmt_info = NULL; 4107 prev_stmt_info = NULL;
3612 for (j = 0; j < ncopies; j++) 4108 for (j = 0; j < ncopies; j++)
3613 { 4109 {
3614 /* 1. Create the vector pointer update chain. */ 4110 /* 1. Create the vector pointer update chain. */
3629 4125
3630 /* 2. Create the vector-load in the loop. */ 4126 /* 2. Create the vector-load in the loop. */
3631 switch (alignment_support_scheme) 4127 switch (alignment_support_scheme)
3632 { 4128 {
3633 case dr_aligned: 4129 case dr_aligned:
3634 gcc_assert (aligned_access_p (first_dr));
3635 data_ref = build_fold_indirect_ref (dataref_ptr);
3636 break;
3637 case dr_unaligned_supported: 4130 case dr_unaligned_supported:
3638 { 4131 {
3639 int mis = DR_MISALIGNMENT (first_dr); 4132 struct ptr_info_def *pi;
3640 tree tmis = (mis == -1 ? size_zero_node : size_int (mis)); 4133 data_ref
3641 4134 = build2 (MEM_REF, vectype, dataref_ptr,
3642 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT)); 4135 build_int_cst (reference_alias_ptr_type
3643 data_ref = 4136 (DR_REF (first_dr)), 0));
3644 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis); 4137 pi = get_ptr_info (dataref_ptr);
4138 pi->align = TYPE_ALIGN_UNIT (vectype);
4139 if (alignment_support_scheme == dr_aligned)
4140 {
4141 gcc_assert (aligned_access_p (first_dr));
4142 pi->misalign = 0;
4143 }
4144 else if (DR_MISALIGNMENT (first_dr) == -1)
4145 {
4146 TREE_TYPE (data_ref)
4147 = build_aligned_type (TREE_TYPE (data_ref),
4148 TYPE_ALIGN (TREE_TYPE (vectype)));
4149 pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
4150 pi->misalign = 0;
4151 }
4152 else
4153 {
4154 TREE_TYPE (data_ref)
4155 = build_aligned_type (TREE_TYPE (data_ref),
4156 TYPE_ALIGN (TREE_TYPE (vectype)));
4157 pi->misalign = DR_MISALIGNMENT (first_dr);
4158 }
3645 break; 4159 break;
3646 } 4160 }
3647 case dr_explicit_realign: 4161 case dr_explicit_realign:
3648 { 4162 {
3649 tree ptr, bump; 4163 tree ptr, bump;
3653 msq = vect_setup_realignment (first_stmt, gsi, 4167 msq = vect_setup_realignment (first_stmt, gsi,
3654 &realignment_token, 4168 &realignment_token,
3655 dr_explicit_realign, 4169 dr_explicit_realign,
3656 dataref_ptr, NULL); 4170 dataref_ptr, NULL);
3657 4171
3658 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); 4172 new_stmt = gimple_build_assign_with_ops
4173 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4174 build_int_cst
4175 (TREE_TYPE (dataref_ptr),
4176 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4177 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4178 gimple_assign_set_lhs (new_stmt, ptr);
4179 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4180 data_ref
4181 = build2 (MEM_REF, vectype, ptr,
4182 build_int_cst (reference_alias_ptr_type
4183 (DR_REF (first_dr)), 0));
3659 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4184 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3660 new_stmt = gimple_build_assign (vec_dest, data_ref); 4185 new_stmt = gimple_build_assign (vec_dest, data_ref);
3661 new_temp = make_ssa_name (vec_dest, new_stmt); 4186 new_temp = make_ssa_name (vec_dest, new_stmt);
3662 gimple_assign_set_lhs (new_stmt, new_temp); 4187 gimple_assign_set_lhs (new_stmt, new_temp);
3663 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); 4188 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3666 msq = new_temp; 4191 msq = new_temp;
3667 4192
3668 bump = size_binop (MULT_EXPR, vs_minus_1, 4193 bump = size_binop (MULT_EXPR, vs_minus_1,
3669 TYPE_SIZE_UNIT (scalar_type)); 4194 TYPE_SIZE_UNIT (scalar_type));
3670 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); 4195 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3671 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr); 4196 new_stmt = gimple_build_assign_with_ops
4197 (BIT_AND_EXPR, NULL_TREE, ptr,
4198 build_int_cst
4199 (TREE_TYPE (ptr),
4200 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4201 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4202 gimple_assign_set_lhs (new_stmt, ptr);
4203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4204 data_ref
4205 = build2 (MEM_REF, vectype, ptr,
4206 build_int_cst (reference_alias_ptr_type
4207 (DR_REF (first_dr)), 0));
3672 break; 4208 break;
3673 } 4209 }
3674 case dr_explicit_realign_optimized: 4210 case dr_explicit_realign_optimized:
3675 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); 4211 new_stmt = gimple_build_assign_with_ops
4212 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4213 build_int_cst
4214 (TREE_TYPE (dataref_ptr),
4215 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4216 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4217 gimple_assign_set_lhs (new_stmt, new_temp);
4218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4219 data_ref
4220 = build2 (MEM_REF, vectype, new_temp,
4221 build_int_cst (reference_alias_ptr_type
4222 (DR_REF (first_dr)), 0));
3676 break; 4223 break;
3677 default: 4224 default:
3678 gcc_unreachable (); 4225 gcc_unreachable ();
3679 } 4226 }
3680 /* If accesses through a pointer to vectype do not alias the original
3681 memory reference we have a problem. This should never happen. */
3682 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3683 get_alias_set (gimple_assign_rhs1 (stmt))));
3684 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4227 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3685 new_stmt = gimple_build_assign (vec_dest, data_ref); 4228 new_stmt = gimple_build_assign (vec_dest, data_ref);
3686 new_temp = make_ssa_name (vec_dest, new_stmt); 4229 new_temp = make_ssa_name (vec_dest, new_stmt);
3687 gimple_assign_set_lhs (new_stmt, new_temp); 4230 gimple_assign_set_lhs (new_stmt, new_temp);
3688 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3689 mark_symbols_for_renaming (new_stmt); 4232 mark_symbols_for_renaming (new_stmt);
3690 4233
3691 /* 3. Handle explicit realignment if necessary/supported. Create in 4234 /* 3. Handle explicit realignment if necessary/supported. Create in
3692 loop: vec_dest = realign_load (msq, lsq, realignment_token) */ 4235 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3693 if (alignment_support_scheme == dr_explicit_realign_optimized 4236 if (alignment_support_scheme == dr_explicit_realign_optimized
3694 || alignment_support_scheme == dr_explicit_realign) 4237 || alignment_support_scheme == dr_explicit_realign)
3695 { 4238 {
3696 tree tmp; 4239 tree tmp;
3745 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); 4288 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3746 new_stmt = SSA_NAME_DEF_STMT (new_temp); 4289 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3747 } 4290 }
3748 else 4291 else
3749 gcc_unreachable (); /* FORNOW. */ 4292 gcc_unreachable (); /* FORNOW. */
4293 }
4294
4295 if (negative)
4296 {
4297 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4298 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3750 } 4299 }
3751 4300
3752 /* Collect vector loads and later create their permutation in 4301 /* Collect vector loads and later create their permutation in
3753 vect_transform_strided_load (). */ 4302 vect_transform_strided_load (). */
3754 if (strided_load || slp_perm) 4303 if (strided_load || slp_perm)
3867 tree vec_dest = NULL_TREE; 4416 tree vec_dest = NULL_TREE;
3868 tree op = NULL_TREE; 4417 tree op = NULL_TREE;
3869 tree cond_expr, then_clause, else_clause; 4418 tree cond_expr, then_clause, else_clause;
3870 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4419 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3871 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4420 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3872 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause; 4421 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4422 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
3873 tree vec_compare, vec_cond_expr; 4423 tree vec_compare, vec_cond_expr;
3874 tree new_temp; 4424 tree new_temp;
3875 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3876 enum machine_mode vec_mode; 4426 enum machine_mode vec_mode;
3877 tree def; 4427 tree def;
3878 enum vect_def_type dt; 4428 enum vect_def_type dt, dts[4];
3879 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 4429 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3880 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 4430 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3881 enum tree_code code; 4431 enum tree_code code;
4432 stmt_vec_info prev_stmt_info = NULL;
4433 int j;
3882 4434
3883 /* FORNOW: unsupported in basic block SLP. */ 4435 /* FORNOW: unsupported in basic block SLP. */
3884 gcc_assert (loop_vinfo); 4436 gcc_assert (loop_vinfo);
3885 4437
3886 gcc_assert (ncopies >= 1); 4438 gcc_assert (ncopies >= 1);
3887 if (ncopies > 1) 4439 if (reduc_index && ncopies > 1)
3888 return false; /* FORNOW */ 4440 return false; /* FORNOW */
3889 4441
3890 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 4442 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3891 return false; 4443 return false;
3892 4444
3969 /* Handle def. */ 4521 /* Handle def. */
3970 scalar_dest = gimple_assign_lhs (stmt); 4522 scalar_dest = gimple_assign_lhs (stmt);
3971 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4523 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3972 4524
3973 /* Handle cond expr. */ 4525 /* Handle cond expr. */
3974 vec_cond_lhs = 4526 for (j = 0; j < ncopies; j++)
3975 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); 4527 {
3976 vec_cond_rhs = 4528 gimple new_stmt;
3977 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); 4529 if (j == 0)
3978 if (reduc_index == 1) 4530 {
3979 vec_then_clause = reduc_def; 4531 gimple gtemp;
3980 else 4532 vec_cond_lhs =
3981 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); 4533 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
3982 if (reduc_index == 2) 4534 stmt, NULL);
3983 vec_else_clause = reduc_def; 4535 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
3984 else 4536 NULL, &gtemp, &def, &dts[0]);
3985 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); 4537 vec_cond_rhs =
3986 4538 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
3987 /* Arguments are ready. Create the new vector stmt. */ 4539 stmt, NULL);
3988 vec_compare = build2 (TREE_CODE (cond_expr), vectype, 4540 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
3989 vec_cond_lhs, vec_cond_rhs); 4541 NULL, &gtemp, &def, &dts[1]);
3990 vec_cond_expr = build3 (VEC_COND_EXPR, vectype, 4542 if (reduc_index == 1)
3991 vec_compare, vec_then_clause, vec_else_clause); 4543 vec_then_clause = reduc_def;
3992 4544 else
3993 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr); 4545 {
3994 new_temp = make_ssa_name (vec_dest, *vec_stmt); 4546 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
3995 gimple_assign_set_lhs (*vec_stmt, new_temp); 4547 stmt, NULL);
3996 vect_finish_stmt_generation (stmt, *vec_stmt, gsi); 4548 vect_is_simple_use (then_clause, loop_vinfo,
4549 NULL, &gtemp, &def, &dts[2]);
4550 }
4551 if (reduc_index == 2)
4552 vec_else_clause = reduc_def;
4553 else
4554 {
4555 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4556 stmt, NULL);
4557 vect_is_simple_use (else_clause, loop_vinfo,
4558 NULL, &gtemp, &def, &dts[3]);
4559 }
4560 }
4561 else
4562 {
4563 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4564 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4565 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4566 vec_then_clause);
4567 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4568 vec_else_clause);
4569 }
4570
4571 /* Arguments are ready. Create the new vector stmt. */
4572 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4573 vec_cond_lhs, vec_cond_rhs);
4574 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4575 vec_compare, vec_then_clause, vec_else_clause);
4576
4577 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4578 new_temp = make_ssa_name (vec_dest, new_stmt);
4579 gimple_assign_set_lhs (new_stmt, new_temp);
4580 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4581 if (j == 0)
4582 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4583 else
4584 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4585
4586 prev_stmt_info = vinfo_for_stmt (new_stmt);
4587 }
3997 4588
3998 return true; 4589 return true;
3999 } 4590 }
4000 4591
4001 4592
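The loop added on the right generalizes vectorizable_condition to ncopies > 1: on the first copy (j == 0) the vector defs are created from the scalar operands with vect_get_vec_def_for_operand, later copies are derived from the previous ones with vect_get_vec_def_for_stmt_copy, and the generated stmts are chained through STMT_VINFO_RELATED_STMT. Per lane, each generated VEC_COND_EXPR selects between the then- and else-clause. A minimal standalone scalar model of that lane-wise semantics (plain C, not GCC internals; all names here are illustrative):

#include <stdio.h>

/* Scalar model of one VEC_COND_EXPR on 4-lane int vectors: each
   result lane picks the then- or else-clause lane depending on the
   lane-wise comparison ('<' stands in for whatever comparison code
   the original COND_EXPR used).  */
static void
vec_cond_lt4 (const int *lhs, const int *rhs,
              const int *then_c, const int *else_c, int *res)
{
  int i;
  for (i = 0; i < 4; i++)
    res[i] = lhs[i] < rhs[i] ? then_c[i] : else_c[i];
}

int
main (void)
{
  int lhs[4] = { 1, 5, 3, 7 }, rhs[4] = { 4, 4, 4, 4 };
  int t[4] = { 10, 11, 12, 13 }, e[4] = { 20, 21, 22, 23 };
  int r[4], i;

  vec_cond_lt4 (lhs, rhs, t, e, r);
  for (i = 0; i < 4; i++)
    printf ("%d ", r[i]);	/* prints: 10 21 12 23 */
  printf ("\n");
  return 0;
}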
4104 && (STMT_VINFO_RELEVANT_P (stmt_info) 4695 && (STMT_VINFO_RELEVANT_P (stmt_info)
4105 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) 4696 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4106 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL) 4697 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4107 || vectorizable_type_demotion (stmt, NULL, NULL, NULL) 4698 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4108 || vectorizable_conversion (stmt, NULL, NULL, NULL) 4699 || vectorizable_conversion (stmt, NULL, NULL, NULL)
4700 || vectorizable_shift (stmt, NULL, NULL, NULL)
4109 || vectorizable_operation (stmt, NULL, NULL, NULL) 4701 || vectorizable_operation (stmt, NULL, NULL, NULL)
4110 || vectorizable_assignment (stmt, NULL, NULL, NULL) 4702 || vectorizable_assignment (stmt, NULL, NULL, NULL)
4111 || vectorizable_load (stmt, NULL, NULL, NULL, NULL) 4703 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4112 || vectorizable_call (stmt, NULL, NULL) 4704 || vectorizable_call (stmt, NULL, NULL)
4113 || vectorizable_store (stmt, NULL, NULL, NULL) 4705 || vectorizable_store (stmt, NULL, NULL, NULL)
4114 || vectorizable_reduction (stmt, NULL, NULL, NULL) 4706 || vectorizable_reduction (stmt, NULL, NULL, NULL)
4115 || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); 4707 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
4116 else 4708 else
4117 { 4709 {
4118 if (bb_vinfo) 4710 if (bb_vinfo)
4119 ok = (vectorizable_operation (stmt, NULL, NULL, node) 4711 ok = (vectorizable_shift (stmt, NULL, NULL, node)
4712 || vectorizable_operation (stmt, NULL, NULL, node)
4120 || vectorizable_assignment (stmt, NULL, NULL, node) 4713 || vectorizable_assignment (stmt, NULL, NULL, node)
4121 || vectorizable_load (stmt, NULL, NULL, node, NULL) 4714 || vectorizable_load (stmt, NULL, NULL, node, NULL)
4122 || vectorizable_store (stmt, NULL, NULL, node)); 4715 || vectorizable_store (stmt, NULL, NULL, node));
4123 } 4716 }
4124 4717
4156 } 4749 }
4157 4750
4158 if (!PURE_SLP_STMT (stmt_info)) 4751 if (!PURE_SLP_STMT (stmt_info))
4159 { 4752 {
4160 /* Groups of strided accesses whose size is not a power of 2 cannot 4753 /* Groups of strided accesses whose size is not a power of 2 cannot
4161 yet be loop-vectorized. Therefore, if this stmt 4754 yet be loop-vectorized. Therefore, if this stmt
4162 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and 4755 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4163 loop-based vectorized), the loop cannot be vectorized. */ 4756 loop-based vectorized), the loop cannot be vectorized. */
4164 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) 4757 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4165 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( 4758 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4166 DR_GROUP_FIRST_DR (stmt_info)))) == -1) 4759 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
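The exact_log2 (...) == -1 test above is how a non-power-of-two group size is detected. A standalone model of that predicate (exact_log2_model is a hypothetical stand-in for GCC's exact_log2):

#include <stdio.h>

/* Returns log2 (x) when x is a power of two, -1 otherwise, so the
   "== -1" check above rejects group sizes such as 3, 5 or 6.  */
static int
exact_log2_model (unsigned int x)
{
  int l = 0;
  if (x == 0 || (x & (x - 1)) != 0)
    return -1;
  while (x >>= 1)
    l++;
  return l;
}

int
main (void)
{
  unsigned int s;
  for (s = 1; s <= 8; s++)
    printf ("group size %u -> %d\n", s, exact_log2_model (s));
  return 0;
}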
4190 slp_instance slp_node_instance) 4783 slp_instance slp_node_instance)
4191 { 4784 {
4192 bool is_store = false; 4785 bool is_store = false;
4193 gimple vec_stmt = NULL; 4786 gimple vec_stmt = NULL;
4194 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4195 gimple orig_stmt_in_pattern; 4788 gimple orig_stmt_in_pattern, orig_scalar_stmt = stmt;
4196 bool done; 4789 bool done;
4197 4790
4198 switch (STMT_VINFO_TYPE (stmt_info)) 4791 switch (STMT_VINFO_TYPE (stmt_info))
4199 { 4792 {
4200 case type_demotion_vec_info_type: 4793 case type_demotion_vec_info_type:
4213 break; 4806 break;
4214 4807
4215 case induc_vec_info_type: 4808 case induc_vec_info_type:
4216 gcc_assert (!slp_node); 4809 gcc_assert (!slp_node);
4217 done = vectorizable_induction (stmt, gsi, &vec_stmt); 4810 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4811 gcc_assert (done);
4812 break;
4813
4814 case shift_vec_info_type:
4815 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
4218 gcc_assert (done); 4816 gcc_assert (done);
4219 break; 4817 break;
4220 4818
4221 case op_vec_info_type: 4819 case op_vec_info_type:
4222 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node); 4820 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4238 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node); 4836 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4239 gcc_assert (done); 4837 gcc_assert (done);
4240 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node) 4838 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4241 { 4839 {
4242 /* In case of interleaving, the whole chain is vectorized when the 4840 /* In case of interleaving, the whole chain is vectorized when the
4243 last store in the chain is reached. Store stmts before the last 4841 last store in the chain is reached. Store stmts before the last
4244 one are skipped, and their vec_stmt_info shouldn't be freed 4842 one are skipped, and their vec_stmt_info shouldn't be freed
4245 meanwhile. */ 4843 meanwhile. */
4246 *strided_store = true; 4844 *strided_store = true;
4247 if (STMT_VINFO_VEC_STMT (stmt_info)) 4845 if (STMT_VINFO_VEC_STMT (stmt_info))
4248 is_store = true; 4846 is_store = true;
4258 break; 4856 break;
4259 4857
4260 case call_vec_info_type: 4858 case call_vec_info_type:
4261 gcc_assert (!slp_node); 4859 gcc_assert (!slp_node);
4262 done = vectorizable_call (stmt, gsi, &vec_stmt); 4860 done = vectorizable_call (stmt, gsi, &vec_stmt);
4861 stmt = gsi_stmt (*gsi);
4263 break; 4862 break;
4264 4863
4265 case reduc_vec_info_type: 4864 case reduc_vec_info_type:
4266 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node); 4865 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
4267 gcc_assert (done); 4866 gcc_assert (done);
4336 computed this idiom. We need to record a pointer to VEC_STMT in 4935 computed this idiom. We need to record a pointer to VEC_STMT in
4337 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the 4936 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4338 documentation of vect_pattern_recog. */ 4937 documentation of vect_pattern_recog. */
4339 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 4938 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4340 { 4939 {
4341 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); 4940 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo)
4941 == orig_scalar_stmt);
4342 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; 4942 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4343 } 4943 }
4344 } 4944 }
4345 } 4945 }
4346 4946
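The assertion strengthened above (now against orig_scalar_stmt) checks the back-link between a scalar stmt and the pattern stmt that vect_pattern_recog substituted for it. A source-level illustration, plain C that typically gets recognized as a widening-multiply pattern:

/* The multiply below widens short operands to int; the pattern
   recognizer typically replaces it with a pattern stmt, and the
   original scalar stmt's STMT_VINFO_RELATED_STMT points back at
   it -- the link asserted above.  */
int
dot16 (short *a, short *b)
{
  int i, s = 0;
  for (i = 0; i < 16; i++)
    s += (int) a[i] * (int) b[i];
  return s;
}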
4456 set_vinfo_for_stmt (stmt, NULL); 5056 set_vinfo_for_stmt (stmt, NULL);
4457 free (stmt_info); 5057 free (stmt_info);
4458 } 5058 }
4459 5059
4460 5060
4461 /* Function get_vectype_for_scalar_type. 5061 /* Function get_vectype_for_scalar_type_and_size.
4462 5062
4463 Returns the vector type corresponding to SCALAR_TYPE as supported 5063 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
4464 by the target. */ 5064 by the target. */
4465 5065
4466 tree 5066 static tree
4467 get_vectype_for_scalar_type (tree scalar_type) 5067 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
4468 { 5068 {
4469 enum machine_mode inner_mode = TYPE_MODE (scalar_type); 5069 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5070 enum machine_mode simd_mode;
4470 unsigned int nbytes = GET_MODE_SIZE (inner_mode); 5071 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
4471 int nunits; 5072 int nunits;
4472 tree vectype; 5073 tree vectype;
4473 5074
4474 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode)) 5075 if (nbytes == 0)
4475 return NULL_TREE; 5076 return NULL_TREE;
4476 5077
4477 /* We can't build a vector type of elements with alignment bigger than 5078 /* We can't build a vector type of elements with alignment bigger than
4478 their size. */ 5079 their size. */
4479 if (nbytes < TYPE_ALIGN_UNIT (scalar_type)) 5080 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
4485 vectorization of bool and/or enum types in some languages. */ 5086 vectorization of bool and/or enum types in some languages. */
4486 if (INTEGRAL_TYPE_P (scalar_type) 5087 if (INTEGRAL_TYPE_P (scalar_type)
4487 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)) 5088 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
4488 return NULL_TREE; 5089 return NULL_TREE;
4489 5090
4490 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD) 5091 if (GET_MODE_CLASS (inner_mode) != MODE_INT
4491 is expected. */ 5092 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
4492 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes; 5093 return NULL_TREE;
5094
5095 /* If no size was supplied use the mode the target prefers. Otherwise
5096 lookup a vector mode of the specified size. */
5097 if (size == 0)
5098 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5099 else
5100 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5101 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5102 if (nunits <= 1)
5103 return NULL_TREE;
4493 5104
4494 vectype = build_vector_type (scalar_type, nunits); 5105 vectype = build_vector_type (scalar_type, nunits);
4495 if (vect_print_dump_info (REPORT_DETAILS)) 5106 if (vect_print_dump_info (REPORT_DETAILS))
4496 { 5107 {
4497 fprintf (vect_dump, "get vectype with %d units of type ", nunits); 5108 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4516 } 5127 }
4517 5128
4518 return vectype; 5129 return vectype;
4519 } 5130 }
4520 5131
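The lane count computed above is plain arithmetic: nunits = GET_MODE_SIZE (simd_mode) / nbytes, and nunits <= 1 means no useful vector type exists. A worked standalone model (the 16-byte SIMD width is an assumed example, e.g. a 128-bit vector mode):

#include <stdio.h>

/* Model of the nunits computation: divide the SIMD mode size by the
   element size; one lane or fewer yields no vector type.  */
int
main (void)
{
  unsigned int simd_size = 16;	/* assumed, e.g. a 128-bit mode */
  unsigned int elem_bytes[] = { 1, 2, 4, 8, 16 };
  unsigned int i;

  for (i = 0; i < 5; i++)
    {
      unsigned int nunits = simd_size / elem_bytes[i];
      if (nunits <= 1)
	printf ("%2u-byte elements: no vector type\n", elem_bytes[i]);
      else
	printf ("%2u-byte elements: %u lanes\n", elem_bytes[i], nunits);
    }
  return 0;
}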
5132 unsigned int current_vector_size;
5133
5134 /* Function get_vectype_for_scalar_type.
5135
5136 Returns the vector type corresponding to SCALAR_TYPE as supported
5137 by the target. */
5138
5139 tree
5140 get_vectype_for_scalar_type (tree scalar_type)
5141 {
5142 tree vectype;
5143 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5144 current_vector_size);
5145 if (vectype
5146 && current_vector_size == 0)
5147 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5148 return vectype;
5149 }
5150
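The new current_vector_size global latches the size picked by the first successful query, so every later get_vectype_for_scalar_type call during the same vectorization attempt answers with a same-sized vector type. A minimal standalone model of that latching (names illustrative, not the real globals):

#include <stdio.h>

/* Size 0 means "let the target pick"; the first successful pick is
   recorded so subsequent queries agree with it.  */
static unsigned int current_size;

static unsigned int
pick_vector_size (unsigned int preferred)
{
  if (current_size == 0)
    current_size = preferred;	/* latch on first use */
  return current_size;
}

int
main (void)
{
  printf ("%u\n", pick_vector_size (16));	/* latches 16 */
  printf ("%u\n", pick_vector_size (32));	/* still 16 */
  return 0;
}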
4521 /* Function get_same_sized_vectype 5151 /* Function get_same_sized_vectype
4522 5152
4523 Returns a vector type corresponding to SCALAR_TYPE of size 5153 Returns a vector type corresponding to SCALAR_TYPE of size
4524 VECTOR_TYPE if supported by the target. */ 5154 VECTOR_TYPE if supported by the target. */
4525 5155
4526 tree 5156 tree
4527 get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED) 5157 get_same_sized_vectype (tree scalar_type, tree vector_type)
4528 { 5158 {
4529 return get_vectype_for_scalar_type (scalar_type); 5159 return get_vectype_for_scalar_type_and_size
5160 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
4530 } 5161 }
4531 5162
4532 /* Function vect_is_simple_use. 5163 /* Function vect_is_simple_use.
4533 5164
4534 Input: 5165 Input:
4537 OPERAND - operand of a stmt in the loop or bb. 5168 OPERAND - operand of a stmt in the loop or bb.
4538 DEF - the defining stmt in case OPERAND is an SSA_NAME. 5169 DEF - the defining stmt in case OPERAND is an SSA_NAME.
4539 5170
4540 Returns whether a stmt with OPERAND can be vectorized. 5171 Returns whether a stmt with OPERAND can be vectorized.
4541 For loops, supportable operands are constants, loop invariants, and operands 5172 For loops, supportable operands are constants, loop invariants, and operands
4542 that are defined by the current iteration of the loop. Unsupportable 5173 that are defined by the current iteration of the loop. Unsupportable
4543 operands are those that are defined by a previous iteration of the loop (as 5174 operands are those that are defined by a previous iteration of the loop (as
4544 is the case in reduction/induction computations). 5175 is the case in reduction/induction computations).
4545 For basic blocks, supportable operands are constants and bb invariants. 5176 For basic blocks, supportable operands are constants and bb invariants.
4546 For now, operands defined outside the basic block are not supported. */ 5177 For now, operands defined outside the basic block are not supported. */
4547 5178
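Source-level illustration of the rule just stated, in plain C: in the first loop every operand def comes from the current iteration or is invariant, so each use is simple; in the second, s is defined by the previous iteration and must go through the reduction machinery instead:

/* All defs current-iteration or invariant: simple uses.  */
void
simple_use_ok (int *a, int *b, int c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[i] + c;
}

/* 's' is defined by the previous iteration: a reduction.  */
int
cross_iteration_use (int *a, int n)
{
  int i, s = 0;
  for (i = 0; i < n; i++)
    s = s + a[i];
  return s;
}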
4719 5350
4720 Output: 5351 Output:
4721 - CODE1 and CODE2 are codes of vector operations to be used when 5352 - CODE1 and CODE2 are codes of vector operations to be used when
4722 vectorizing the operation, if available. 5353 vectorizing the operation, if available.
4723 - DECL1 and DECL2 are decls of target builtin functions to be used 5354 - DECL1 and DECL2 are decls of target builtin functions to be used
4724 when vectorizing the operation, if available. In this case, 5355 when vectorizing the operation, if available. In this case,
4725 CODE1 and CODE2 are CALL_EXPR. 5356 CODE1 and CODE2 are CALL_EXPR.
4726 - MULTI_STEP_CVT determines the number of required intermediate steps in 5357 - MULTI_STEP_CVT determines the number of required intermediate steps in
4727 case of multi-step conversion (like char->short->int - in that case 5358 case of multi-step conversion (like char->short->int - in that case
4728 MULTI_STEP_CVT will be 1). 5359 MULTI_STEP_CVT will be 1).
4729 - INTERM_TYPES contains the intermediate type required to perform the 5360 - INTERM_TYPES contains the intermediate type required to perform the
4763 of {mult_even,mult_odd} generate the following vectors: 5394 of {mult_even,mult_odd} generate the following vectors:
4764 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. 5395 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4765 5396
4766 When vectorizing outer-loops, we execute the inner-loop sequentially 5397 When vectorizing outer-loops, we execute the inner-loop sequentially
4767 (each vectorized inner-loop iteration contributes to VF outer-loop 5398 (each vectorized inner-loop iteration contributes to VF outer-loop
4768 iterations in parallel). We therefore don't allow changing the order 5399 iterations in parallel). We therefore don't allow changing the order
4769 of the computation in the inner-loop during outer-loop vectorization. */ 5400 of the computation in the inner-loop during outer-loop vectorization. */
4770 5401
4771 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction 5402 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4772 && !nested_in_vect_loop_p (vect_loop, stmt)) 5403 && !nested_in_vect_loop_p (vect_loop, stmt))
4773 ordered_p = false; 5404 ordered_p = false;
4855 5486
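A standalone model of why the {mult_even,mult_odd} scheme sketched in the comment permutes results, and why that is harmless when the only consumer is a reduction (scalar C; squaring stands in for the widening multiply):

#include <stdio.h>

/* even[] collects res1,res3,res5,res7 and odd[] res2,res4,res6,res8,
   i.e. the results are interleaved relative to scalar order.  A
   reduction only needs their sum, so the order is irrelevant.  */
int
main (void)
{
  short x[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  int even[4], odd[4], i, sum = 0;

  for (i = 0; i < 4; i++)
    {
      even[i] = (int) x[2 * i] * x[2 * i];
      odd[i] = (int) x[2 * i + 1] * x[2 * i + 1];
    }
  for (i = 0; i < 4; i++)
    sum += even[i] + odd[i];
  printf ("%d\n", sum);		/* 204, independent of result order */
  return 0;
}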
4856 if (!optab1 || !optab2) 5487 if (!optab1 || !optab2)
4857 return false; 5488 return false;
4858 5489
4859 vec_mode = TYPE_MODE (vectype); 5490 vec_mode = TYPE_MODE (vectype);
4860 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing 5491 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
4861 || (icode2 = optab_handler (optab2, vec_mode)->insn_code) 5492 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
4862 == CODE_FOR_nothing)
4863 return false; 5493 return false;
4864 5494
4865 /* Check if it's a multi-step conversion that can be done using intermediate 5495 /* Check if it's a multi-step conversion that can be done using intermediate
4866 types. */ 5496 types. */
4867 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype) 5497 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4877 5507
4878 *code1 = c1; 5508 *code1 = c1;
4879 *code2 = c2; 5509 *code2 = c2;
4880 5510
4881 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS 5511 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4882 intermediate steps in the promotion sequence. We try MAX_INTERM_CVT_STEPS 5512 intermediate steps in the promotion sequence. We try
4883 to get to NARROW_VECTYPE, and fail if we do not. */ 5513 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5514 not. */
4884 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS); 5515 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4885 for (i = 0; i < 3; i++) 5516 for (i = 0; i < 3; i++)
4886 { 5517 {
4887 intermediate_mode = insn_data[icode1].operand[0].mode; 5518 intermediate_mode = insn_data[icode1].operand[0].mode;
4888 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode, 5519 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4889 TYPE_UNSIGNED (prev_type)); 5520 TYPE_UNSIGNED (prev_type));
4890 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); 5521 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4891 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); 5522 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4892 5523
4893 if (!optab3 || !optab4 5524 if (!optab3 || !optab4
4894 || (icode1 = optab1->handlers[(int) prev_mode].insn_code) 5525 || ((icode1 = optab_handler (optab1, prev_mode))
4895 == CODE_FOR_nothing 5526 == CODE_FOR_nothing)
4896 || insn_data[icode1].operand[0].mode != intermediate_mode 5527 || insn_data[icode1].operand[0].mode != intermediate_mode
4897 || (icode2 = optab2->handlers[(int) prev_mode].insn_code) 5528 || ((icode2 = optab_handler (optab2, prev_mode))
4898 == CODE_FOR_nothing 5529 == CODE_FOR_nothing)
4899 || insn_data[icode2].operand[0].mode != intermediate_mode 5530 || insn_data[icode2].operand[0].mode != intermediate_mode
4900 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code) 5531 || ((icode1 = optab_handler (optab3, intermediate_mode))
4901 == CODE_FOR_nothing 5532 == CODE_FOR_nothing)
4902 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code) 5533 || ((icode2 = optab_handler (optab4, intermediate_mode))
4903 == CODE_FOR_nothing) 5534 == CODE_FOR_nothing))
4904 return false; 5535 return false;
4905 5536
4906 VEC_quick_push (tree, *interm_types, intermediate_type); 5537 VEC_quick_push (tree, *interm_types, intermediate_type);
4907 (*multi_step_cvt)++; 5538 (*multi_step_cvt)++;
4908 5539
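For orientation, a scalar model of the multi-step conversion this loop validates: with one intermediate type, as in char -> short -> int, MULTI_STEP_CVT ends up as 1 and INTERM_TYPES holds the intermediate (short) vector type:

#include <stdio.h>

int
main (void)
{
  signed char c = -5;
  short s = (short) c;	/* step 1: char  -> short */
  int i = (int) s;	/* step 2: short -> int */
  printf ("%d\n", i);	/* -5: the value survives both steps */
  return 0;
}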
4929 narrowing operation that is supported by the target platform in 5560 narrowing operation that is supported by the target platform in
4930 vector form (i.e., when operating on arguments of type VECTYPE_IN 5561 vector form (i.e., when operating on arguments of type VECTYPE_IN
4931 and producing a result of type VECTYPE_OUT). 5562 and producing a result of type VECTYPE_OUT).
4932 5563
4933 Narrowing operations we currently support are NOP (CONVERT) and 5564 Narrowing operations we currently support are NOP (CONVERT) and
4934 FIX_TRUNC. This function checks if these operations are supported by 5565 FIX_TRUNC. This function checks if these operations are supported by
4935 the target platform directly via vector tree-codes. 5566 the target platform directly via vector tree-codes.
4936 5567
4937 Output: 5568 Output:
4938 - CODE1 is the code of a vector operation to be used when 5569 - CODE1 is the code of a vector operation to be used when
4939 vectorizing the operation, if available. 5570 vectorizing the operation, if available.
4985 5616
4986 if (!optab1) 5617 if (!optab1)
4987 return false; 5618 return false;
4988 5619
4989 vec_mode = TYPE_MODE (vectype); 5620 vec_mode = TYPE_MODE (vectype);
4990 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) 5621 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
4991 == CODE_FOR_nothing)
4992 return false; 5622 return false;
4993 5623
4994 /* Check if it's a multi-step conversion that can be done using intermediate 5624 /* Check if it's a multi-step conversion that can be done using intermediate
4995 types. */ 5625 types. */
4996 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype)) 5626 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4998 enum machine_mode intermediate_mode, prev_mode = vec_mode; 5628 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4999 5629
5000 *code1 = c1; 5630 *code1 = c1;
5001 prev_type = vectype; 5631 prev_type = vectype;
5002 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS 5632 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5003 intermediate steps in the narrowing sequence. We try MAX_INTERM_CVT_STEPS 5633 intermediate steps in the narrowing sequence. We try
5004 to get to NARROW_VECTYPE, and fail if we do not. */ 5634 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5635 not. */
5005 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS); 5636 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5006 for (i = 0; i < 3; i++) 5637 for (i = 0; i < 3; i++)
5007 { 5638 {
5008 intermediate_mode = insn_data[icode1].operand[0].mode; 5639 intermediate_mode = insn_data[icode1].operand[0].mode;
5009 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode, 5640 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5010 TYPE_UNSIGNED (prev_type)); 5641 TYPE_UNSIGNED (prev_type));
5011 interm_optab = optab_for_tree_code (c1, intermediate_type, 5642 interm_optab = optab_for_tree_code (c1, intermediate_type,
5012 optab_default); 5643 optab_default);
5013 if (!interm_optab 5644 if (!interm_optab
5014 || (icode1 = optab1->handlers[(int) prev_mode].insn_code) 5645 || ((icode1 = optab_handler (optab1, prev_mode))
5015 == CODE_FOR_nothing 5646 == CODE_FOR_nothing)
5016 || insn_data[icode1].operand[0].mode != intermediate_mode 5647 || insn_data[icode1].operand[0].mode != intermediate_mode
5017 || (icode1 5648 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
5018 = interm_optab->handlers[(int) intermediate_mode].insn_code) 5649 == CODE_FOR_nothing))
5019 == CODE_FOR_nothing)
5020 return false; 5650 return false;
5021 5651
5022 VEC_quick_push (tree, *interm_types, intermediate_type); 5652 VEC_quick_push (tree, *interm_types, intermediate_type);
5023 (*multi_step_cvt)++; 5653 (*multi_step_cvt)++;
5024 5654
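And the mirror image for this narrowing loop: int -> short -> char, where each step truncates to the narrower mode (again one intermediate type, so MULTI_STEP_CVT would be 1):

#include <stdio.h>

int
main (void)
{
  int i = 0x12345;
  short s = (short) i;			/* step 1: keeps low 16 bits */
  signed char c = (signed char) s;	/* step 2: keeps low 8 bits */
  printf ("0x%x 0x%x\n", (unsigned short) s, (unsigned char) c);
  /* prints: 0x2345 0x45 */
  return 0;
}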