comparison gcc/tree-vect-loop-manip.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
comparison of 131:84e7813d76e9 with 145:1830386684a0
@@ -1,7 +1,7 @@
 /* Vectorizer Specific Loop Manipulations
-   Copyright (C) 2003-2018 Free Software Foundation, Inc.
+   Copyright (C) 2003-2020 Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
 
 This file is part of GCC.
 
@@ -45,10 +45,13 @@
 #include "tree-ssa-loop-niter.h"
 #include "internal-fn.h"
 #include "stor-layout.h"
 #include "optabs-query.h"
 #include "vec-perm-indices.h"
+#include "insn-config.h"
+#include "rtl.h"
+#include "recog.h"
 
 /*************************************************************************
   Simple Loop Peeling Utilities
 
   Utilities to support loop peeling for vectorization purposes.
@@ -87,12 +90,12 @@
   gimple *stmt;
   use_operand_p use_p;
   ssa_op_iter iter;
   edge e;
   edge_iterator ei;
-  struct loop *loop = bb->loop_father;
-  struct loop *outer_loop = NULL;
+  class loop *loop = bb->loop_father;
+  class loop *outer_loop = NULL;
 
   if (rename_from_outer_loop)
     {
       gcc_assert (loop);
       outer_loop = loop_outer (loop);
@@ -256,22 +259,22 @@
 /* Define one loop mask MASK from loop LOOP.  INIT_MASK is the value that
    the mask should have during the first iteration and NEXT_MASK is the
    value that it should have on subsequent iterations.  */
 
 static void
-vect_set_loop_mask (struct loop *loop, tree mask, tree init_mask,
+vect_set_loop_mask (class loop *loop, tree mask, tree init_mask,
                     tree next_mask)
 {
   gphi *phi = create_phi_node (mask, loop->header);
   add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
   add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
 }
 
 /* Add SEQ to the end of LOOP's preheader block.  */
 
 static void
-add_preheader_seq (struct loop *loop, gimple_seq seq)
+add_preheader_seq (class loop *loop, gimple_seq seq)
 {
   if (seq)
     {
       edge pe = loop_preheader_edge (loop);
       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
@@ -280,11 +283,11 @@
 }
 
 /* Add SEQ to the beginning of LOOP's header block.  */
 
 static void
-add_header_seq (struct loop *loop, gimple_seq seq)
+add_header_seq (class loop *loop, gimple_seq seq)
 {
   if (seq)
     {
       gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
@@ -321,17 +324,22 @@
                                rgroup_masks *src_rgm)
 {
   tree src_masktype = src_rgm->mask_type;
   tree dest_masktype = dest_rgm->mask_type;
   machine_mode src_mode = TYPE_MODE (src_masktype);
+  insn_code icode1, icode2;
   if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
-      && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing
-      && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing)
+      && (icode1 = optab_handler (vec_unpacku_hi_optab,
+                                  src_mode)) != CODE_FOR_nothing
+      && (icode2 = optab_handler (vec_unpacku_lo_optab,
+                                  src_mode)) != CODE_FOR_nothing)
     {
       /* Unpacking the source masks gives at least as many mask bits as
          we need.  We can then VIEW_CONVERT any excess bits away.  */
-      tree unpack_masktype = vect_halve_mask_nunits (src_masktype);
+      machine_mode dest_mode = insn_data[icode1].operand[0].mode;
+      gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
+      tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
       for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
         {
           tree src = src_rgm->masks[i / 2];
           tree dest = dest_rgm->masks[i];
           tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
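For readers new to mask rgroups, a self-contained C sketch of the unpacking idea follows. It is illustrative only and not part of the patch: plain int arrays stand in for vector masks, and the lane split mimics the !BYTES_BIG_ENDIAN ordering used above, where one wider-element source mask provides the lo/hi halves that become two destination masks.

#include <stdio.h>

int
main (void)
{
  /* One 8-lane source mask; "unpacking" yields two 4-lane masks, as
     vec_unpacku_lo / vec_unpacku_hi would.  */
  int src[8] = { 1, 1, 1, 1, 1, 0, 0, 0 };
  int lo[4], hi[4];
  for (int i = 0; i < 4; i++)
    {
      lo[i] = src[i];      /* destination mask for the even index */
      hi[i] = src[i + 4];  /* destination mask for the odd index */
    }
  printf ("lo: %d %d %d %d\n", lo[0], lo[1], lo[2], lo[3]);
  printf ("hi: %d %d %d %d\n", hi[0], hi[1], hi[2], hi[3]);
  return 0;
}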
@@ -380,12 +388,11 @@
    all the masks in RGM and return a mask that is nonzero when the loop
    needs to iterate.  Add any new preheader statements to PREHEADER_SEQ.
    Use LOOP_COND_GSI to insert code before the exit gcond.
 
    RGM belongs to loop LOOP.  The loop originally iterated NITERS
-   times and has been vectorized according to LOOP_VINFO.  Each iteration
-   of the vectorized loop handles VF iterations of the scalar loop.
+   times and has been vectorized according to LOOP_VINFO.
 
    If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
    starts with NITERS_SKIP dummy iterations of the scalar loop before
    the real work starts.  The mask elements for these dummy iterations
    must be 0, to ensure that the extra iterations do not have an effect.
@@ -405,53 +412,55 @@
 
    This means that we cannot guarantee that such an induction variable
    would ever hit a value that produces a set of all-false masks for RGM.  */
 
 static tree
-vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
+vect_set_loop_masks_directly (class loop *loop, loop_vec_info loop_vinfo,
                               gimple_seq *preheader_seq,
                               gimple_stmt_iterator loop_cond_gsi,
-                              rgroup_masks *rgm, tree vf,
-                              tree niters, tree niters_skip,
+                              rgroup_masks *rgm, tree niters, tree niters_skip,
                               bool might_wrap_p)
 {
   tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+  tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
   tree mask_type = rgm->mask_type;
   unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
   poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
+  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
 
   /* Calculate the maximum number of scalar values that the rgroup
      handles in total, the number that it handles for each iteration
      of the vector loop, and the number that it should skip during the
      first iteration of the vector loop.  */
   tree nscalars_total = niters;
-  tree nscalars_step = vf;
+  tree nscalars_step = build_int_cst (iv_type, vf);
   tree nscalars_skip = niters_skip;
   if (nscalars_per_iter != 1)
     {
       /* We checked before choosing to use a fully-masked loop that these
          multiplications don't overflow.  */
-      tree factor = build_int_cst (compare_type, nscalars_per_iter);
+      tree compare_factor = build_int_cst (compare_type, nscalars_per_iter);
+      tree iv_factor = build_int_cst (iv_type, nscalars_per_iter);
       nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
-                                     nscalars_total, factor);
-      nscalars_step = gimple_build (preheader_seq, MULT_EXPR, compare_type,
-                                    nscalars_step, factor);
+                                     nscalars_total, compare_factor);
+      nscalars_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
+                                    nscalars_step, iv_factor);
       if (nscalars_skip)
         nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
-                                      nscalars_skip, factor);
+                                      nscalars_skip, compare_factor);
     }
 
   /* Create an induction variable that counts the number of scalars
      processed.  */
   tree index_before_incr, index_after_incr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
+  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+  create_iv (build_int_cst (iv_type, 0), nscalars_step, NULL_TREE, loop,
+             &incr_gsi, insert_after, &index_before_incr, &index_after_incr);
+
   tree zero_index = build_int_cst (compare_type, 0);
-  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
-  create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi,
-             insert_after, &index_before_incr, &index_after_incr);
-
   tree test_index, test_limit, first_limit;
   gimple_stmt_iterator *test_gsi;
   if (might_wrap_p)
     {
       /* In principle the loop should stop iterating once the incremented
@@ -479,11 +488,12 @@
          NSCALARS_TOTAL -[sat] (NSCALARS_STEP - NSCALARS_SKIP)
 
          where the rightmost subtraction can be done directly in
          COMPARE_TYPE.  */
       test_index = index_before_incr;
-      tree adjust = nscalars_step;
+      tree adjust = gimple_convert (preheader_seq, compare_type,
+                                    nscalars_step);
       if (nscalars_skip)
         adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
                                adjust, nscalars_skip);
       test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
                                  nscalars_total, adjust);
@@ -522,10 +532,16 @@
                                  test_limit, nscalars_skip);
       test_gsi = &loop_cond_gsi;
 
       first_limit = test_limit;
     }
+
+  /* Convert the IV value to the comparison type (either a no-op or
+     a demotion).  */
+  gimple_seq test_seq = NULL;
+  test_index = gimple_convert (&test_seq, compare_type, test_index);
+  gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
 
   /* Provide a definition of each mask in the group.  */
   tree next_mask = NULL_TREE;
   tree mask;
   unsigned int i;
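For orientation, here is what the sequence built by vect_set_loop_masks_directly computes, reduced to a standalone C model. This is illustrative only — LANES and the concrete numbers are invented, and the simple no-wrap, no-skip case is assumed: an IV counts scalars processed, and lane I of the mask stays active while IV + I is below the total.

#include <stdio.h>

#define LANES 4   /* hypothetical nscalars_per_mask */

int
main (void)
{
  unsigned nscalars_total = 10;     /* NITERS for a single-mask rgroup */
  unsigned nscalars_step = LANES;   /* scalars handled per vector iteration */
  for (unsigned iv = 0; iv < nscalars_total; iv += nscalars_step)
    {
      printf ("iv = %2u, mask =", iv);
      for (unsigned i = 0; i < LANES; i++)
        printf (" %d", iv + i < nscalars_total);  /* lane active?  */
      printf ("\n");  /* final iteration prints 1 1 0 0 */
    }
  return 0;
}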
@@ -625,26 +641,26 @@
 
    Insert the branch-back condition before LOOP_COND_GSI and return the
    final gcond.  */
 
 static gcond *
-vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo,
+vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
                                 tree niters, tree final_iv,
                                 bool niters_maybe_zero,
                                 gimple_stmt_iterator loop_cond_gsi)
 {
   gimple_seq preheader_seq = NULL;
   gimple_seq header_seq = NULL;
 
   tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
   unsigned int compare_precision = TYPE_PRECISION (compare_type);
-  unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo);
   tree orig_niters = niters;
 
   /* Type of the initial value of NITERS.  */
   tree ni_actual_type = TREE_TYPE (niters);
   unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
+  tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
 
   /* Convert NITERS to the same size as the compare.  */
   if (compare_precision > ni_actual_precision
       && niters_maybe_zero)
     {
@@ -659,5 +675,5 @@
                              niters, build_one_cst (compare_type));
     }
   else
     niters = gimple_convert (&preheader_seq, compare_type, niters);
 
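Why convert before adding one: NITERS arrives as a latch count (N - 1) in its original type, and when the scalar loop may execute the full range of that type, N itself is only representable in the wider compare type. A minimal C illustration with invented values, not taken from the patch:

#include <stdio.h>

int
main (void)
{
  unsigned char latch_count = 0xff;  /* the scalar loop body ran 256 times */
  unsigned char narrow = (unsigned char) (latch_count + 1); /* wraps to 0 */
  unsigned int wide = (unsigned int) latch_count + 1;       /* 256, correct */
  printf ("narrow = %u, wide = %u\n", narrow, wide);
  return 0;
}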
@@ -664,35 +680,6 @@
-  /* Convert skip_niters to the right type.  */
-  tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
-
-  /* Now calculate the value that the induction variable must be able
-     to hit in order to ensure that we end the loop with an all-false mask.
-     This involves adding the maximum number of inactive trailing scalar
-     iterations.  */
-  widest_int iv_limit;
-  bool known_max_iters = max_loop_iterations (loop, &iv_limit);
-  if (known_max_iters)
-    {
-      if (niters_skip)
-        {
-          /* Add the maximum number of skipped iterations to the
-             maximum iteration count.  */
-          if (TREE_CODE (niters_skip) == INTEGER_CST)
-            iv_limit += wi::to_widest (niters_skip);
-          else
-            iv_limit += max_vf - 1;
-        }
-      /* IV_LIMIT is the maximum number of latch iterations, which is also
-         the maximum in-range IV value.  Round this value down to the previous
-         vector alignment boundary and then add an extra full iteration.  */
-      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-      iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf;
-    }
-
-  /* Get the vectorization factor in tree form.  */
-  tree vf = build_int_cst (compare_type,
-                           LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+  widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
 
   /* Iterate over all the rgroups and fill in their masks.  We could use
      the first mask from any rgroup for the loop condition; here we
      arbitrarily pick the last.  */
   tree test_mask = NULL_TREE;
@@ -715,19 +702,19 @@
         }
 
       /* See whether zero-based IV would ever generate all-false masks
         before wrapping around.  */
       bool might_wrap_p
-        = (!known_max_iters
+        = (iv_limit == -1
           || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
                                  UNSIGNED)
               > compare_precision));
 
       /* Set up all masks for this group.  */
       test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
                                                 &preheader_seq,
-                                                loop_cond_gsi, rgm, vf,
+                                                loop_cond_gsi, rgm,
                                                 niters, niters_skip,
                                                 might_wrap_p);
     }
 
   /* Emit all accumulated statements.  */
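The might_wrap_p test asks whether the IV could overflow COMPARE_TYPE before ever producing an all-false mask (the real test also treats an unknown limit, iv_limit == -1, as "might wrap"). Below is a hedged C model of the arithmetic; min_precision here is a simplified stand-in for wi::min_precision on unsigned values, and the numbers are invented:

#include <stdio.h>

/* Bits needed to represent V; simplified stand-in for wi::min_precision.  */
static unsigned
min_precision (unsigned long long v)
{
  unsigned bits = 0;
  for (; v; v >>= 1)
    bits++;
  return bits;
}

int
main (void)
{
  unsigned long long iv_limit = 0xffffffffULL; /* max latch iterations */
  unsigned max_nscalars_per_iter = 2;
  unsigned compare_precision = 32;             /* bits in COMPARE_TYPE */
  int might_wrap_p
    = min_precision (iv_limit * max_nscalars_per_iter) > compare_precision;
  printf ("might_wrap_p = %d\n", might_wrap_p);  /* 1: needs 33 bits > 32 */
  return 0;
}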
@@ -762,11 +749,11 @@
 
 /* Like vect_set_loop_condition, but handle the case in which there
    are no loop masks.  */
 
 static gcond *
-vect_set_loop_condition_unmasked (struct loop *loop, tree niters,
+vect_set_loop_condition_unmasked (class loop *loop, tree niters,
                                   tree step, tree final_iv,
                                   bool niters_maybe_zero,
                                   gimple_stmt_iterator loop_cond_gsi)
 {
   tree indx_before_incr, indx_after_incr;
@@ -915,11 +902,11 @@
    N * STEP on exit from the loop.
 
    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 
 void
-vect_set_loop_condition (struct loop *loop, loop_vec_info loop_vinfo,
+vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
                          tree niters, tree step, tree final_iv,
                          bool niters_maybe_zero)
 {
   gcond *cond_stmt;
   gcond *orig_cond = get_loop_exit_condition (loop);
@@ -975,14 +962,19 @@
           gsi_next (&gsi_to);
           continue;
         }
       if (TREE_CODE (from_arg) != SSA_NAME)
         gcc_assert (operand_equal_p (from_arg, to_arg, 0));
-      else
+      else if (TREE_CODE (to_arg) == SSA_NAME
+               && from_arg != to_arg)
         {
           if (get_current_def (to_arg) == NULL_TREE)
-            set_current_def (to_arg, get_current_def (from_arg));
+            {
+              gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
+                                              TREE_TYPE (get_current_def
+                                                           (from_arg))));
+              set_current_def (to_arg, get_current_def (from_arg));
+            }
         }
       gsi_next (&gsi_from);
       gsi_next (&gsi_to);
     }
 
@@ -998,15 +991,15 @@
    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
    entry or exit of LOOP.  */
 
-struct loop *
-slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
-                                        struct loop *scalar_loop, edge e)
+class loop *
+slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop,
+                                        class loop *scalar_loop, edge e)
 {
-  struct loop *new_loop;
+  class loop *new_loop;
   basic_block *new_bbs, *bbs, *pbbs;
   bool at_exit;
   bool was_imm_dom;
   basic_block exit_dest;
   edge exit, new_exit;
@@ -1227,11 +1220,11 @@
    (3) its exit condition is the last stmt in the header
    (4) E is the entry/exit edge of LOOP.
  */
 
 bool
-slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
+slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e)
 {
   edge exit_e = single_exit (loop);
   edge entry_e = loop_preheader_edge (loop);
   gcond *orig_cond = get_loop_exit_condition (loop);
   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
@@ -1254,14 +1247,16 @@
 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
    in the exit bb and rename all the uses after the loop.  This simplifies
    the *guard[12] routines, which assume loop closed SSA form for all PHIs
    (but normally loop closed SSA form doesn't require virtual PHIs to be
-   in the same form).  Doing this early simplifies checking which
-   uses should be renamed.  */
+   in the same form).  Doing this early simplifies checking which
+   uses should be renamed.
 
-static void
-create_lcssa_for_virtual_phi (struct loop *loop)
+   If we create a new phi after the loop, return the definition that
+   applies on entry to the loop, otherwise return null.  */
+
+static tree
+create_lcssa_for_virtual_phi (class loop *loop)
 {
   gphi_iterator gsi;
   edge exit_e = single_exit (loop);
 
   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -1288,25 +1284,27 @@
           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
             if (stmt != new_phi
                 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
               FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                 SET_USE (use_p, new_vop);
+
+          return PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
         }
       break;
     }
-
+  return NULL_TREE;
 }
 
 /* Function vect_get_loop_location.
 
    Extract the location of the loop in the source code.
    If the loop is not well formed for vectorization, an estimated
    location is calculated.
    Return the loop location on success and NULL otherwise.  */
 
 dump_user_location_t
-find_loop_location (struct loop *loop)
+find_loop_location (class loop *loop)
 {
   gimple *stmt = NULL;
   basic_block bb;
   gimple_stmt_iterator si;
 
@@ -1364,11 +1362,11 @@
    These restrictions will be relaxed in the future.  */
 
 bool
 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
 {
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block bb = loop->header;
   gphi_iterator gsi;
 
   /* Analyze phi functions of the loop header.  */
 
@@ -1478,11 +1476,11 @@
 static void
 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                   tree niters, edge update_e)
 {
   gphi_iterator gsi, gsi1;
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block update_bb = update_e->dest;
   basic_block exit_bb = single_exit (loop)->dest;
 
   /* Make sure there exists a single-predecessor exit bb:  */
   gcc_assert (single_pred_p (exit_bb));
@@ -1559,22 +1557,34 @@
 {
   dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
   stmt_vec_info stmt_info = dr_info->stmt;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 
-  unsigned int target_align = DR_TARGET_ALIGNMENT (dr_info);
-  gcc_assert (target_align != 0);
+  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
+  unsigned HOST_WIDE_INT target_align_c;
+  tree target_align_minus_1;
 
   bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
                                         size_zero_node) < 0;
   tree offset = (negative
                  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1)
                  : size_zero_node);
   tree start_addr = vect_create_addr_base_for_vector_ref (stmt_info, seq,
                                                           offset);
   tree type = unsigned_type_for (TREE_TYPE (start_addr));
-  tree target_align_minus_1 = build_int_cst (type, target_align - 1);
+  if (target_align.is_constant (&target_align_c))
+    target_align_minus_1 = build_int_cst (type, target_align_c - 1);
+  else
+    {
+      tree vla = build_int_cst (type, target_align);
+      tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
+                                    fold_build2 (MINUS_EXPR, type,
+                                                 build_int_cst (type, 0), vla));
+      target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
+                                          build_int_cst (type, 1));
+    }
+
   HOST_WIDE_INT elem_size
     = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
   tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
 
   /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
@@ -1629,11 +1639,11 @@
   tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
   gimple_seq stmts = NULL, new_stmts = NULL;
   tree iters, iters_name;
   stmt_vec_info stmt_info = dr_info->stmt;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  unsigned int target_align = DR_TARGET_ALIGNMENT (dr_info);
+  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
 
   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
     {
       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 
@@ -1648,12 +1658,16 @@
     {
       tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
       tree type = TREE_TYPE (misalign_in_elems);
       HOST_WIDE_INT elem_size
         = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
-      HOST_WIDE_INT align_in_elems = target_align / elem_size;
-      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
+      /* We only do prolog peeling if the target alignment is known at compile
+         time.  */
+      poly_uint64 align_in_elems =
+        exact_div (target_align, elem_size);
+      tree align_in_elems_minus_1 =
+        build_int_cst (type, align_in_elems - 1);
       tree align_in_elems_tree = build_int_cst (type, align_in_elems);
 
       /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
                                  & (align_in_elems - 1)).  */
       bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
@@ -1664,11 +1678,15 @@
       else
         iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
                              misalign_in_elems);
       iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
       iters = fold_convert (niters_type, iters);
-      *bound = align_in_elems - 1;
+      unsigned HOST_WIDE_INT align_in_elems_c;
+      if (align_in_elems.is_constant (&align_in_elems_c))
+        *bound = align_in_elems_c - 1;
+      else
+        *bound = -1;
     }
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                      "niters for prolog loop: %T\n", iters);
@@ -1696,20 +1714,23 @@
    If CODE is PLUS, the vector loop starts NITERS iterations after the
    scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
    iterations before the scalar one (using masking to skip inactive
    elements).  This function updates the information recorded in DR to
    account for the difference.  Specifically, it updates the OFFSET
-   field of DR.  */
+   field of DR_INFO.  */
 
 static void
-vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
+vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
 {
-  tree offset = DR_OFFSET (dr);
+  struct data_reference *dr = dr_info->dr;
+  tree offset = dr_info->offset;
+  if (!offset)
+    offset = build_zero_cst (sizetype);
 
   niters = fold_build2 (MULT_EXPR, sizetype,
                         fold_convert (sizetype, niters),
                         fold_convert (sizetype, DR_STEP (dr)));
   offset = fold_build2 (code, sizetype,
                         fold_convert (sizetype, offset), niters);
-  DR_OFFSET (dr) = offset;
+  dr_info->offset = offset;
 }
 
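The update itself is simple pointer arithmetic recorded symbolically: peeling NITERS iterations of a reference with byte step STEP moves its offset by NITERS * STEP (PLUS for a prolog, MINUS for the masked case described above). A trivial C illustration with invented numbers:

#include <stdio.h>

int
main (void)
{
  long offset = 0;   /* hypothetical recorded DR offset in bytes */
  long niters = 3;   /* iterations peeled */
  long step = 4;     /* byte step of the access per iteration */
  offset = offset + niters * step;  /* the PLUS_EXPR case */
  printf ("new offset = %ld bytes\n", offset);  /* 12 */
  return 0;
}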
@@ -1716,42 +1737,32 @@
 
 /* Function vect_update_inits_of_drs
 
    Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
    CODE and NITERS are as for vect_update_inits_of_dr.  */
 
-static void
+void
 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
                           tree_code code)
 {
   unsigned int i;
   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
   struct data_reference *dr;
 
   DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
 
-  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
+  /* Adjust niters to sizetype.  We used to insert the stmts on the loop
+     preheader edge here, but since we might use these niters to update the
+     epilogue's niters and data references, we can't insert them here, as
+     this definition might not always dominate its uses.  */
   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
-    {
-      gimple_seq seq;
-      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
-      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
-
-      niters = fold_convert (sizetype, niters);
-      niters = force_gimple_operand (niters, &seq, false, var);
-      if (seq)
-        {
-          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
-          gcc_assert (!new_bb);
-        }
-    }
+    niters = fold_convert (sizetype, niters);
 
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
       if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt))
-        vect_update_init_of_dr (dr, niters, code);
+        vect_update_init_of_dr (dr_info, niters, code);
     }
 }
 
 /* For the information recorded in LOOP_VINFO prepare the loop for peeling
    by masking.  This involves calculating the number of iterations to
@@ -1984,11 +1996,11 @@
                                      tree niters_vector,
                                      tree *niters_vector_mult_vf_ptr)
 {
   /* We should be using a step_vector of VF if VF is variable.  */
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree type = TREE_TYPE (niters_vector);
   tree log_vf = build_int_cst (type, exact_log2 (vf));
   basic_block exit_bb = single_exit (loop)->dest;
 
   gcc_assert (niters_vector_mult_vf_ptr != NULL);
@@ -2002,10 +2014,33 @@
                                        &stmts, true, var);
       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
     }
   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
+}
+
+/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
+   this function searches for the corresponding lcssa phi node in exit
+   bb of LOOP.  If it is found, return the phi result; otherwise return
+   NULL.  */
+
+static tree
+find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED,
+                gphi *lcssa_phi)
+{
+  gphi_iterator gsi;
+  edge e = single_exit (loop);
+
+  gcc_assert (single_pred_p (e->dest));
+  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gphi *phi = gsi.phi ();
+      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
+                           PHI_ARG_DEF (lcssa_phi, 0), 0))
+        return PHI_RESULT (phi);
+    }
+  return NULL_TREE;
 }
 
 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
    from SECOND/FIRST and puts it at the original loop's preheader/exit
    edge, the two loops are arranged as below:
@@ -2052,15 +2087,15 @@
    second loop, i.e, between_bb in the example code.  With PHIs updated,
    the second loop will execute rest iterations of the first.  */
 
 static void
 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
-                                   struct loop *first, struct loop *second,
+                                   class loop *first, class loop *second,
                                    bool create_lcssa_for_iv_phis)
 {
   gphi_iterator gsi_update, gsi_orig;
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 
   edge first_latch_e = EDGE_SUCC (first->latch, 0);
   edge second_preheader_e = loop_preheader_edge (second);
   basic_block between_bb = single_exit (first)->dest;
 
@@ -2090,10 +2125,33 @@
         }
 
       /* Update PHI node in the second loop by replacing arg on the loop's
          incoming edge.  */
       adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
+    }
+
+  /* For epilogue peeling we have to make sure to copy all LC PHIs
+     for correct vectorization of live stmts.  */
+  if (loop == first)
+    {
+      basic_block orig_exit = single_exit (second)->dest;
+      for (gsi_orig = gsi_start_phis (orig_exit);
+           !gsi_end_p (gsi_orig); gsi_next (&gsi_orig))
+        {
+          gphi *orig_phi = gsi_orig.phi ();
+          tree orig_arg = PHI_ARG_DEF (orig_phi, 0);
+          if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p (orig_arg))
+            continue;
+
+          /* Already created in the above loop.  */
+          if (find_guard_arg (first, second, orig_phi))
+            continue;
+
+          tree new_res = copy_ssa_name (orig_arg);
+          gphi *lcphi = create_phi_node (new_res, between_bb);
+          add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION);
+        }
     }
 }
 
 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
@@ -2140,15 +2198,15 @@
 
    This function creates PHI nodes at merge_bb and replaces the use of i_5
    in the update_loop's PHI node with the result of new PHI result.  */
 
 static void
-slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
-                                    struct loop *update_loop,
+slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
+                                    class loop *update_loop,
                                     edge guard_edge, edge merge_edge)
 {
-  source_location merge_loc, guard_loc;
+  location_t merge_loc, guard_loc;
   edge orig_e = loop_preheader_edge (skip_loop);
   edge update_e = loop_preheader_edge (update_loop);
   gphi_iterator gsi_orig, gsi_update;
 
   for ((gsi_orig = gsi_start_phis (skip_loop->header),
@@ -2173,33 +2231,10 @@
       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
 
       /* Update phi in UPDATE_PHI.  */
       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
     }
-}
-
-/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
-   this function searches for the corresponding lcssa phi node in exit
-   bb of LOOP.  If it is found, return the phi result; otherwise return
-   NULL.  */
-
-static tree
-find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
-                gphi *lcssa_phi)
-{
-  gphi_iterator gsi;
-  edge e = single_exit (loop);
-
-  gcc_assert (single_pred_p (e->dest));
-  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
-    {
-      gphi *phi = gsi.phi ();
-      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
-                           PHI_ARG_DEF (lcssa_phi, 0), 0))
-        return PHI_RESULT (phi);
-    }
-  return NULL_TREE;
 }
 
 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
    from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
    point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
@@ -2252,11 +2287,11 @@
    the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
    by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
    in exit_bb will also be updated.  */
 
 static void
-slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
+slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog,
                                     edge guard_edge, edge merge_edge)
 {
   gphi_iterator gsi;
   basic_block merge_bb = guard_edge->dest;
 
@@ -2268,16 +2303,18 @@
 
   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
       gphi *update_phi = gsi.phi ();
       tree old_arg = PHI_ARG_DEF (update_phi, 0);
-      /* This loop-closed-phi actually doesn't represent a use out of the
-         loop - the phi arg is a constant.  */
-      if (TREE_CODE (old_arg) != SSA_NAME)
-        continue;
-
-      tree merge_arg = get_current_def (old_arg);
+
+      tree merge_arg = NULL_TREE;
+
+      /* If the old argument is a SSA_NAME use its current_def.  */
+      if (TREE_CODE (old_arg) == SSA_NAME)
+        merge_arg = get_current_def (old_arg);
+      /* If it's a constant or doesn't have a current_def, just use the old
+         argument.  */
       if (!merge_arg)
         merge_arg = old_arg;
 
       tree guard_arg = find_guard_arg (loop, epilog, update_phi);
       /* If the var is live after loop but not a reduction, we simply
@@ -2301,11 +2338,11 @@
 
 /* EPILOG loop is duplicated from the original loop for vectorizing,
    the arg of its loop closed ssa PHI needs to be updated.  */
 
 static void
-slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
+slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
 {
   gphi_iterator gsi;
   basic_block exit_bb = single_exit (epilog)->dest;
 
   gcc_assert (single_pred_p (exit_bb));
@@ -2384,28 +2421,52 @@
 
       merge_bb_3:
 
    Note this function peels prolog and epilog only if it's necessary,
    as well as guards.
-   Returns created epilogue or NULL.
+   This function returns the epilogue loop if a decision was made to vectorize
+   it, otherwise NULL.
+
+   The analysis resulting in this epilogue loop's loop_vec_info was performed
+   in the same vect_analyze_loop call as the main loop's.  At that time
+   vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
+   vectorization factors than the main loop.  This list is stored in the main
+   loop's loop_vec_info in the 'epilogue_vinfos' member.  Every time we decide
+   to vectorize the epilogue loop for a lower vectorization factor, the
+   loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
+   updated and linked to the epilogue loop.  This is later used to vectorize
+   the epilogue.  The reason the loop_vec_info needs updating is that it was
+   constructed based on the original main loop, and the epilogue loop is a
+   copy of this loop, so all links pointing to statements in the original loop
+   need updating.  Furthermore, these loop_vec_infos share the
+   data_reference's records, which will also need to be updated.
 
    TODO: Guard for prefer_scalar_loop should be emitted along with
    versioning conditions if loop versioning is needed.  */
 
 
-struct loop *
+class loop *
 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
                  tree *niters_vector, tree *step_vector,
                  tree *niters_vector_mult_vf_var, int th,
-                 bool check_profitability, bool niters_no_overflow)
+                 bool check_profitability, bool niters_no_overflow,
+                 tree *advance)
 {
   edge e, guard_e;
   tree type = TREE_TYPE (niters), guard_cond;
   basic_block guard_bb, guard_to;
   profile_probability prob_prolog, prob_vector, prob_epilog;
   int estimated_vf;
   int prolog_peeling = 0;
+  bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
+  /* We currently do not support prolog peeling if the target alignment is not
+     known at compile time.  'vect_gen_prolog_loop_niters' depends on the
+     target alignment being constant.  */
+  dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
+  if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
+    return NULL;
+
   if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
     prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   poly_uint64 bound_epilog = 0;
@@ -2425,15 +2486,48 @@
   if (estimated_vf == 2)
     estimated_vf = 3;
   prob_prolog = prob_epilog = profile_probability::guessed_always ()
                         .apply_scale (estimated_vf - 1, estimated_vf);
 
-  struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  struct loop *first_loop = loop;
+  class loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  class loop *first_loop = loop;
   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
+
+  /* We might have a queued need to update virtual SSA form.  As we
+     delete the update SSA machinery below after doing a regular
+     incremental SSA update during loop copying make sure we don't
+     lose that fact.
+     ??? Needing to update virtual SSA form by renaming is unfortunate
+     but not all of the vectorizer code inserting new loads / stores
+     properly assigns virtual operands to those statements.  */
+  update_ssa (TODO_update_ssa_only_virtuals);
+
   create_lcssa_for_virtual_phi (loop);
-  update_ssa (TODO_update_ssa_only_virtuals);
+
+  /* If we're vectorizing an epilogue loop, the update_ssa above will
+     have ensured that the virtual operand is in SSA form throughout the
+     vectorized main loop.  Normally it is possible to trace the updated
+     vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
+     back to scalar-stmt vuses, meaning that the effect of the SSA update
+     remains local to the main loop.  However, there are rare cases in
+     which the vectorized loop has vdefs even when the original scalar
+     loop didn't.  For example, vectorizing a load with IFN_LOAD_LANES
+     introduces clobbers of the temporary vector array, which in turn
+     needs new vdefs.  If the scalar loop doesn't write to memory, these
+     new vdefs will be the only ones in the vector loop.
+
+     In that case, update_ssa will have added a new virtual phi to the
+     main loop, which previously didn't need one.  Ensure that we (locally)
+     maintain LCSSA form for the virtual operand, just as we would have
+     done if the virtual phi had existed from the outset.  This makes it
+     easier to duplicate the scalar epilogue loop below.  */
+  tree vop_to_rename = NULL_TREE;
+  if (loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
+    {
+      class loop *orig_loop = LOOP_VINFO_LOOP (orig_loop_vinfo);
+      vop_to_rename = create_lcssa_for_virtual_phi (orig_loop);
+    }
 
   if (MAY_HAVE_DEBUG_BIND_STMTS)
     {
       gcc_assert (!adjust_vec.exists ());
       adjust_vec.create (32);
@@ -2448,9 +2542,57 @@
      so that we can also get the upper bound on the number of iterations.  */
   tree niters_prolog;
   int bound_prolog = 0;
   if (prolog_peeling)
     niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
                                                  &bound_prolog);
   else
     niters_prolog = build_int_cst (type, 0);
 
+  loop_vec_info epilogue_vinfo = NULL;
+  if (vect_epilogues)
+    {
+      epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
+      loop_vinfo->epilogue_vinfos.ordered_remove (0);
+    }
+
+  tree niters_vector_mult_vf = NULL_TREE;
+  /* Saving NITERs before the loop, as this may be changed by prologue.  */
+  tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
+  edge update_e = NULL, skip_e = NULL;
+  unsigned int lowest_vf = constant_lower_bound (vf);
+  /* If we know the number of scalar iterations for the main loop we should
+     check whether after the main loop there are enough iterations left over
+     for the epilogue.  */
+  if (vect_epilogues
+      && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && prolog_peeling >= 0
+      && known_eq (vf, lowest_vf))
+    {
+      unsigned HOST_WIDE_INT eiters
+        = (LOOP_VINFO_INT_NITERS (loop_vinfo)
+           - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
+
+      eiters -= prolog_peeling;
+      eiters
+        = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
+
+      unsigned int ratio;
+      unsigned int epilogue_gaps
+        = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
+      while (!(constant_multiple_p
+               (GET_MODE_SIZE (loop_vinfo->vector_mode),
+                GET_MODE_SIZE (epilogue_vinfo->vector_mode), &ratio)
+               && eiters >= lowest_vf / ratio + epilogue_gaps))
+        {
+          delete epilogue_vinfo;
+          epilogue_vinfo = NULL;
+          if (loop_vinfo->epilogue_vinfos.length () == 0)
+            {
+              vect_epilogues = false;
+              break;
+            }
+          epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
+          loop_vinfo->epilogue_vinfos.ordered_remove (0);
+          epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
+        }
    }
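A loose scalar model of the selection loop just above, illustrative only: the real code compares vector mode sizes via constant_multiple_p and accounts for peeling for gaps, both of which this sketch omits. The idea is simply to keep trying smaller epilogue vectorization factors until one fits the leftover iterations.

#include <stdio.h>

int
main (void)
{
  unsigned niters = 21, main_vf = 8;        /* invented numbers */
  unsigned candidate_vfs[] = { 4, 2 };      /* epilogue candidates, in order */
  unsigned eiters = niters % main_vf;       /* 5 iterations left over */
  for (unsigned i = 0; i < 2; i++)
    if (eiters >= candidate_vfs[i])
      {
        printf ("vectorize epilogue with VF %u\n", candidate_vfs[i]);
        return 0;
      }
  printf ("no epilogue VF fits; leave it scalar\n");
  return 0;
}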
2457 /* Prolog loop may be skipped. */ 2599 /* Prolog loop may be skipped. */
2458 bool skip_prolog = (prolog_peeling != 0); 2600 bool skip_prolog = (prolog_peeling != 0);
2459 /* Skip to epilog if scalar loop may be preferred. It's only needed 2601 /* Skip over this loop to the epilog when there are not enough iterations
2460 when we peel for epilog loop and when it hasn't been checked with 2602 to enter this vectorized loop. If so, we perform runtime checks on the
2461 loop versioning. */ 2603 NITERS to decide whether to skip the current vectorized loop. If we know
2604 the number of scalar iterations, we add such a runtime check when that
2605 number may potentially be smaller than the number of iterations required
2606 to enter this loop; for this we use
2607 the upper bounds on the prolog and epilog peeling. When we
2608 don't know the number of iterations and don't require versioning, it is
2609 because we have asserted that there are enough scalar iterations to enter
2610 the main loop, so this skip is not necessary. When we are versioning, we
2611 only add such a skip if we have chosen to vectorize
2612 the epilogue. */
2462 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 2613 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2463 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo), 2614 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
2464 bound_prolog + bound_epilog) 2615 bound_prolog + bound_epilog)
2465 : !LOOP_REQUIRES_VERSIONING (loop_vinfo)); 2616 : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
2617 || vect_epilogues));
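
For illustration, a minimal boolean model of the SKIP_VECTOR decision above (invented helper; on compile-time constants maybe_lt reduces to an ordinary comparison):

    #include <stdbool.h>

    /* With a known iteration count the vector loop is skipped when the
       count may fall below the combined peeling bounds; with an unknown
       count the guard is needed unless versioning already guaranteed
       enough iterations, except that a vectorized epilogue reintroduces
       the need for it.  */
    static bool
    need_skip_vector_guard (bool niters_known, long long int_niters,
                            int bound_prolog, int bound_epilog,
                            bool requires_versioning, bool vect_epilogues)
    {
      if (niters_known)
        return int_niters < bound_prolog + bound_epilog;
      return !requires_versioning || vect_epilogues;
    }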
2466 /* Epilog loop must be executed if the number of iterations for epilog 2618 /* Epilog loop must be executed if the number of iterations for epilog
2467 loop is known at compile time, otherwise we need to add a check at 2619 loop is known at compile time, otherwise we need to add a check at
2468 the end of vector loop and skip to the end of epilog loop. */ 2620 the end of vector loop and skip to the end of epilog loop. */
2469 bool skip_epilog = (prolog_peeling < 0 2621 bool skip_epilog = (prolog_peeling < 0
2470 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 2622 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2489 scale_loop_profile (loop, prob_vector, 0); 2641 scale_loop_profile (loop, prob_vector, 0);
2490 } 2642 }
2491 } 2643 }
2492 2644
2493 dump_user_location_t loop_loc = find_loop_location (loop); 2645 dump_user_location_t loop_loc = find_loop_location (loop);
2494 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); 2646 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2647 if (vect_epilogues)
2648 /* Make sure to set the epilogue loop's scalar loop, such that we can
2649 use the original scalar loop as remaining epilogue if necessary. */
2650 LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
2651 = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2652
2495 if (prolog_peeling) 2653 if (prolog_peeling)
2496 { 2654 {
2497 e = loop_preheader_edge (loop); 2655 e = loop_preheader_edge (loop);
2498 if (!slpeel_can_duplicate_loop_p (loop, e)) 2656 if (!slpeel_can_duplicate_loop_p (loop, e))
2499 { 2657 {
2507 { 2665 {
2508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, 2666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2509 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n"); 2667 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2510 gcc_unreachable (); 2668 gcc_unreachable ();
2511 } 2669 }
2670 prolog->force_vectorize = false;
2512 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true); 2671 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
2513 first_loop = prolog; 2672 first_loop = prolog;
2514 reset_original_copy_tables (); 2673 reset_original_copy_tables ();
2515 2674
2516 /* Update the number of iterations for prolog loop. */ 2675 /* Update the number of iterations for prolog loop. */
2535 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e); 2694 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
2536 2695
2537 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog); 2696 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
2538 scale_loop_profile (prolog, prob_prolog, bound_prolog); 2697 scale_loop_profile (prolog, prob_prolog, bound_prolog);
2539 } 2698 }
2699
2540 /* Update init address of DRs. */ 2700 /* Update init address of DRs. */
2541 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR); 2701 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
2542 /* Update niters for vector loop. */ 2702 /* Update niters for vector loop. */
2543 LOOP_VINFO_NITERS (loop_vinfo) 2703 LOOP_VINFO_NITERS (loop_vinfo)
2544 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog); 2704 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
2569 { 2729 {
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, 2730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2571 "loop can't be duplicated to exit edge.\n"); 2731 "loop can't be duplicated to exit edge.\n");
2572 gcc_unreachable (); 2732 gcc_unreachable ();
2573 } 2733 }
2574 /* Peel epilog and put it on exit edge of loop. */ 2734 /* Peel epilog and put it on exit edge of loop. If we are vectorizing
2575 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e); 2735 said epilog then we should use a copy of the main loop as a starting
2736 point. This loop may have already had some preliminary transformations
2737 to allow for more optimal vectorization, for example if-conversion.
2738 If we are not vectorizing the epilog then we should use the scalar loop
2739 as the transformations mentioned above make little or no sense when not
2740 vectorizing. */
2741 epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
2742 if (vop_to_rename)
2743 {
2744 /* Vectorizing the main loop can sometimes introduce a vdef to
2745 a loop that previously didn't have one; see the comment above
2746 the definition of VOP_TO_RENAME for details. The definition
2747 D that holds on E will then be different from the definition
2748 VOP_TO_RENAME that holds during SCALAR_LOOP, so we need to
2749 rename VOP_TO_RENAME to D when copying the loop.
2750
2751 The virtual operand is in LCSSA form for the main loop,
2752 and no stmt between the main loop and E needs a vdef,
2753 so we know that D is provided by a phi rather than by a
2754 vdef on a normal gimple stmt. */
2755 basic_block vdef_bb = e->src;
2756 gphi *vphi;
2757 while (!(vphi = get_virtual_phi (vdef_bb)))
2758 vdef_bb = get_immediate_dominator (CDI_DOMINATORS, vdef_bb);
2759 gcc_assert (vop_to_rename != gimple_phi_result (vphi));
2760 set_current_def (vop_to_rename, gimple_phi_result (vphi));
2761 }
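
A toy model of the walk just above (invented structures; the real code uses get_virtual_phi and get_immediate_dominator on CDI_DOMINATORS): climb immediate dominators from E->src until a block carrying a virtual PHI is found.

    #include <stddef.h>

    /* Each block records its immediate dominator and an optional virtual
       PHI; the walk terminates because a dominating block is known to
       carry one, per the comment above.  */
    struct toy_bb { struct toy_bb *idom; void *virtual_phi; };

    static void *
    find_dominating_virtual_phi (struct toy_bb *bb)
    {
      while (bb->virtual_phi == NULL)
        bb = bb->idom;
      return bb->virtual_phi;
    }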
2762 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
2576 if (!epilog) 2763 if (!epilog)
2577 { 2764 {
2578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, 2765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2579 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n"); 2766 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2580 gcc_unreachable (); 2767 gcc_unreachable ();
2581 } 2768 }
2769 epilog->force_vectorize = false;
2582 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false); 2770 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
2583 2771
2584 /* Scalar version loop may be preferred. In this case, add guard 2772 /* Scalar version loop may be preferred. In this case, add guard
2585 and skip to epilog. Note this only happens when the number of 2773 and skip to epilog. Note this only happens when the number of
2586 iterations of loop is unknown at compile time, otherwise this 2774 iterations of loop is unknown at compile time, otherwise this
2598 guard_to = split_edge (loop_preheader_edge (epilog)); 2786 guard_to = split_edge (loop_preheader_edge (epilog));
2599 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, 2787 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2600 guard_to, guard_bb, 2788 guard_to, guard_bb,
2601 prob_vector.invert (), 2789 prob_vector.invert (),
2602 irred_flag); 2790 irred_flag);
2791 skip_e = guard_e;
2603 e = EDGE_PRED (guard_to, 0); 2792 e = EDGE_PRED (guard_to, 0);
2604 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); 2793 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2605 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e); 2794 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
2606 2795
2607 /* Simply propagate profile info from guard_bb to guard_to which is 2796 /* Simply propagate profile info from guard_bb to guard_to which is
2619 (prob_vector)); 2808 (prob_vector));
2620 free (bbs); 2809 free (bbs);
2621 } 2810 }
2622 2811
2623 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src; 2812 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
2624 tree niters_vector_mult_vf;
2625 /* If the loop is peeled a non-zero constant number of times, niters now 2813 /* If the loop is peeled a non-zero constant number of times, niters now
2626 refers to orig_niters - prolog_peeling; it won't overflow even if 2814 refers to orig_niters - prolog_peeling; it won't overflow even if
2627 orig_niters overflows. */ 2815 orig_niters overflows. */
2628 niters_no_overflow |= (prolog_peeling > 0); 2816 niters_no_overflow |= (prolog_peeling > 0);
2629 vect_gen_vector_loop_niters (loop_vinfo, niters, 2817 vect_gen_vector_loop_niters (loop_vinfo, niters,
2642 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector, 2830 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
2643 &niters_vector_mult_vf); 2831 &niters_vector_mult_vf);
2644 /* Update IVs of original loop as if they were advanced by 2832 /* Update IVs of original loop as if they were advanced by
2645 niters_vector_mult_vf steps. */ 2833 niters_vector_mult_vf steps. */
2646 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); 2834 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
2647 edge update_e = skip_vector ? e : loop_preheader_edge (epilog); 2835 update_e = skip_vector ? e : loop_preheader_edge (epilog);
2648 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf, 2836 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
2649 update_e); 2837 update_e);
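
A worked example of the two quantities involved here (numbers invented, and assuming no peeling for gaps): with 100 original iterations, 3 peeled by the prolog and VF = 16, the vector loop runs 6 times and NITERS_VECTOR_MULT_VF = 96, the amount by which the original IVs are advanced along UPDATE_E.

    #include <stdio.h>

    int
    main (void)
    {
      unsigned niters = 100, prolog = 3, vf = 16;
      unsigned main_niters = niters - prolog;              /* 97 */
      unsigned niters_vector = main_niters / vf;           /* 6 vector iterations */
      unsigned niters_vector_mult_vf = niters_vector * vf; /* 96 scalar iterations */
      printf ("%u %u\n", niters_vector, niters_vector_mult_vf);
      return 0;
    }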
2650 2838
2651 if (skip_epilog) 2839 if (skip_epilog)
2652 { 2840 {
2683 2871
2684 delete_update_ssa (); 2872 delete_update_ssa ();
2685 adjust_vec_debug_stmts (); 2873 adjust_vec_debug_stmts ();
2686 scev_reset (); 2874 scev_reset ();
2687 } 2875 }
2876
2877 if (vect_epilogues)
2878 {
2879 epilog->aux = epilogue_vinfo;
2880 LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
2881
2882 loop_constraint_clear (epilog, LOOP_C_INFINITE);
2883
2884 /* We now must calculate the number of iterations NITERS performed by the
2885 previous loop and the number EPILOGUE_NITERS to be performed by the epilogue. */
2886 tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
2887 niters_prolog, niters_vector_mult_vf);
2888
2889 /* If SKIP_VECTOR is set, the previous loop may be skipped; insert a
2890 phi-node to determine whether we are coming from the previous vectorized
2891 loop using the update_e edge or from the skip block using the
2892 skip_e edge. */
2893 if (skip_vector)
2894 {
2895 gcc_assert (update_e != NULL && skip_e != NULL);
2896 gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
2897 update_e->dest);
2898 tree new_ssa = make_ssa_name (TREE_TYPE (niters));
2899 gimple *stmt = gimple_build_assign (new_ssa, niters);
2900 gimple_stmt_iterator gsi;
2901 if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
2902 && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
2903 {
2904 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
2905 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
2906 }
2907 else
2908 {
2909 gsi = gsi_last_bb (update_e->src);
2910 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2911 }
2912
2913 niters = new_ssa;
2914 add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
2915 add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
2916 UNKNOWN_LOCATION);
2917 niters = PHI_RESULT (new_phi);
2918 }
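
In scalar terms the PHI built above is a select between the two incoming paths; a hedged sketch of its semantics:

    /* Coming from the vector loop (update_e) the count of iterations
       already performed is niters_prolog + niters_vector_mult_vf; coming
       from the guard that skipped prolog and vector loop (skip_e) it is
       zero.  */
    static unsigned
    iterations_done (int came_from_vector_loop, unsigned niters_prolog,
                     unsigned niters_vector_mult_vf)
    {
      return came_from_vector_loop ? niters_prolog + niters_vector_mult_vf : 0;
    }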
2919
2920 /* Subtract the number of iterations performed by the vectorized loop
2921 from the total number of iterations. */
2922 tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
2923 before_loop_niters,
2924 niters);
2925
2926 LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
2927 LOOP_VINFO_NITERSM1 (epilogue_vinfo)
2928 = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
2929 epilogue_niters,
2930 build_one_cst (TREE_TYPE (epilogue_niters)));
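
Continuing the running example (invented numbers): 100 total iterations with 3 done by the prolog and 96 by the vector loop leave EPILOGUE_NITERS = 1, hence NITERSM1 = 0 for the epilogue loop.

    #include <stdio.h>

    int
    main (void)
    {
      unsigned before_loop_niters = 100;
      unsigned niters = 3 + 96;                               /* prolog + vector */
      unsigned epilogue_niters = before_loop_niters - niters; /* 1 */
      unsigned epilogue_nitersm1 = epilogue_niters - 1;       /* 0 */
      printf ("%u %u\n", epilogue_niters, epilogue_nitersm1);
      return 0;
    }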
2931
2932 /* Set ADVANCE to the number of iterations performed by the previous
2933 loop and its prologue. */
2934 *advance = niters;
2935
2936 /* Redo the peeling-for-niter analysis as the NITERS and alignment
2937 may have been updated to take the main loop into account. */
2938 determine_peel_for_niter (epilogue_vinfo);
2939 }
2940
2688 adjust_vec.release (); 2941 adjust_vec.release ();
2689 free_original_copy_tables (); 2942 free_original_copy_tables ();
2690 2943
2691 return epilog; 2944 return vect_epilogues ? epilog : NULL;
2692 } 2945 }
2693 2946
2694 /* Function vect_create_cond_for_niters_checks. 2947 /* Function vect_create_cond_for_niters_checks.
2695 2948
2696 Create a conditional expression that represents the run-time checks for 2949 Create a conditional expression that represents the run-time checks for
2949 cost model threshold TH. 3202 cost model threshold TH.
2950 3203
2951 The versioning precondition(s) are placed in *COND_EXPR and 3204 The versioning precondition(s) are placed in *COND_EXPR and
2952 *COND_EXPR_STMT_LIST. */ 3205 *COND_EXPR_STMT_LIST. */
2953 3206
2954 void 3207 class loop *
2955 vect_loop_versioning (loop_vec_info loop_vinfo, 3208 vect_loop_versioning (loop_vec_info loop_vinfo,
2956 unsigned int th, bool check_profitability, 3209 gimple *loop_vectorized_call)
2957 poly_uint64 versioning_threshold) 3210 {
2958 { 3211 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2959 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop; 3212 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2960 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2961 basic_block condition_bb; 3213 basic_block condition_bb;
2962 gphi_iterator gsi; 3214 gphi_iterator gsi;
2963 gimple_stmt_iterator cond_exp_gsi; 3215 gimple_stmt_iterator cond_exp_gsi;
2964 basic_block merge_bb; 3216 basic_block merge_bb;
2965 basic_block new_exit_bb; 3217 basic_block new_exit_bb;
2972 gimple_seq gimplify_stmt_list = NULL; 3224 gimple_seq gimplify_stmt_list = NULL;
2973 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo); 3225 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2974 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); 3226 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2975 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); 3227 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2976 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); 3228 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2977 3229 poly_uint64 versioning_threshold
2978 if (check_profitability) 3230 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
3231 tree version_simd_if_cond
3232 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
3233 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
3234
3235 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
3236 && !ordered_p (th, versioning_threshold))
2979 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, 3237 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2980 build_int_cst (TREE_TYPE (scalar_loop_iters), 3238 build_int_cst (TREE_TYPE (scalar_loop_iters),
2981 th - 1)); 3239 th - 1));
2982 if (maybe_ne (versioning_threshold, 0U)) 3240 if (maybe_ne (versioning_threshold, 0U))
2983 { 3241 {
2993 3251
2994 if (version_niter) 3252 if (version_niter)
2995 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr); 3253 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2996 3254
2997 if (cond_expr) 3255 if (cond_expr)
2998 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list, 3256 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3257 &cond_expr_stmt_list,
2999 is_gimple_condexpr, NULL_TREE); 3258 is_gimple_condexpr, NULL_TREE);
3000 3259
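
What the niters-based precondition assembled so far amounts to, as a hedged standalone model (it glosses over the ordered_p folding of the two bounds into a single GE_EXPR):

    #include <stdbool.h>

    /* The scalar fallback is taken unless NITERSM1 satisfies both the
       cost-model threshold TH and the versioning threshold VTH; when
       both are compile-time comparable the tests fold into one.  */
    static bool
    take_vector_version (unsigned long nitersm1, unsigned th,
                         unsigned long vth)
    {
      bool profitable = (th == 0) || (nitersm1 >= th - 1);
      bool above_vth = (vth == 0) || (nitersm1 >= vth - 1);
      return profitable && above_vth;
    }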
3001 if (version_align) 3260 if (version_align)
3002 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr, 3261 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
3003 &cond_expr_stmt_list); 3262 &cond_expr_stmt_list);
3005 if (version_alias) 3264 if (version_alias)
3006 { 3265 {
3007 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr); 3266 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
3008 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr); 3267 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
3009 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); 3268 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3269 }
3270
3271 if (version_simd_if_cond)
3272 {
3273 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
3274 if (flag_checking)
3275 if (basic_block bb
3276 = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
3277 gcc_assert (bb != loop->header
3278 && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
3279 && (scalar_loop == NULL
3280 || (bb != scalar_loop->header
3281 && dominated_by_p (CDI_DOMINATORS,
3282 scalar_loop->header, bb))));
3283 tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
3284 tree c = fold_build2 (NE_EXPR, boolean_type_node,
3285 version_simd_if_cond, zero);
3286 if (cond_expr)
3287 cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3288 c, cond_expr);
3289 else
3290 cond_expr = c;
3291 if (dump_enabled_p ())
3292 dump_printf_loc (MSG_NOTE, vect_location,
3293 "created versioning for simd if condition check.\n");
3010 } 3294 }
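
A sketch of how the simd-if operand composes with the other checks (names invented; TRUTH_AND_EXPR on boolean_type_node behaves like logical AND):

    #include <stdbool.h>

    /* The OpenMP 'if' operand must be nonzero, AND-ed with whatever
       niters/alignment/alias checks were accumulated before it.  */
    static bool
    versioning_condition (long simd_if_cond, bool have_other_checks,
                          bool other_checks)
    {
      bool c = simd_if_cond != 0;
      return have_other_checks ? (c && other_checks) : c;
    }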
3011 3295
3012 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), 3296 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3013 &gimplify_stmt_list, 3297 &gimplify_stmt_list,
3014 is_gimple_condexpr, NULL_TREE); 3298 is_gimple_condexpr, NULL_TREE);
3015 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); 3299 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
3016 3300
3017 initialize_original_copy_tables (); 3301 /* Compute the outermost loop that cond_expr and cond_expr_stmt_list are
3018 if (scalar_loop) 3302 invariant in. */
3019 { 3303 class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
3020 edge scalar_e; 3304 for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
3021 basic_block preheader, scalar_preheader; 3305 !gsi_end_p (gsi); gsi_next (&gsi))
3022 3306 {
3023 /* We don't want to scale SCALAR_LOOP's frequencies, we need to 3307 gimple *stmt = gsi_stmt (gsi);
3024 scale LOOP's frequencies instead. */ 3308 update_stmt (stmt);
3025 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, 3309 ssa_op_iter iter;
3310 use_operand_p use_p;
3311 basic_block def_bb;
3312 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3313 if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
3314 && flow_bb_inside_loop_p (outermost, def_bb))
3315 outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
3316 }
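
A toy model of the invariance computation above (invented representation: each SSA operand is summarized by the loop depth of its definition, depth 0 being the function body; the clamp to depth 1 mirrors the outermost->num == 0 fixup applied below):

    /* The condition can only be hoisted strictly inside every defining
       loop of its operands: an operand defined at depth D pins the
       versioning point to depth D + 1 at the shallowest.  */
    static int
    outermost_versionable_depth (const int *def_depths, int n,
                                 int loop_depth)
    {
      int depth = 1; /* never version at the function root */
      for (int i = 0; i < n; i++)
        if (def_depths[i] + 1 > depth)
          depth = def_depths[i] + 1;
      return depth > loop_depth ? loop_depth : depth;
    }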
3317
3318 /* Search for the outermost loop we can version. Avoid versioning of
3319 non-perfect nests but allow if-conversion versioned loops inside. */
3320 class loop *loop_to_version = loop;
3321 if (flow_loop_nested_p (outermost, loop))
3322 {
3323 if (dump_enabled_p ())
3324 dump_printf_loc (MSG_NOTE, vect_location,
3325 "trying to apply versioning to outer loop %d\n",
3326 outermost->num);
3327 if (outermost->num == 0)
3328 outermost = superloop_at_depth (loop, 1);
3329 /* And avoid applying versioning on non-perfect nests. */
3330 while (loop_to_version != outermost
3331 && single_exit (loop_outer (loop_to_version))
3332 && (!loop_outer (loop_to_version)->inner->next
3333 || vect_loop_vectorized_call (loop_to_version))
3334 && (!loop_outer (loop_to_version)->inner->next
3335 || !loop_outer (loop_to_version)->inner->next->next))
3336 loop_to_version = loop_outer (loop_to_version);
3337 }
3338
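
And a model of the outward walk just above (invented representation): the versioning point is hoisted while the enclosing loop has a single exit and the nest stays perfect, allowing at most an if-converted scalar copy as the one sibling.

    #include <stdbool.h>

    struct toy_loop
    {
      struct toy_loop *outer;     /* enclosing loop, NULL at the root */
      bool outer_single_exit;     /* enclosing loop has a single exit */
      int outer_children;         /* loops directly inside the parent */
      bool sibling_is_ifcvt_copy; /* only sibling is the scalar copy */
    };

    static struct toy_loop *
    hoist_versioning_point (struct toy_loop *l, struct toy_loop *outermost)
    {
      while (l != outermost
             && l->outer_single_exit
             && (l->outer_children == 1
                 || (l->outer_children == 2 && l->sibling_is_ifcvt_copy)))
        l = l->outer;
      return l;
    }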
3339 /* Apply versioning. If there is already a scalar version created by
3340 if-conversion, re-use that. Note we cannot re-use the copy of
3341 an if-converted outer-loop when vectorizing the inner loop only. */
3342 gcond *cond;
3343 if ((!loop_to_version->inner || loop == loop_to_version)
3344 && loop_vectorized_call)
3345 {
3346 gcc_assert (scalar_loop);
3347 condition_bb = gimple_bb (loop_vectorized_call);
3348 cond = as_a <gcond *> (last_stmt (condition_bb));
3349 gimple_cond_set_condition_from_tree (cond, cond_expr);
3350 update_stmt (cond);
3351
3352 if (cond_expr_stmt_list)
3353 {
3354 cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
3355 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3356 GSI_SAME_STMT);
3357 }
3358
3359 /* if-conversion uses profile_probability::always () for both paths,
3360 reset the paths probabilities appropriately. */
3361 edge te, fe;
3362 extract_true_false_edges_from_block (condition_bb, &te, &fe);
3363 te->probability = prob;
3364 fe->probability = prob.invert ();
3365 /* We can scale the loop counts immediately but have to postpone
3366 scaling the scalar loop because we re-use it during peeling. */
3367 scale_loop_frequencies (loop_to_version, te->probability);
3368 LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = fe->probability;
3369
3370 nloop = scalar_loop;
3371 if (dump_enabled_p ())
3372 dump_printf_loc (MSG_NOTE, vect_location,
3373 "reusing %sloop version created by if conversion\n",
3374 loop_to_version != loop ? "outer " : "");
3375 }
3376 else
3377 {
3378 if (loop_to_version != loop
3379 && dump_enabled_p ())
3380 dump_printf_loc (MSG_NOTE, vect_location,
3381 "applying loop versioning to outer loop %d\n",
3382 loop_to_version->num);
3383
3384 initialize_original_copy_tables ();
3385 nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
3026 prob, prob.invert (), prob, prob.invert (), true); 3386 prob, prob.invert (), prob, prob.invert (), true);
3027 scale_loop_frequencies (loop, prob); 3387 gcc_assert (nloop);
3028 /* CONDITION_BB was created above SCALAR_LOOP's preheader, 3388 nloop = get_loop_copy (loop);
3029 while we need to move it above LOOP's preheader. */ 3389
3030 e = loop_preheader_edge (loop); 3390 /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
3031 scalar_e = loop_preheader_edge (scalar_loop); 3391 reap those otherwise; they also refer to the original
3032 /* The vector loop preheader might not be empty, since new 3392 loops. */
3033 invariants could have been created while analyzing the loop. */ 3393 class loop *l = loop;
3034 gcc_assert (single_pred_p (e->src)); 3394 while (gimple *call = vect_loop_vectorized_call (l))
3035 gcc_assert (empty_block_p (scalar_e->src) 3395 {
3036 && single_pred_p (scalar_e->src)); 3396 call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
3037 gcc_assert (single_pred_p (condition_bb)); 3397 fold_loop_internal_call (call, boolean_false_node);
3038 preheader = e->src; 3398 l = loop_outer (l);
3039 scalar_preheader = scalar_e->src; 3399 }
3040 scalar_e = find_edge (condition_bb, scalar_preheader); 3400 free_original_copy_tables ();
3041 e = single_pred_edge (preheader); 3401
3042 redirect_edge_and_branch_force (single_pred_edge (condition_bb), 3402 if (cond_expr_stmt_list)
3043 scalar_preheader); 3403 {
3044 redirect_edge_and_branch_force (scalar_e, preheader); 3404 cond_exp_gsi = gsi_last_bb (condition_bb);
3045 redirect_edge_and_branch_force (e, condition_bb); 3405 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3046 set_immediate_dominator (CDI_DOMINATORS, condition_bb, 3406 GSI_SAME_STMT);
3047 single_pred (condition_bb)); 3407 }
3048 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader, 3408
3049 single_pred (scalar_preheader)); 3409 /* Loop versioning violates an assumption we try to maintain during
3050 set_immediate_dominator (CDI_DOMINATORS, preheader, 3410 vectorization - that the loop exit block has a single predecessor.
3051 condition_bb); 3411 After versioning, the exit block of both loop versions is the same
3052 } 3412 basic block (i.e. it has two predecessors). Just in order to simplify
3053 else 3413 following transformations in the vectorizer, we fix this situation
3054 nloop = loop_version (loop, cond_expr, &condition_bb, 3414 here by adding a new (empty) block on the exit-edge of the loop,
3055 prob, prob.invert (), prob, prob.invert (), true); 3415 with the proper loop-exit phis to maintain loop-closed-form.
3416 If loop versioning wasn't done from loop, but scalar_loop instead,
3417 merge_bb will already have just a single predecessor. */
3418
3419 merge_bb = single_exit (loop_to_version)->dest;
3420 if (EDGE_COUNT (merge_bb->preds) >= 2)
3421 {
3422 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
3423 new_exit_bb = split_edge (single_exit (loop_to_version));
3424 new_exit_e = single_exit (loop_to_version);
3425 e = EDGE_SUCC (new_exit_bb, 0);
3426
3427 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
3428 gsi_next (&gsi))
3429 {
3430 tree new_res;
3431 orig_phi = gsi.phi ();
3432 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
3433 new_phi = create_phi_node (new_res, new_exit_bb);
3434 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3435 add_phi_arg (new_phi, arg, new_exit_e,
3436 gimple_phi_arg_location_from_edge (orig_phi, e));
3437 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
3438 }
3439 }
3440
3441 update_ssa (TODO_update_ssa);
3442 }
3056 3443
3057 if (version_niter) 3444 if (version_niter)
3058 { 3445 {
3059 /* The versioned loop could be infinite; we need to clear existing 3446 /* The versioned loop could be infinite; we need to clear existing
3060 niter information which is copied from the original loop. */ 3447 niter information which is copied from the original loop. */
3077 vect_location, 3464 vect_location,
3078 "loop versioned for vectorization to enhance " 3465 "loop versioned for vectorization to enhance "
3079 "alignment\n"); 3466 "alignment\n");
3080 3467
3081 } 3468 }
3082 free_original_copy_tables (); 3469
3083 3470 return nloop;
3084 /* Loop versioning violates an assumption we try to maintain during 3471 }
3085 vectorization - that the loop exit block has a single predecessor.
3086 After versioning, the exit block of both loop versions is the same
3087 basic block (i.e. it has two predecessors). Just in order to simplify
3088 following transformations in the vectorizer, we fix this situation
3089 here by adding a new (empty) block on the exit-edge of the loop,
3090 with the proper loop-exit phis to maintain loop-closed-form.
3091 If loop versioning wasn't done from loop, but scalar_loop instead,
3092 merge_bb will already have just a single predecessor. */
3093
3094 merge_bb = single_exit (loop)->dest;
3095 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
3096 {
3097 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
3098 new_exit_bb = split_edge (single_exit (loop));
3099 new_exit_e = single_exit (loop);
3100 e = EDGE_SUCC (new_exit_bb, 0);
3101
3102 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
3103 {
3104 tree new_res;
3105 orig_phi = gsi.phi ();
3106 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
3107 new_phi = create_phi_node (new_res, new_exit_bb);
3108 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3109 add_phi_arg (new_phi, arg, new_exit_e,
3110 gimple_phi_arg_location_from_edge (orig_phi, e));
3111 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
3112 }
3113 }
3114
3115 /* End loop-exit-fixes after versioning. */
3116
3117 if (cond_expr_stmt_list)
3118 {
3119 cond_exp_gsi = gsi_last_bb (condition_bb);
3120 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3121 GSI_SAME_STMT);
3122 }
3123 update_ssa (TODO_update_ssa);
3124 }