Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-ssa-loop-ivcanon.c @ 132:d34655255c78
update gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 10:21:07 +0900 |
parents | 84e7813d76e9 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
130:e108057fa461 | 132:d34655255c78 |
---|---|
1 /* Induction variable canonicalization and loop peeling. | 1 /* Induction variable canonicalization and loop peeling. |
2 Copyright (C) 2004-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2004-2018 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify it | 6 GCC is free software; you can redistribute it and/or modify it |
7 under the terms of the GNU General Public License as published by the | 7 under the terms of the GNU General Public License as published by the |
61 #include "tree-scalar-evolution.h" | 61 #include "tree-scalar-evolution.h" |
62 #include "params.h" | 62 #include "params.h" |
63 #include "tree-inline.h" | 63 #include "tree-inline.h" |
64 #include "tree-cfgcleanup.h" | 64 #include "tree-cfgcleanup.h" |
65 #include "builtins.h" | 65 #include "builtins.h" |
66 #include "tree-ssa-sccvn.h" | |
66 | 67 |
67 /* Specifies types of loops that may be unrolled. */ | 68 /* Specifies types of loops that may be unrolled. */ |
68 | 69 |
69 enum unroll_level | 70 enum unroll_level |
70 { | 71 { |
74 of code size. */ | 75 of code size. */ |
75 UL_ALL /* All suitable loops. */ | 76 UL_ALL /* All suitable loops. */ |
76 }; | 77 }; |
77 | 78 |
78 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT | 79 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT |
79 is the exit edge whose condition is replaced. */ | 80 is the exit edge whose condition is replaced. The ssa versions of the new |
80 | 81 IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER |
81 static void | 82 if they are not NULL. */ |
82 create_canonical_iv (struct loop *loop, edge exit, tree niter) | 83 |
84 void | |
85 create_canonical_iv (struct loop *loop, edge exit, tree niter, | |
86 tree *var_before = NULL, tree *var_after = NULL) | |
83 { | 87 { |
84 edge in; | 88 edge in; |
85 tree type, var; | 89 tree type, var; |
86 gcond *cond; | 90 gcond *cond; |
87 gimple_stmt_iterator incr_at; | 91 gimple_stmt_iterator incr_at; |
110 build_int_cst (type, 1)); | 114 build_int_cst (type, 1)); |
111 incr_at = gsi_last_bb (in->src); | 115 incr_at = gsi_last_bb (in->src); |
112 create_iv (niter, | 116 create_iv (niter, |
113 build_int_cst (type, -1), | 117 build_int_cst (type, -1), |
114 NULL_TREE, loop, | 118 NULL_TREE, loop, |
115 &incr_at, false, NULL, &var); | 119 &incr_at, false, var_before, &var); |
120 if (var_after) | |
121 *var_after = var; | |
116 | 122 |
117 cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; | 123 cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; |
118 gimple_cond_set_code (cond, cmp); | 124 gimple_cond_set_code (cond, cmp); |
119 gimple_cond_set_lhs (cond, var); | 125 gimple_cond_set_lhs (cond, var); |
120 gimple_cond_set_rhs (cond, build_int_cst (type, 0)); | 126 gimple_cond_set_rhs (cond, build_int_cst (type, 0)); |
360 expand inline. */ | 366 expand inline. */ |
361 else if (gimple_code (stmt) != GIMPLE_DEBUG) | 367 else if (gimple_code (stmt) != GIMPLE_DEBUG) |
362 size->non_call_stmts_on_hot_path++; | 368 size->non_call_stmts_on_hot_path++; |
363 if (((gimple_code (stmt) == GIMPLE_COND | 369 if (((gimple_code (stmt) == GIMPLE_COND |
364 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) | 370 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) |
365 || constant_after_peeling (gimple_cond_rhs (stmt), stmt, | 371 || !constant_after_peeling (gimple_cond_rhs (stmt), stmt, |
366 loop))) | 372 loop))) |
367 || (gimple_code (stmt) == GIMPLE_SWITCH | 373 || (gimple_code (stmt) == GIMPLE_SWITCH |
368 && !constant_after_peeling (gimple_switch_index ( | 374 && !constant_after_peeling (gimple_switch_index ( |
369 as_a <gswitch *> (stmt)), | 375 as_a <gswitch *> (stmt)), |
370 stmt, loop))) | 376 stmt, loop))) |
371 && (!exit || bb != exit->src)) | 377 && (!exit || bb != exit->src)) |
645 latch_edge->flags |= flags; | 651 latch_edge->flags |= flags; |
646 latch_edge->goto_locus = locus; | 652 latch_edge->goto_locus = locus; |
647 | 653 |
648 add_bb_to_loop (latch_edge->dest, current_loops->tree_root); | 654 add_bb_to_loop (latch_edge->dest, current_loops->tree_root); |
649 latch_edge->dest->count = profile_count::zero (); | 655 latch_edge->dest->count = profile_count::zero (); |
650 latch_edge->dest->frequency = 0; | |
651 set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); | 656 set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); |
652 | 657 |
653 gsi = gsi_start_bb (latch_edge->dest); | 658 gsi = gsi_start_bb (latch_edge->dest); |
654 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); | 659 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
655 } | 660 } |
656 loops_to_unloop.release (); | 661 loops_to_unloop.release (); |
657 loops_to_unloop_nunroll.release (); | 662 loops_to_unloop_nunroll.release (); |
658 | 663 |
659 /* Remove edges in peeled copies. */ | 664 /* Remove edges in peeled copies. Given remove_path removes dominated |
665 regions we need to cope with removal of already removed paths. */ | |
660 unsigned i; | 666 unsigned i; |
661 edge e; | 667 edge e; |
668 auto_vec<int, 20> src_bbs; | |
669 src_bbs.reserve_exact (edges_to_remove.length ()); | |
662 FOR_EACH_VEC_ELT (edges_to_remove, i, e) | 670 FOR_EACH_VEC_ELT (edges_to_remove, i, e) |
663 { | 671 src_bbs.quick_push (e->src->index); |
664 bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated); | 672 FOR_EACH_VEC_ELT (edges_to_remove, i, e) |
665 gcc_assert (ok); | 673 if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i])) |
666 } | 674 { |
675 bool ok = remove_path (e, irred_invalidated, | |
676 loop_closed_ssa_invalidated); | |
677 gcc_assert (ok); | |
678 } | |
667 edges_to_remove.release (); | 679 edges_to_remove.release (); |
668 } | 680 } |
669 | 681 |
670 /* Tries to unroll LOOP completely, i.e. NITER times. | 682 /* Tries to unroll LOOP completely, i.e. NITER times. |
671 UL determines which loops we are allowed to unroll. | 683 UL determines which loops we are allowed to unroll. |
675 LOCUS corresponding to the loop is used when emitting | 687 LOCUS corresponding to the loop is used when emitting |
676 a summary of the unroll to the dump file. */ | 688 a summary of the unroll to the dump file. */ |
677 | 689 |
678 static bool | 690 static bool |
679 try_unroll_loop_completely (struct loop *loop, | 691 try_unroll_loop_completely (struct loop *loop, |
680 edge exit, tree niter, | 692 edge exit, tree niter, bool may_be_zero, |
681 enum unroll_level ul, | 693 enum unroll_level ul, |
682 HOST_WIDE_INT maxiter, | 694 HOST_WIDE_INT maxiter, |
683 location_t locus) | 695 dump_user_location_t locus, bool allow_peel) |
684 { | 696 { |
685 unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns; | 697 unsigned HOST_WIDE_INT n_unroll = 0; |
686 struct loop_size size; | |
687 bool n_unroll_found = false; | 698 bool n_unroll_found = false; |
688 edge edge_to_cancel = NULL; | 699 edge edge_to_cancel = NULL; |
689 dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS; | |
690 | 700 |
691 /* See if we proved number of iterations to be low constant. | 701 /* See if we proved number of iterations to be low constant. |
692 | 702 |
693 EXIT is an edge that will be removed in all but last iteration of | 703 EXIT is an edge that will be removed in all but last iteration of |
694 the loop. | 704 the loop. |
712 the EXIT edge. */ | 722 the EXIT edge. */ |
713 else | 723 else |
714 exit = NULL; | 724 exit = NULL; |
715 | 725 |
716 /* See if we can improve our estimate by using recorded loop bounds. */ | 726 /* See if we can improve our estimate by using recorded loop bounds. */ |
717 if (maxiter >= 0 | 727 if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH) |
728 && maxiter >= 0 | |
718 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) | 729 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) |
719 { | 730 { |
720 n_unroll = maxiter; | 731 n_unroll = maxiter; |
721 n_unroll_found = true; | 732 n_unroll_found = true; |
722 /* Loop terminates before the IV variable test, so we can not | 733 /* Loop terminates before the IV variable test, so we can not |
725 } | 736 } |
726 | 737 |
727 if (!n_unroll_found) | 738 if (!n_unroll_found) |
728 return false; | 739 return false; |
729 | 740 |
730 if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) | 741 if (!loop->unroll |
742 && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) | |
731 { | 743 { |
732 if (dump_file && (dump_flags & TDF_DETAILS)) | 744 if (dump_file && (dump_flags & TDF_DETAILS)) |
733 fprintf (dump_file, "Not unrolling loop %d " | 745 fprintf (dump_file, "Not unrolling loop %d " |
734 "(--param max-completely-peel-times limit reached).\n", | 746 "(--param max-completely-peel-times limit reached).\n", |
735 loop->num); | 747 loop->num); |
739 if (!edge_to_cancel) | 751 if (!edge_to_cancel) |
740 edge_to_cancel = loop_edge_to_cancel (loop); | 752 edge_to_cancel = loop_edge_to_cancel (loop); |
741 | 753 |
742 if (n_unroll) | 754 if (n_unroll) |
743 { | 755 { |
744 bool large; | |
745 if (ul == UL_SINGLE_ITER) | 756 if (ul == UL_SINGLE_ITER) |
746 return false; | 757 return false; |
747 | 758 |
748 /* EXIT can be removed only if we are sure it passes first N_UNROLL | 759 if (loop->unroll) |
749 iterations. */ | |
750 bool remove_exit = (exit && niter | |
751 && TREE_CODE (niter) == INTEGER_CST | |
752 && wi::leu_p (n_unroll, wi::to_widest (niter))); | |
753 | |
754 large = tree_estimate_loop_size | |
755 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, | |
756 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); | |
757 ninsns = size.overall; | |
758 if (large) | |
759 { | 760 { |
761 /* If the unrolling factor is too large, bail out. */ | |
762 if (n_unroll > (unsigned)loop->unroll) | |
763 { | |
764 if (dump_file && (dump_flags & TDF_DETAILS)) | |
765 fprintf (dump_file, | |
766 "Not unrolling loop %d: " | |
767 "user didn't want it unrolled completely.\n", | |
768 loop->num); | |
769 return false; | |
770 } | |
771 } | |
772 else | |
773 { | |
774 struct loop_size size; | |
775 /* EXIT can be removed only if we are sure it passes first N_UNROLL | |
776 iterations. */ | |
777 bool remove_exit = (exit && niter | |
778 && TREE_CODE (niter) == INTEGER_CST | |
779 && wi::leu_p (n_unroll, wi::to_widest (niter))); | |
780 bool large | |
781 = tree_estimate_loop_size | |
782 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, | |
783 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); | |
784 if (large) | |
785 { | |
786 if (dump_file && (dump_flags & TDF_DETAILS)) | |
787 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", | |
788 loop->num); | |
789 return false; | |
790 } | |
791 | |
792 unsigned HOST_WIDE_INT ninsns = size.overall; | |
793 unsigned HOST_WIDE_INT unr_insns | |
794 = estimated_unrolled_size (&size, n_unroll); | |
760 if (dump_file && (dump_flags & TDF_DETAILS)) | 795 if (dump_file && (dump_flags & TDF_DETAILS)) |
761 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", | 796 { |
762 loop->num); | 797 fprintf (dump_file, " Loop size: %d\n", (int) ninsns); |
763 return false; | 798 fprintf (dump_file, " Estimated size after unrolling: %d\n", |
799 (int) unr_insns); | |
800 } | |
801 | |
802 /* If the code is going to shrink, we don't need to be extra | |
803 cautious on guessing if the unrolling is going to be | |
804 profitable. */ | |
805 if (unr_insns | |
806 /* If there is IV variable that will become constant, we | |
807 save one instruction in the loop prologue we do not | |
808 account otherwise. */ | |
809 <= ninsns + (size.constant_iv != false)) | |
810 ; | |
811 /* We unroll only inner loops, because we do not consider it | |
812 profitable otheriwse. We still can cancel loopback edge | |
813 of not rolling loop; this is always a good idea. */ | |
814 else if (ul == UL_NO_GROWTH) | |
815 { | |
816 if (dump_file && (dump_flags & TDF_DETAILS)) | |
817 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", | |
818 loop->num); | |
819 return false; | |
820 } | |
821 /* Outer loops tend to be less interesting candidates for | |
822 complete unrolling unless we can do a lot of propagation | |
823 into the inner loop body. For now we disable outer loop | |
824 unrolling when the code would grow. */ | |
825 else if (loop->inner) | |
826 { | |
827 if (dump_file && (dump_flags & TDF_DETAILS)) | |
828 fprintf (dump_file, "Not unrolling loop %d: " | |
829 "it is not innermost and code would grow.\n", | |
830 loop->num); | |
831 return false; | |
832 } | |
833 /* If there is call on a hot path through the loop, then | |
834 there is most probably not much to optimize. */ | |
835 else if (size.num_non_pure_calls_on_hot_path) | |
836 { | |
837 if (dump_file && (dump_flags & TDF_DETAILS)) | |
838 fprintf (dump_file, "Not unrolling loop %d: " | |
839 "contains call and code would grow.\n", | |
840 loop->num); | |
841 return false; | |
842 } | |
843 /* If there is pure/const call in the function, then we can | |
844 still optimize the unrolled loop body if it contains some | |
845 other interesting code than the calls and code storing or | |
846 cumulating the return value. */ | |
847 else if (size.num_pure_calls_on_hot_path | |
848 /* One IV increment, one test, one ivtmp store and | |
849 one useful stmt. That is about minimal loop | |
850 doing pure call. */ | |
851 && (size.non_call_stmts_on_hot_path | |
852 <= 3 + size.num_pure_calls_on_hot_path)) | |
853 { | |
854 if (dump_file && (dump_flags & TDF_DETAILS)) | |
855 fprintf (dump_file, "Not unrolling loop %d: " | |
856 "contains just pure calls and code would grow.\n", | |
857 loop->num); | |
858 return false; | |
859 } | |
860 /* Complete unrolling is major win when control flow is | |
861 removed and one big basic block is created. If the loop | |
862 contains control flow the optimization may still be a win | |
863 because of eliminating the loop overhead but it also may | |
864 blow the branch predictor tables. Limit number of | |
865 branches on the hot path through the peeled sequence. */ | |
866 else if (size.num_branches_on_hot_path * (int)n_unroll | |
867 > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) | |
868 { | |
869 if (dump_file && (dump_flags & TDF_DETAILS)) | |
870 fprintf (dump_file, "Not unrolling loop %d: " | |
871 "number of branches on hot path in the unrolled " | |
872 "sequence reaches --param max-peel-branches limit.\n", | |
873 loop->num); | |
874 return false; | |
875 } | |
876 else if (unr_insns | |
877 > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) | |
878 { | |
879 if (dump_file && (dump_flags & TDF_DETAILS)) | |
880 fprintf (dump_file, "Not unrolling loop %d: " | |
881 "number of insns in the unrolled sequence reaches " | |
882 "--param max-completely-peeled-insns limit.\n", | |
883 loop->num); | |
884 return false; | |
885 } | |
764 } | 886 } |
765 | |
766 unr_insns = estimated_unrolled_size (&size, n_unroll); | |
767 if (dump_file && (dump_flags & TDF_DETAILS)) | |
768 { | |
769 fprintf (dump_file, " Loop size: %d\n", (int) ninsns); | |
770 fprintf (dump_file, " Estimated size after unrolling: %d\n", | |
771 (int) unr_insns); | |
772 } | |
773 | |
774 /* If the code is going to shrink, we don't need to be extra cautious | |
775 on guessing if the unrolling is going to be profitable. */ | |
776 if (unr_insns | |
777 /* If there is IV variable that will become constant, we save | |
778 one instruction in the loop prologue we do not account | |
779 otherwise. */ | |
780 <= ninsns + (size.constant_iv != false)) | |
781 ; | |
782 /* We unroll only inner loops, because we do not consider it profitable | |
783 otheriwse. We still can cancel loopback edge of not rolling loop; | |
784 this is always a good idea. */ | |
785 else if (ul == UL_NO_GROWTH) | |
786 { | |
787 if (dump_file && (dump_flags & TDF_DETAILS)) | |
788 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", | |
789 loop->num); | |
790 return false; | |
791 } | |
792 /* Outer loops tend to be less interesting candidates for complete | |
793 unrolling unless we can do a lot of propagation into the inner loop | |
794 body. For now we disable outer loop unrolling when the code would | |
795 grow. */ | |
796 else if (loop->inner) | |
797 { | |
798 if (dump_file && (dump_flags & TDF_DETAILS)) | |
799 fprintf (dump_file, "Not unrolling loop %d: " | |
800 "it is not innermost and code would grow.\n", | |
801 loop->num); | |
802 return false; | |
803 } | |
804 /* If there is call on a hot path through the loop, then | |
805 there is most probably not much to optimize. */ | |
806 else if (size.num_non_pure_calls_on_hot_path) | |
807 { | |
808 if (dump_file && (dump_flags & TDF_DETAILS)) | |
809 fprintf (dump_file, "Not unrolling loop %d: " | |
810 "contains call and code would grow.\n", | |
811 loop->num); | |
812 return false; | |
813 } | |
814 /* If there is pure/const call in the function, then we | |
815 can still optimize the unrolled loop body if it contains | |
816 some other interesting code than the calls and code | |
817 storing or cumulating the return value. */ | |
818 else if (size.num_pure_calls_on_hot_path | |
819 /* One IV increment, one test, one ivtmp store | |
820 and one useful stmt. That is about minimal loop | |
821 doing pure call. */ | |
822 && (size.non_call_stmts_on_hot_path | |
823 <= 3 + size.num_pure_calls_on_hot_path)) | |
824 { | |
825 if (dump_file && (dump_flags & TDF_DETAILS)) | |
826 fprintf (dump_file, "Not unrolling loop %d: " | |
827 "contains just pure calls and code would grow.\n", | |
828 loop->num); | |
829 return false; | |
830 } | |
831 /* Complete unrolling is a major win when control flow is removed and | |
832 one big basic block is created. If the loop contains control flow | |
833 the optimization may still be a win because of eliminating the loop | |
834 overhead but it also may blow the branch predictor tables. | |
835 Limit number of branches on the hot path through the peeled | |
836 sequence. */ | |
837 else if (size.num_branches_on_hot_path * (int)n_unroll | |
838 > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) | |
839 { | |
840 if (dump_file && (dump_flags & TDF_DETAILS)) | |
841 fprintf (dump_file, "Not unrolling loop %d: " | |
842 " number of branches on hot path in the unrolled sequence" | |
843 " reach --param max-peel-branches limit.\n", | |
844 loop->num); | |
845 return false; | |
846 } | |
847 else if (unr_insns | |
848 > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) | |
849 { | |
850 if (dump_file && (dump_flags & TDF_DETAILS)) | |
851 fprintf (dump_file, "Not unrolling loop %d: " | |
852 "(--param max-completely-peeled-insns limit reached).\n", | |
853 loop->num); | |
854 return false; | |
855 } | |
856 if (!n_unroll) | |
857 dump_printf_loc (report_flags, locus, | |
858 "loop turned into non-loop; it never loops.\n"); | |
859 | 887 |
860 initialize_original_copy_tables (); | 888 initialize_original_copy_tables (); |
861 auto_sbitmap wont_exit (n_unroll + 1); | 889 auto_sbitmap wont_exit (n_unroll + 1); |
862 if (exit && niter | 890 if (exit && niter |
863 && TREE_CODE (niter) == INTEGER_CST | 891 && TREE_CODE (niter) == INTEGER_CST |
871 else | 899 else |
872 { | 900 { |
873 exit = NULL; | 901 exit = NULL; |
874 bitmap_clear (wont_exit); | 902 bitmap_clear (wont_exit); |
875 } | 903 } |
904 if (may_be_zero) | |
905 bitmap_clear_bit (wont_exit, 1); | |
876 | 906 |
877 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | 907 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
878 n_unroll, wont_exit, | 908 n_unroll, wont_exit, |
879 exit, &edges_to_remove, | 909 exit, &edges_to_remove, |
880 DLTHE_FLAG_UPDATE_FREQ | 910 DLTHE_FLAG_UPDATE_FREQ |
897 if (edge_to_cancel->flags & EDGE_TRUE_VALUE) | 927 if (edge_to_cancel->flags & EDGE_TRUE_VALUE) |
898 gimple_cond_make_false (cond); | 928 gimple_cond_make_false (cond); |
899 else | 929 else |
900 gimple_cond_make_true (cond); | 930 gimple_cond_make_true (cond); |
901 update_stmt (cond); | 931 update_stmt (cond); |
902 /* Do not remove the path. Doing so may remove outer loop | 932 /* Do not remove the path, as doing so may remove outer loop and |
903 and confuse bookkeeping code in tree_unroll_loops_completelly. */ | 933 confuse bookkeeping code in tree_unroll_loops_completely. */ |
904 } | 934 } |
905 | 935 |
906 /* Store the loop for later unlooping and exit removal. */ | 936 /* Store the loop for later unlooping and exit removal. */ |
907 loops_to_unloop.safe_push (loop); | 937 loops_to_unloop.safe_push (loop); |
908 loops_to_unloop_nunroll.safe_push (n_unroll); | 938 loops_to_unloop_nunroll.safe_push (n_unroll); |
914 "loop turned into non-loop; it never loops\n"); | 944 "loop turned into non-loop; it never loops\n"); |
915 else | 945 else |
916 { | 946 { |
917 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, | 947 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, |
918 "loop with %d iterations completely unrolled", | 948 "loop with %d iterations completely unrolled", |
919 (int) (n_unroll + 1)); | 949 (int) n_unroll); |
920 if (loop->header->count.initialized_p ()) | 950 if (loop->header->count.initialized_p ()) |
921 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, | 951 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, |
922 " (header execution count %d)", | 952 " (header execution count %d)", |
923 (int)loop->header->count.to_gcov_type ()); | 953 (int)loop->header->count.to_gcov_type ()); |
924 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n"); | 954 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n"); |
955 enter the loop. | 985 enter the loop. |
956 Parameters are the same as for try_unroll_loops_completely */ | 986 Parameters are the same as for try_unroll_loops_completely */ |
957 | 987 |
958 static bool | 988 static bool |
959 try_peel_loop (struct loop *loop, | 989 try_peel_loop (struct loop *loop, |
960 edge exit, tree niter, | 990 edge exit, tree niter, bool may_be_zero, |
961 HOST_WIDE_INT maxiter) | 991 HOST_WIDE_INT maxiter) |
962 { | 992 { |
963 HOST_WIDE_INT npeel; | 993 HOST_WIDE_INT npeel; |
964 struct loop_size size; | 994 struct loop_size size; |
965 int peeled_size; | 995 int peeled_size; |
966 | 996 |
967 if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 | 997 if (!flag_peel_loops |
998 || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 | |
968 || !peeled_loops) | 999 || !peeled_loops) |
969 return false; | 1000 return false; |
970 | 1001 |
971 if (bitmap_bit_p (peeled_loops, loop->num)) | 1002 if (bitmap_bit_p (peeled_loops, loop->num)) |
972 { | 1003 { |
973 if (dump_file) | 1004 if (dump_file) |
974 fprintf (dump_file, "Not peeling: loop is already peeled\n"); | 1005 fprintf (dump_file, "Not peeling: loop is already peeled\n"); |
1006 return false; | |
1007 } | |
1008 | |
1009 /* We don't peel loops that will be unrolled as this can duplicate a | |
1010 loop more times than the user requested. */ | |
1011 if (loop->unroll) | |
1012 { | |
1013 if (dump_file) | |
1014 fprintf (dump_file, "Not peeling: user didn't want it peeled.\n"); | |
975 return false; | 1015 return false; |
976 } | 1016 } |
977 | 1017 |
978 /* Peel only innermost loops. | 1018 /* Peel only innermost loops. |
979 While the code is perfectly capable of peeling non-innermost loops, | 1019 While the code is perfectly capable of peeling non-innermost loops, |
980 the heuristics would probably need some improvements. */ | 1020 the heuristics would probably need some improvements. */ |
981 if (loop->inner) | 1021 if (loop->inner) |
982 { | 1022 { |
983 if (dump_file) | 1023 if (dump_file) |
984 fprintf (dump_file, "Not peeling: outer loop\n"); | 1024 fprintf (dump_file, "Not peeling: outer loop\n"); |
985 return false; | 1025 return false; |
986 } | 1026 } |
987 | 1027 |
988 if (!optimize_loop_for_speed_p (loop)) | 1028 if (!optimize_loop_for_speed_p (loop)) |
989 { | 1029 { |
990 if (dump_file) | 1030 if (dump_file) |
991 fprintf (dump_file, "Not peeling: cold loop\n"); | 1031 fprintf (dump_file, "Not peeling: cold loop\n"); |
992 return false; | 1032 return false; |
993 } | 1033 } |
994 | 1034 |
995 /* Check if there is an estimate on the number of iterations. */ | 1035 /* Check if there is an estimate on the number of iterations. */ |
996 npeel = estimated_loop_iterations_int (loop); | 1036 npeel = estimated_loop_iterations_int (loop); |
1004 return false; | 1044 return false; |
1005 } | 1045 } |
1006 if (maxiter >= 0 && maxiter <= npeel) | 1046 if (maxiter >= 0 && maxiter <= npeel) |
1007 { | 1047 { |
1008 if (dump_file) | 1048 if (dump_file) |
1009 fprintf (dump_file, "Not peeling: upper bound is known so can " | 1049 fprintf (dump_file, "Not peeling: upper bound is known so can " |
1010 "unroll completely\n"); | 1050 "unroll completely\n"); |
1011 return false; | 1051 return false; |
1012 } | 1052 } |
1013 | 1053 |
1014 /* We want to peel estimated number of iterations + 1 (so we never | 1054 /* We want to peel estimated number of iterations + 1 (so we never |
1015 enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES | 1055 enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES |
1016 and be sure to avoid overflows. */ | 1056 and be sure to avoid overflows. */ |
1017 if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) | 1057 if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) |
1018 { | 1058 { |
1019 if (dump_file) | 1059 if (dump_file) |
1020 fprintf (dump_file, "Not peeling: rolls too much " | 1060 fprintf (dump_file, "Not peeling: rolls too much " |
1021 "(%i + 1 > --param max-peel-times)\n", (int) npeel); | 1061 "(%i + 1 > --param max-peel-times)\n", (int) npeel); |
1022 return false; | 1062 return false; |
1023 } | 1063 } |
1024 npeel++; | 1064 npeel++; |
1025 | 1065 |
1028 PARAM_VALUE (PARAM_MAX_PEELED_INSNS)); | 1068 PARAM_VALUE (PARAM_MAX_PEELED_INSNS)); |
1029 if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel)) | 1069 if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel)) |
1030 > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) | 1070 > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) |
1031 { | 1071 { |
1032 if (dump_file) | 1072 if (dump_file) |
1033 fprintf (dump_file, "Not peeling: peeled sequence size is too large " | 1073 fprintf (dump_file, "Not peeling: peeled sequence size is too large " |
1034 "(%i insns > --param max-peel-insns)", peeled_size); | 1074 "(%i insns > --param max-peel-insns)", peeled_size); |
1035 return false; | 1075 return false; |
1036 } | 1076 } |
1037 | 1077 |
1038 /* Duplicate possibly eliminating the exits. */ | 1078 /* Duplicate possibly eliminating the exits. */ |
1048 else | 1088 else |
1049 { | 1089 { |
1050 exit = NULL; | 1090 exit = NULL; |
1051 bitmap_clear (wont_exit); | 1091 bitmap_clear (wont_exit); |
1052 } | 1092 } |
1093 if (may_be_zero) | |
1094 bitmap_clear_bit (wont_exit, 1); | |
1053 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | 1095 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1054 npeel, wont_exit, | 1096 npeel, wont_exit, |
1055 exit, &edges_to_remove, | 1097 exit, &edges_to_remove, |
1056 DLTHE_FLAG_UPDATE_FREQ)) | 1098 DLTHE_FLAG_UPDATE_FREQ)) |
1057 { | 1099 { |
1088 loop->nb_iterations_estimate = 0; | 1130 loop->nb_iterations_estimate = 0; |
1089 loop->nb_iterations_likely_upper_bound = 0; | 1131 loop->nb_iterations_likely_upper_bound = 0; |
1090 } | 1132 } |
1091 } | 1133 } |
1092 profile_count entry_count = profile_count::zero (); | 1134 profile_count entry_count = profile_count::zero (); |
1093 int entry_freq = 0; | |
1094 | 1135 |
1095 edge e; | 1136 edge e; |
1096 edge_iterator ei; | 1137 edge_iterator ei; |
1097 FOR_EACH_EDGE (e, ei, loop->header->preds) | 1138 FOR_EACH_EDGE (e, ei, loop->header->preds) |
1098 if (e->src != loop->latch) | 1139 if (e->src != loop->latch) |
1099 { | 1140 { |
1100 if (e->src->count.initialized_p ()) | 1141 if (e->src->count.initialized_p ()) |
1101 entry_count = e->src->count + e->src->count; | 1142 entry_count = e->src->count + e->src->count; |
1102 entry_freq += e->src->frequency; | |
1103 gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); | 1143 gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); |
1104 } | 1144 } |
1105 profile_probability p = profile_probability::very_unlikely (); | 1145 profile_probability p = profile_probability::very_unlikely (); |
1106 if (loop->header->count > 0) | 1146 p = entry_count.probability_in (loop->header->count); |
1107 p = entry_count.probability_in (loop->header->count); | |
1108 else if (loop->header->frequency) | |
1109 p = profile_probability::probability_in_gcov_type | |
1110 (entry_freq, loop->header->frequency); | |
1111 scale_loop_profile (loop, p, 0); | 1147 scale_loop_profile (loop, p, 0); |
1112 bitmap_set_bit (peeled_loops, loop->num); | 1148 bitmap_set_bit (peeled_loops, loop->num); |
1113 return true; | 1149 return true; |
1114 } | 1150 } |
1115 /* Adds a canonical induction variable to LOOP if suitable. | 1151 /* Adds a canonical induction variable to LOOP if suitable. |
1119 Returns true if cfg is changed. */ | 1155 Returns true if cfg is changed. */ |
1120 | 1156 |
1121 static bool | 1157 static bool |
1122 canonicalize_loop_induction_variables (struct loop *loop, | 1158 canonicalize_loop_induction_variables (struct loop *loop, |
1123 bool create_iv, enum unroll_level ul, | 1159 bool create_iv, enum unroll_level ul, |
1124 bool try_eval) | 1160 bool try_eval, bool allow_peel) |
1125 { | 1161 { |
1126 edge exit = NULL; | 1162 edge exit = NULL; |
1127 tree niter; | 1163 tree niter; |
1128 HOST_WIDE_INT maxiter; | 1164 HOST_WIDE_INT maxiter; |
1129 bool modified = false; | 1165 bool modified = false; |
1130 location_t locus = UNKNOWN_LOCATION; | 1166 dump_user_location_t locus; |
1131 | 1167 struct tree_niter_desc niter_desc; |
1132 niter = number_of_latch_executions (loop); | 1168 bool may_be_zero = false; |
1169 | |
1170 /* For unrolling allow conditional constant or zero iterations, thus | |
1171 perform loop-header copying on-the-fly. */ | |
1133 exit = single_exit (loop); | 1172 exit = single_exit (loop); |
1173 niter = chrec_dont_know; | |
1174 if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false)) | |
1175 { | |
1176 niter = niter_desc.niter; | |
1177 may_be_zero | |
1178 = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero); | |
1179 } | |
1134 if (TREE_CODE (niter) == INTEGER_CST) | 1180 if (TREE_CODE (niter) == INTEGER_CST) |
1135 locus = gimple_location (last_stmt (exit->src)); | 1181 locus = last_stmt (exit->src); |
1136 else | 1182 else |
1137 { | 1183 { |
1184 /* For non-constant niter fold may_be_zero into niter again. */ | |
1185 if (may_be_zero) | |
1186 { | |
1187 if (COMPARISON_CLASS_P (niter_desc.may_be_zero)) | |
1188 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), | |
1189 niter_desc.may_be_zero, | |
1190 build_int_cst (TREE_TYPE (niter), 0), niter); | |
1191 else | |
1192 niter = chrec_dont_know; | |
1193 may_be_zero = false; | |
1194 } | |
1195 | |
1138 /* If the loop has more than one exit, try checking all of them | 1196 /* If the loop has more than one exit, try checking all of them |
1139 for # of iterations determinable through scev. */ | 1197 for # of iterations determinable through scev. */ |
1140 if (!exit) | 1198 if (!exit) |
1141 niter = find_loop_niter (loop, &exit); | 1199 niter = find_loop_niter (loop, &exit); |
1142 | 1200 |
1145 && (chrec_contains_undetermined (niter) | 1203 && (chrec_contains_undetermined (niter) |
1146 || TREE_CODE (niter) != INTEGER_CST)) | 1204 || TREE_CODE (niter) != INTEGER_CST)) |
1147 niter = find_loop_niter_by_eval (loop, &exit); | 1205 niter = find_loop_niter_by_eval (loop, &exit); |
1148 | 1206 |
1149 if (exit) | 1207 if (exit) |
1150 locus = gimple_location (last_stmt (exit->src)); | 1208 locus = last_stmt (exit->src); |
1151 | 1209 |
1152 if (TREE_CODE (niter) != INTEGER_CST) | 1210 if (TREE_CODE (niter) != INTEGER_CST) |
1153 exit = NULL; | 1211 exit = NULL; |
1154 } | 1212 } |
1155 | 1213 |
1187 /* Remove exits that are known to be never taken based on loop bound. | 1245 /* Remove exits that are known to be never taken based on loop bound. |
1188 Needs to be called after compilation of max_loop_iterations_int that | 1246 Needs to be called after compilation of max_loop_iterations_int that |
1189 populates the loop bounds. */ | 1247 populates the loop bounds. */ |
1190 modified |= remove_redundant_iv_tests (loop); | 1248 modified |= remove_redundant_iv_tests (loop); |
1191 | 1249 |
1192 if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus)) | 1250 if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul, |
1251 maxiter, locus, allow_peel)) | |
1193 return true; | 1252 return true; |
1194 | 1253 |
1195 if (create_iv | 1254 if (create_iv |
1196 && niter && !chrec_contains_undetermined (niter) | 1255 && niter && !chrec_contains_undetermined (niter) |
1197 && exit && just_once_each_iteration_p (loop, exit->src)) | 1256 && exit && just_once_each_iteration_p (loop, exit->src)) |
1198 create_canonical_iv (loop, exit, niter); | 1257 { |
1258 tree iv_niter = niter; | |
1259 if (may_be_zero) | |
1260 { | |
1261 if (COMPARISON_CLASS_P (niter_desc.may_be_zero)) | |
1262 iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter), | |
1263 niter_desc.may_be_zero, | |
1264 build_int_cst (TREE_TYPE (iv_niter), 0), | |
1265 iv_niter); | |
1266 else | |
1267 iv_niter = NULL_TREE; | |
1268 } | |
1269 if (iv_niter) | |
1270 create_canonical_iv (loop, exit, iv_niter); | |
1271 } | |
1199 | 1272 |
1200 if (ul == UL_ALL) | 1273 if (ul == UL_ALL) |
1201 modified |= try_peel_loop (loop, exit, niter, maxiter); | 1274 modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter); |
1202 | 1275 |
1203 return modified; | 1276 return modified; |
1204 } | 1277 } |
1205 | 1278 |
1206 /* The main entry point of the pass. Adds canonical induction variables | 1279 /* The main entry point of the pass. Adds canonical induction variables |
1218 | 1291 |
1219 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | 1292 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
1220 { | 1293 { |
1221 changed |= canonicalize_loop_induction_variables (loop, | 1294 changed |= canonicalize_loop_induction_variables (loop, |
1222 true, UL_SINGLE_ITER, | 1295 true, UL_SINGLE_ITER, |
1223 true); | 1296 true, false); |
1224 } | 1297 } |
1225 gcc_assert (!need_ssa_update_p (cfun)); | 1298 gcc_assert (!need_ssa_update_p (cfun)); |
1226 | 1299 |
1227 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); | 1300 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); |
1228 if (irred_invalidated | 1301 if (irred_invalidated |
1244 if (changed) | 1317 if (changed) |
1245 return TODO_cleanup_cfg; | 1318 return TODO_cleanup_cfg; |
1246 return 0; | 1319 return 0; |
1247 } | 1320 } |
1248 | 1321 |
1249 /* Propagate constant SSA_NAMEs defined in basic block BB. */ | |
1250 | |
1251 static void | |
1252 propagate_constants_for_unrolling (basic_block bb) | |
1253 { | |
1254 /* Look for degenerate PHI nodes with constant argument. */ | |
1255 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); ) | |
1256 { | |
1257 gphi *phi = gsi.phi (); | |
1258 tree result = gimple_phi_result (phi); | |
1259 tree arg = gimple_phi_arg_def (phi, 0); | |
1260 | |
1261 if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result) | |
1262 && gimple_phi_num_args (phi) == 1 | |
1263 && CONSTANT_CLASS_P (arg)) | |
1264 { | |
1265 replace_uses_by (result, arg); | |
1266 gsi_remove (&gsi, true); | |
1267 release_ssa_name (result); | |
1268 } | |
1269 else | |
1270 gsi_next (&gsi); | |
1271 } | |
1272 | |
1273 /* Look for assignments to SSA names with constant RHS. */ | |
1274 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ) | |
1275 { | |
1276 gimple *stmt = gsi_stmt (gsi); | |
1277 tree lhs; | |
1278 | |
1279 if (is_gimple_assign (stmt) | |
1280 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant | |
1281 && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME) | |
1282 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) | |
1283 { | |
1284 replace_uses_by (lhs, gimple_assign_rhs1 (stmt)); | |
1285 gsi_remove (&gsi, true); | |
1286 release_ssa_name (lhs); | |
1287 } | |
1288 else | |
1289 gsi_next (&gsi); | |
1290 } | |
1291 } | |
1292 | |
1293 /* Process loops from innermost to outer, stopping at the innermost | 1322 /* Process loops from innermost to outer, stopping at the innermost |
1294 loop we unrolled. */ | 1323 loop we unrolled. */ |
1295 | 1324 |
1296 static bool | 1325 static bool |
1297 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, | 1326 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, |
1299 { | 1328 { |
1300 struct loop *loop_father; | 1329 struct loop *loop_father; |
1301 bool changed = false; | 1330 bool changed = false; |
1302 struct loop *inner; | 1331 struct loop *inner; |
1303 enum unroll_level ul; | 1332 enum unroll_level ul; |
1304 | 1333 unsigned num = number_of_loops (cfun); |
1305 /* Process inner loops first. */ | 1334 |
1335 /* Process inner loops first. Don't walk loops added by the recursive | |
1336 calls because SSA form is not up-to-date. They can be handled in the | |
1337 next iteration. */ | |
1338 bitmap child_father_bbs = NULL; | |
1306 for (inner = loop->inner; inner != NULL; inner = inner->next) | 1339 for (inner = loop->inner; inner != NULL; inner = inner->next) |
1307 changed |= tree_unroll_loops_completely_1 (may_increase_size, | 1340 if ((unsigned) inner->num < num) |
1308 unroll_outer, father_bbs, | 1341 { |
1309 inner); | 1342 if (!child_father_bbs) |
1310 | 1343 child_father_bbs = BITMAP_ALLOC (NULL); |
1344 if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer, | |
1345 child_father_bbs, inner)) | |
1346 { | |
1347 bitmap_ior_into (father_bbs, child_father_bbs); | |
1348 bitmap_clear (child_father_bbs); | |
1349 changed = true; | |
1350 } | |
1351 } | |
1352 if (child_father_bbs) | |
1353 BITMAP_FREE (child_father_bbs); | |
1354 | |
1311 /* If we changed an inner loop we cannot process outer loops in this | 1355 /* If we changed an inner loop we cannot process outer loops in this |
1312 iteration because SSA form is not up-to-date. Continue with | 1356 iteration because SSA form is not up-to-date. Continue with |
1313 siblings of outer loops instead. */ | 1357 siblings of outer loops instead. */ |
1314 if (changed) | 1358 if (changed) |
1315 return true; | 1359 { |
1360 /* If we are recorded as father clear all other fathers that | |
1361 are necessarily covered already to avoid redundant work. */ | |
1362 if (bitmap_bit_p (father_bbs, loop->header->index)) | |
1363 { | |
1364 bitmap_clear (father_bbs); | |
1365 bitmap_set_bit (father_bbs, loop->header->index); | |
1366 } | |
1367 return true; | |
1368 } | |
1316 | 1369 |
1317 /* Don't unroll #pragma omp simd loops until the vectorizer | 1370 /* Don't unroll #pragma omp simd loops until the vectorizer |
1318 attempts to vectorize those. */ | 1371 attempts to vectorize those. */ |
1319 if (loop->force_vectorize) | 1372 if (loop->force_vectorize) |
1320 return false; | 1373 return false; |
1322 /* Try to unroll this loop. */ | 1375 /* Try to unroll this loop. */ |
1323 loop_father = loop_outer (loop); | 1376 loop_father = loop_outer (loop); |
1324 if (!loop_father) | 1377 if (!loop_father) |
1325 return false; | 1378 return false; |
1326 | 1379 |
1327 if (may_increase_size && optimize_loop_nest_for_speed_p (loop) | 1380 if (loop->unroll > 1) |
1381 ul = UL_ALL; | |
1382 else if (may_increase_size && optimize_loop_nest_for_speed_p (loop) | |
1328 /* Unroll outermost loops only if asked to do so or they do | 1383 /* Unroll outermost loops only if asked to do so or they do |
1329 not cause code growth. */ | 1384 not cause code growth. */ |
1330 && (unroll_outer || loop_outer (loop_father))) | 1385 && (unroll_outer || loop_outer (loop_father))) |
1331 ul = UL_ALL; | 1386 ul = UL_ALL; |
1332 else | 1387 else |
1333 ul = UL_NO_GROWTH; | 1388 ul = UL_NO_GROWTH; |
1334 | 1389 |
1335 if (canonicalize_loop_induction_variables | 1390 if (canonicalize_loop_induction_variables |
1336 (loop, false, ul, !flag_tree_loop_ivcanon)) | 1391 (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer)) |
1337 { | 1392 { |
1338 /* If we'll continue unrolling, we need to propagate constants | 1393 /* If we'll continue unrolling, we need to propagate constants |
1339 within the new basic blocks to fold away induction variable | 1394 within the new basic blocks to fold away induction variable |
1340 computations; otherwise, the size might blow up before the | 1395 computations; otherwise, the size might blow up before the |
1341 iteration is complete and the IR eventually cleaned up. */ | 1396 iteration is complete and the IR eventually cleaned up. */ |
1342 if (loop_outer (loop_father)) | 1397 if (loop_outer (loop_father)) |
1343 bitmap_set_bit (father_bbs, loop_father->header->index); | 1398 { |
1399 /* Once we process our father we will have processed | |
1400 the fathers of our children as well, so avoid doing | |
1401 redundant work and clear fathers we've gathered sofar. */ | |
1402 bitmap_clear (father_bbs); | |
1403 bitmap_set_bit (father_bbs, loop_father->header->index); | |
1404 } | |
1344 | 1405 |
1345 return true; | 1406 return true; |
1346 } | 1407 } |
1347 | 1408 |
1348 return false; | 1409 return false; |
1350 | 1411 |
1351 /* Unroll LOOPS completely if they iterate just few times. Unless | 1412 /* Unroll LOOPS completely if they iterate just few times. Unless |
1352 MAY_INCREASE_SIZE is true, perform the unrolling only if the | 1413 MAY_INCREASE_SIZE is true, perform the unrolling only if the |
1353 size of the code does not increase. */ | 1414 size of the code does not increase. */ |
1354 | 1415 |
1355 unsigned int | 1416 static unsigned int |
1356 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) | 1417 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) |
1357 { | 1418 { |
1358 bitmap father_bbs = BITMAP_ALLOC (NULL); | 1419 bitmap father_bbs = BITMAP_ALLOC (NULL); |
1359 bool changed; | 1420 bool changed; |
1360 int iteration = 0; | 1421 int iteration = 0; |
1406 bitmap_clear (father_bbs); | 1467 bitmap_clear (father_bbs); |
1407 /* Propagate the constants within the new basic blocks. */ | 1468 /* Propagate the constants within the new basic blocks. */ |
1408 EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi) | 1469 EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi) |
1409 { | 1470 { |
1410 loop_p father = get_loop (cfun, i); | 1471 loop_p father = get_loop (cfun, i); |
1411 basic_block *body = get_loop_body_in_dom_order (father); | 1472 bitmap exit_bbs = BITMAP_ALLOC (NULL); |
1412 for (unsigned j = 0; j < father->num_nodes; j++) | 1473 loop_exit *exit = father->exits->next; |
1413 propagate_constants_for_unrolling (body[j]); | 1474 while (exit->e) |
1414 free (body); | 1475 { |
1476 bitmap_set_bit (exit_bbs, exit->e->dest->index); | |
1477 exit = exit->next; | |
1478 } | |
1479 do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs); | |
1415 } | 1480 } |
1416 BITMAP_FREE (fathers); | 1481 BITMAP_FREE (fathers); |
1417 | 1482 |
1418 /* This will take care of removing completely unrolled loops | 1483 /* This will take care of removing completely unrolled loops |
1419 from the loop structures so we can continue unrolling now | 1484 from the loop structures so we can continue unrolling now |
1527 /* If we ever decide to run loop peeling more than once, we will need to | 1592 /* If we ever decide to run loop peeling more than once, we will need to |
1528 track loops already peeled in loop structures themselves to avoid | 1593 track loops already peeled in loop structures themselves to avoid |
1529 re-peeling the same loop multiple times. */ | 1594 re-peeling the same loop multiple times. */ |
1530 if (flag_peel_loops) | 1595 if (flag_peel_loops) |
1531 peeled_loops = BITMAP_ALLOC (NULL); | 1596 peeled_loops = BITMAP_ALLOC (NULL); |
1532 int val = tree_unroll_loops_completely (flag_unroll_loops | 1597 unsigned int val = tree_unroll_loops_completely (flag_unroll_loops |
1533 || flag_peel_loops | 1598 || flag_peel_loops |
1534 || optimize >= 3, true); | 1599 || optimize >= 3, true); |
1535 if (peeled_loops) | 1600 if (peeled_loops) |
1536 { | 1601 { |
1537 BITMAP_FREE (peeled_loops); | 1602 BITMAP_FREE (peeled_loops); |
1538 peeled_loops = NULL; | 1603 peeled_loops = NULL; |
1539 } | 1604 } |
1581 unsigned int | 1646 unsigned int |
1582 pass_complete_unrolli::execute (function *fun) | 1647 pass_complete_unrolli::execute (function *fun) |
1583 { | 1648 { |
1584 unsigned ret = 0; | 1649 unsigned ret = 0; |
1585 | 1650 |
1586 loop_optimizer_init (LOOPS_NORMAL | 1651 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); |
1587 | LOOPS_HAVE_RECORDED_EXITS); | |
1588 if (number_of_loops (fun) > 1) | 1652 if (number_of_loops (fun) > 1) |
1589 { | 1653 { |
1590 scev_initialize (); | 1654 scev_initialize (); |
1591 ret = tree_unroll_loops_completely (optimize >= 3, false); | 1655 ret = tree_unroll_loops_completely (optimize >= 3, false); |
1592 scev_finalize (); | 1656 scev_finalize (); |