Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-ssa-loop-ivcanon.c @ 118:fd00160c1b76
ifdef TARGET_64BIT
author | mir3636 |
---|---|
date | Tue, 27 Feb 2018 15:01:35 +0900 |
parents | 04ced10e8804 |
children | 84e7813d76e9 |
rev | line source |
---|---|
111 | 1 /* Induction variable canonicalization and loop peeling. |
2 Copyright (C) 2004-2017 Free Software Foundation, Inc. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
3 |
0 | 4 This file is part of GCC. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
5 |
0 | 6 GCC is free software; you can redistribute it and/or modify it |
7 under the terms of the GNU General Public License as published by the | |
8 Free Software Foundation; either version 3, or (at your option) any | |
9 later version. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
10 |
0 | 11 GCC is distributed in the hope that it will be useful, but WITHOUT |
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 for more details. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
15 |
0 | 16 You should have received a copy of the GNU General Public License |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 /* This pass detects the loops that iterate a constant number of times, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
21 adds a canonical induction variable (step -1, tested against 0) |
0 | 22 and replaces the exit test. This enables the less powerful rtl |
23 level analysis to use this information. | |
24 | |
25 This might spoil the code in some cases (by increasing register pressure). | |
26 Note that in the case the new variable is not needed, ivopts will get rid | |
27 of it, so it might only be a problem when there are no other linear induction | |
28 variables. In that case the created optimization possibilities are likely | |
29 to pay up. | |
30 | |
111 | 31 We also perform |
32 - complete unrolling (or peeling) when the loops is rolling few enough | |
33 times | |
34 - simple peeling (i.e. copying few initial iterations prior the loop) | |
35 when number of iteration estimate is known (typically by the profile | |
36 info). */ | |
0 | 37 |
38 #include "config.h" | |
39 #include "system.h" | |
40 #include "coretypes.h" | |
111 | 41 #include "backend.h" |
0 | 42 #include "tree.h" |
111 | 43 #include "gimple.h" |
44 #include "cfghooks.h" | |
45 #include "tree-pass.h" | |
46 #include "ssa.h" | |
47 #include "cgraph.h" | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
48 #include "gimple-pretty-print.h" |
111 | 49 #include "fold-const.h" |
50 #include "profile.h" | |
51 #include "gimple-fold.h" | |
52 #include "tree-eh.h" | |
53 #include "gimple-iterator.h" | |
54 #include "tree-cfg.h" | |
55 #include "tree-ssa-loop-manip.h" | |
56 #include "tree-ssa-loop-niter.h" | |
57 #include "tree-ssa-loop.h" | |
58 #include "tree-into-ssa.h" | |
0 | 59 #include "cfgloop.h" |
60 #include "tree-chrec.h" | |
61 #include "tree-scalar-evolution.h" | |
62 #include "params.h" | |
63 #include "tree-inline.h" | |
111 | 64 #include "tree-cfgcleanup.h" |
65 #include "builtins.h" | |
0 | 66 |
67 /* Specifies types of loops that may be unrolled. */ | |
68 | |
69 enum unroll_level | |
70 { | |
71 UL_SINGLE_ITER, /* Only loops that exit immediately in the first | |
72 iteration. */ | |
73 UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase | |
74 of code size. */ | |
75 UL_ALL /* All suitable loops. */ | |
76 }; | |
77 | |
78 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT | |
79 is the exit edge whose condition is replaced. */ | |
80 | |
81 static void | |
82 create_canonical_iv (struct loop *loop, edge exit, tree niter) | |
83 { | |
84 edge in; | |
85 tree type, var; | |
111 | 86 gcond *cond; |
0 | 87 gimple_stmt_iterator incr_at; |
88 enum tree_code cmp; | |
89 | |
90 if (dump_file && (dump_flags & TDF_DETAILS)) | |
91 { | |
92 fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num); | |
93 print_generic_expr (dump_file, niter, TDF_SLIM); | |
94 fprintf (dump_file, " iterations.\n"); | |
95 } | |
96 | |
111 | 97 cond = as_a <gcond *> (last_stmt (exit->src)); |
0 | 98 in = EDGE_SUCC (exit->src, 0); |
99 if (in == exit) | |
100 in = EDGE_SUCC (exit->src, 1); | |
101 | |
102 /* Note that we do not need to worry about overflows, since | |
103 type of niter is always unsigned and all comparisons are | |
104 just for equality/nonequality -- i.e. everything works | |
105 with a modulo arithmetics. */ | |
106 | |
107 type = TREE_TYPE (niter); | |
108 niter = fold_build2 (PLUS_EXPR, type, | |
109 niter, | |
110 build_int_cst (type, 1)); | |
111 incr_at = gsi_last_bb (in->src); | |
112 create_iv (niter, | |
113 build_int_cst (type, -1), | |
114 NULL_TREE, loop, | |
115 &incr_at, false, NULL, &var); | |
116 | |
117 cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; | |
118 gimple_cond_set_code (cond, cmp); | |
119 gimple_cond_set_lhs (cond, var); | |
120 gimple_cond_set_rhs (cond, build_int_cst (type, 0)); | |
121 update_stmt (cond); | |
122 } | |
123 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
124 /* Describe size of loop as detected by tree_estimate_loop_size. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
125 struct loop_size |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
126 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
127 /* Number of instructions in the loop. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
128 int overall; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
129 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
130 /* Number of instructions that will be likely optimized out in |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
131 peeled iterations of loop (i.e. computation based on induction |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
132 variable where induction variable starts at known constant.) */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
133 int eliminated_by_peeling; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
134 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
135 /* Same statistics for last iteration of loop: it is smaller because |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
136 instructions after exit are not executed. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
137 int last_iteration; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
138 int last_iteration_eliminated_by_peeling; |
111 | 139 |
140 /* If some IV computation will become constant. */ | |
141 bool constant_iv; | |
142 | |
143 /* Number of call stmts that are not a builtin and are pure or const | |
144 present on the hot path. */ | |
145 int num_pure_calls_on_hot_path; | |
146 /* Number of call stmts that are not a builtin and are not pure nor const | |
147 present on the hot path. */ | |
148 int num_non_pure_calls_on_hot_path; | |
149 /* Number of statements other than calls in the loop. */ | |
150 int non_call_stmts_on_hot_path; | |
151 /* Number of branches seen on the hot path. */ | |
152 int num_branches_on_hot_path; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
153 }; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
154 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
155 /* Return true if OP in STMT will be constant after peeling LOOP. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
156 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
157 static bool |
111 | 158 constant_after_peeling (tree op, gimple *stmt, struct loop *loop) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
159 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
160 if (is_gimple_min_invariant (op)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
161 return true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
162 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
163 /* We can still fold accesses to constant arrays when index is known. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
164 if (TREE_CODE (op) != SSA_NAME) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
165 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
166 tree base = op; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
167 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
168 /* First make fast look if we see constant array inside. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
169 while (handled_component_p (base)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
170 base = TREE_OPERAND (base, 0); |
111 | 171 if ((DECL_P (base) |
172 && ctor_for_folding (base) != error_mark_node) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
173 || CONSTANT_CLASS_P (base)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
174 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
175 /* If so, see if we understand all the indices. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
176 base = op; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
177 while (handled_component_p (base)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
178 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
179 if (TREE_CODE (base) == ARRAY_REF |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
180 && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
181 return false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
182 base = TREE_OPERAND (base, 0); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
183 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
184 return true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
185 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
186 return false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
187 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
188 |
111 | 189 /* Induction variables are constants when defined in loop. */ |
190 if (loop_containing_stmt (stmt) != loop) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
191 return false; |
111 | 192 tree ev = analyze_scalar_evolution (loop, op); |
193 if (chrec_contains_undetermined (ev) | |
194 || chrec_contains_symbols (ev)) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
195 return false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
196 return true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
197 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
198 |
111 | 199 /* Computes an estimated number of insns in LOOP. |
200 EXIT (if non-NULL) is an exite edge that will be eliminated in all but last | |
201 iteration of the loop. | |
202 EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration | |
203 of loop. | |
204 Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. | |
205 Stop estimating after UPPER_BOUND is met. Return true in this case. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
206 |
111 | 207 static bool |
208 tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, | |
209 struct loop_size *size, int upper_bound) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
210 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
211 basic_block *body = get_loop_body (loop); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
212 gimple_stmt_iterator gsi; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
213 unsigned int i; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
214 bool after_exit; |
111 | 215 vec<basic_block> path = get_loop_hot_path (loop); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
216 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
217 size->overall = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
218 size->eliminated_by_peeling = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
219 size->last_iteration = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
220 size->last_iteration_eliminated_by_peeling = 0; |
111 | 221 size->num_pure_calls_on_hot_path = 0; |
222 size->num_non_pure_calls_on_hot_path = 0; | |
223 size->non_call_stmts_on_hot_path = 0; | |
224 size->num_branches_on_hot_path = 0; | |
225 size->constant_iv = 0; | |
0 | 226 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
227 if (dump_file && (dump_flags & TDF_DETAILS)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
228 fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
229 for (i = 0; i < loop->num_nodes; i++) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
230 { |
111 | 231 if (edge_to_cancel && body[i] != edge_to_cancel->src |
232 && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src)) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
233 after_exit = true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
234 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
235 after_exit = false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
236 if (dump_file && (dump_flags & TDF_DETAILS)) |
111 | 237 fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, |
238 after_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
239 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
240 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
241 { |
111 | 242 gimple *stmt = gsi_stmt (gsi); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
243 int num = estimate_num_insns (stmt, &eni_size_weights); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
244 bool likely_eliminated = false; |
111 | 245 bool likely_eliminated_last = false; |
246 bool likely_eliminated_peeled = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
247 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
248 if (dump_file && (dump_flags & TDF_DETAILS)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
249 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
250 fprintf (dump_file, " size: %3i ", num); |
111 | 251 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
252 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
253 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
254 /* Look for reasons why we might optimize this stmt away. */ |
0 | 255 |
111 | 256 if (!gimple_has_side_effects (stmt)) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
257 { |
111 | 258 /* Exit conditional. */ |
259 if (exit && body[i] == exit->src | |
260 && stmt == last_stmt (exit->src)) | |
261 { | |
262 if (dump_file && (dump_flags & TDF_DETAILS)) | |
263 fprintf (dump_file, " Exit condition will be eliminated " | |
264 "in peeled copies.\n"); | |
265 likely_eliminated_peeled = true; | |
266 } | |
267 if (edge_to_cancel && body[i] == edge_to_cancel->src | |
268 && stmt == last_stmt (edge_to_cancel->src)) | |
269 { | |
270 if (dump_file && (dump_flags & TDF_DETAILS)) | |
271 fprintf (dump_file, " Exit condition will be eliminated " | |
272 "in last copy.\n"); | |
273 likely_eliminated_last = true; | |
274 } | |
275 /* Sets of IV variables */ | |
276 if (gimple_code (stmt) == GIMPLE_ASSIGN | |
277 && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop)) | |
278 { | |
279 if (dump_file && (dump_flags & TDF_DETAILS)) | |
280 fprintf (dump_file, " Induction variable computation will" | |
281 " be folded away.\n"); | |
282 likely_eliminated = true; | |
283 } | |
284 /* Assignments of IV variables. */ | |
285 else if (gimple_code (stmt) == GIMPLE_ASSIGN | |
286 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME | |
287 && constant_after_peeling (gimple_assign_rhs1 (stmt), | |
288 stmt, loop) | |
289 && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS | |
290 || constant_after_peeling (gimple_assign_rhs2 (stmt), | |
291 stmt, loop))) | |
292 { | |
293 size->constant_iv = true; | |
294 if (dump_file && (dump_flags & TDF_DETAILS)) | |
295 fprintf (dump_file, | |
296 " Constant expression will be folded away.\n"); | |
297 likely_eliminated = true; | |
298 } | |
299 /* Conditionals. */ | |
300 else if ((gimple_code (stmt) == GIMPLE_COND | |
301 && constant_after_peeling (gimple_cond_lhs (stmt), stmt, | |
302 loop) | |
303 && constant_after_peeling (gimple_cond_rhs (stmt), stmt, | |
304 loop) | |
305 /* We don't simplify all constant compares so make sure | |
306 they are not both constant already. See PR70288. */ | |
307 && (! is_gimple_min_invariant (gimple_cond_lhs (stmt)) | |
308 || ! is_gimple_min_invariant | |
309 (gimple_cond_rhs (stmt)))) | |
310 || (gimple_code (stmt) == GIMPLE_SWITCH | |
311 && constant_after_peeling (gimple_switch_index ( | |
312 as_a <gswitch *> | |
313 (stmt)), | |
314 stmt, loop) | |
315 && ! is_gimple_min_invariant | |
316 (gimple_switch_index | |
317 (as_a <gswitch *> (stmt))))) | |
318 { | |
319 if (dump_file && (dump_flags & TDF_DETAILS)) | |
320 fprintf (dump_file, " Constant conditional.\n"); | |
321 likely_eliminated = true; | |
322 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
323 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
324 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
325 size->overall += num; |
111 | 326 if (likely_eliminated || likely_eliminated_peeled) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
327 size->eliminated_by_peeling += num; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
328 if (!after_exit) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
329 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
330 size->last_iteration += num; |
111 | 331 if (likely_eliminated || likely_eliminated_last) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
332 size->last_iteration_eliminated_by_peeling += num; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
333 } |
111 | 334 if ((size->overall * 3 / 2 - size->eliminated_by_peeling |
335 - size->last_iteration_eliminated_by_peeling) > upper_bound) | |
336 { | |
337 free (body); | |
338 path.release (); | |
339 return true; | |
340 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
341 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
342 } |
111 | 343 while (path.length ()) |
344 { | |
345 basic_block bb = path.pop (); | |
346 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
347 { | |
348 gimple *stmt = gsi_stmt (gsi); | |
349 if (gimple_code (stmt) == GIMPLE_CALL | |
350 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))) | |
351 { | |
352 int flags = gimple_call_flags (stmt); | |
353 if (flags & (ECF_PURE | ECF_CONST)) | |
354 size->num_pure_calls_on_hot_path++; | |
355 else | |
356 size->num_non_pure_calls_on_hot_path++; | |
357 size->num_branches_on_hot_path ++; | |
358 } | |
359 /* Count inexpensive calls as non-calls, because they will likely | |
360 expand inline. */ | |
361 else if (gimple_code (stmt) != GIMPLE_DEBUG) | |
362 size->non_call_stmts_on_hot_path++; | |
363 if (((gimple_code (stmt) == GIMPLE_COND | |
364 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) | |
365 || constant_after_peeling (gimple_cond_rhs (stmt), stmt, | |
366 loop))) | |
367 || (gimple_code (stmt) == GIMPLE_SWITCH | |
368 && !constant_after_peeling (gimple_switch_index ( | |
369 as_a <gswitch *> (stmt)), | |
370 stmt, loop))) | |
371 && (!exit || bb != exit->src)) | |
372 size->num_branches_on_hot_path++; | |
373 } | |
374 } | |
375 path.release (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
376 if (dump_file && (dump_flags & TDF_DETAILS)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
377 fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
378 size->eliminated_by_peeling, size->last_iteration, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
379 size->last_iteration_eliminated_by_peeling); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
380 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
381 free (body); |
111 | 382 return false; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
383 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
384 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
385 /* Estimate number of insns of completely unrolled loop. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
386 It is (NUNROLL + 1) * size of loop body with taking into account |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
387 the fact that in last copy everything after exit conditional |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
388 is dead and that some instructions will be eliminated after |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
389 peeling. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
390 |
111 | 391 Loop body is likely going to simplify further, this is difficult |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
392 to guess, we just decrease the result by 1/3. */ |
0 | 393 |
394 static unsigned HOST_WIDE_INT | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
395 estimated_unrolled_size (struct loop_size *size, |
0 | 396 unsigned HOST_WIDE_INT nunroll) |
397 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
398 HOST_WIDE_INT unr_insns = ((nunroll) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
399 * (HOST_WIDE_INT) (size->overall |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
400 - size->eliminated_by_peeling)); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
401 if (!nunroll) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
402 unr_insns = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
403 unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
404 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
405 unr_insns = unr_insns * 2 / 3; |
0 | 406 if (unr_insns <= 0) |
407 unr_insns = 1; | |
408 | |
409 return unr_insns; | |
410 } | |
411 | |
111 | 412 /* Loop LOOP is known to not loop. See if there is an edge in the loop |
413 body that can be remove to make the loop to always exit and at | |
414 the same time it does not make any code potentially executed | |
415 during the last iteration dead. | |
416 | |
417 After complete unrolling we still may get rid of the conditional | |
418 on the exit in the last copy even if we have no idea what it does. | |
419 This is quite common case for loops of form | |
420 | |
421 int a[5]; | |
422 for (i=0;i<b;i++) | |
423 a[i]=0; | |
424 | |
425 Here we prove the loop to iterate 5 times but we do not know | |
426 it from induction variable. | |
427 | |
428 For now we handle only simple case where there is exit condition | |
429 just before the latch block and the latch block contains no statements | |
430 with side effect that may otherwise terminate the execution of loop | |
431 (such as by EH or by terminating the program or longjmp). | |
432 | |
433 In the general case we may want to cancel the paths leading to statements | |
434 loop-niter identified as having undefined effect in the last iteration. | |
435 The other cases are hopefully rare and will be cleaned up later. */ | |
436 | |
437 static edge | |
438 loop_edge_to_cancel (struct loop *loop) | |
439 { | |
440 vec<edge> exits; | |
441 unsigned i; | |
442 edge edge_to_cancel; | |
443 gimple_stmt_iterator gsi; | |
444 | |
445 /* We want only one predecestor of the loop. */ | |
446 if (EDGE_COUNT (loop->latch->preds) > 1) | |
447 return NULL; | |
448 | |
449 exits = get_loop_exit_edges (loop); | |
450 | |
451 FOR_EACH_VEC_ELT (exits, i, edge_to_cancel) | |
452 { | |
453 /* Find the other edge than the loop exit | |
454 leaving the conditoinal. */ | |
455 if (EDGE_COUNT (edge_to_cancel->src->succs) != 2) | |
456 continue; | |
457 if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel) | |
458 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1); | |
459 else | |
460 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0); | |
461 | |
462 /* We only can handle conditionals. */ | |
463 if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) | |
464 continue; | |
465 | |
466 /* We should never have conditionals in the loop latch. */ | |
467 gcc_assert (edge_to_cancel->dest != loop->header); | |
468 | |
469 /* Check that it leads to loop latch. */ | |
470 if (edge_to_cancel->dest != loop->latch) | |
471 continue; | |
472 | |
473 exits.release (); | |
474 | |
475 /* Verify that the code in loop latch does nothing that may end program | |
476 execution without really reaching the exit. This may include | |
477 non-pure/const function calls, EH statements, volatile ASMs etc. */ | |
478 for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi)) | |
479 if (gimple_has_side_effects (gsi_stmt (gsi))) | |
480 return NULL; | |
481 return edge_to_cancel; | |
482 } | |
483 exits.release (); | |
484 return NULL; | |
485 } | |
486 | |
487 /* Remove all tests for exits that are known to be taken after LOOP was | |
488 peeled NPEELED times. Put gcc_unreachable before every statement | |
489 known to not be executed. */ | |
490 | |
491 static bool | |
492 remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled) | |
493 { | |
494 struct nb_iter_bound *elt; | |
495 bool changed = false; | |
496 | |
497 for (elt = loop->bounds; elt; elt = elt->next) | |
498 { | |
499 /* If statement is known to be undefined after peeling, turn it | |
500 into unreachable (or trap when debugging experience is supposed | |
501 to be good). */ | |
502 if (!elt->is_exit | |
503 && wi::ltu_p (elt->bound, npeeled)) | |
504 { | |
505 gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt); | |
506 gcall *stmt = gimple_build_call | |
507 (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); | |
508 gimple_set_location (stmt, gimple_location (elt->stmt)); | |
509 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); | |
510 split_block (gimple_bb (stmt), stmt); | |
511 changed = true; | |
512 if (dump_file && (dump_flags & TDF_DETAILS)) | |
513 { | |
514 fprintf (dump_file, "Forced statement unreachable: "); | |
515 print_gimple_stmt (dump_file, elt->stmt, 0); | |
516 } | |
517 } | |
518 /* If we know the exit will be taken after peeling, update. */ | |
519 else if (elt->is_exit | |
520 && wi::leu_p (elt->bound, npeeled)) | |
521 { | |
522 basic_block bb = gimple_bb (elt->stmt); | |
523 edge exit_edge = EDGE_SUCC (bb, 0); | |
524 | |
525 if (dump_file && (dump_flags & TDF_DETAILS)) | |
526 { | |
527 fprintf (dump_file, "Forced exit to be taken: "); | |
528 print_gimple_stmt (dump_file, elt->stmt, 0); | |
529 } | |
530 if (!loop_exit_edge_p (loop, exit_edge)) | |
531 exit_edge = EDGE_SUCC (bb, 1); | |
532 exit_edge->probability = profile_probability::always (); | |
533 gcc_checking_assert (loop_exit_edge_p (loop, exit_edge)); | |
534 gcond *cond_stmt = as_a <gcond *> (elt->stmt); | |
535 if (exit_edge->flags & EDGE_TRUE_VALUE) | |
536 gimple_cond_make_true (cond_stmt); | |
537 else | |
538 gimple_cond_make_false (cond_stmt); | |
539 update_stmt (cond_stmt); | |
540 changed = true; | |
541 } | |
542 } | |
543 return changed; | |
544 } | |
545 | |
546 /* Remove all exits that are known to be never taken because of the loop bound | |
547 discovered. */ | |
548 | |
549 static bool | |
550 remove_redundant_iv_tests (struct loop *loop) | |
551 { | |
552 struct nb_iter_bound *elt; | |
553 bool changed = false; | |
554 | |
555 if (!loop->any_upper_bound) | |
556 return false; | |
557 for (elt = loop->bounds; elt; elt = elt->next) | |
558 { | |
559 /* Exit is pointless if it won't be taken before loop reaches | |
560 upper bound. */ | |
561 if (elt->is_exit && loop->any_upper_bound | |
562 && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound)) | |
563 { | |
564 basic_block bb = gimple_bb (elt->stmt); | |
565 edge exit_edge = EDGE_SUCC (bb, 0); | |
566 struct tree_niter_desc niter; | |
567 | |
568 if (!loop_exit_edge_p (loop, exit_edge)) | |
569 exit_edge = EDGE_SUCC (bb, 1); | |
570 | |
571 /* Only when we know the actual number of iterations, not | |
572 just a bound, we can remove the exit. */ | |
573 if (!number_of_iterations_exit (loop, exit_edge, | |
574 &niter, false, false) | |
575 || !integer_onep (niter.assumptions) | |
576 || !integer_zerop (niter.may_be_zero) | |
577 || !niter.niter | |
578 || TREE_CODE (niter.niter) != INTEGER_CST | |
579 || !wi::ltu_p (loop->nb_iterations_upper_bound, | |
580 wi::to_widest (niter.niter))) | |
581 continue; | |
582 | |
583 if (dump_file && (dump_flags & TDF_DETAILS)) | |
584 { | |
585 fprintf (dump_file, "Removed pointless exit: "); | |
586 print_gimple_stmt (dump_file, elt->stmt, 0); | |
587 } | |
588 gcond *cond_stmt = as_a <gcond *> (elt->stmt); | |
589 if (exit_edge->flags & EDGE_TRUE_VALUE) | |
590 gimple_cond_make_false (cond_stmt); | |
591 else | |
592 gimple_cond_make_true (cond_stmt); | |
593 update_stmt (cond_stmt); | |
594 changed = true; | |
595 } | |
596 } | |
597 return changed; | |
598 } | |
599 | |
600 /* Stores loops that will be unlooped and edges that will be removed | |
601 after we process whole loop tree. */ | |
602 static vec<loop_p> loops_to_unloop; | |
603 static vec<int> loops_to_unloop_nunroll; | |
604 static vec<edge> edges_to_remove; | |
605 /* Stores loops that has been peeled. */ | |
606 static bitmap peeled_loops; | |
607 | |
608 /* Cancel all fully unrolled loops by putting __builtin_unreachable | |
609 on the latch edge. | |
610 We do it after all unrolling since unlooping moves basic blocks | |
611 across loop boundaries trashing loop closed SSA form as well | |
612 as SCEV info needed to be intact during unrolling. | |
613 | |
614 IRRED_INVALIDATED is used to bookkeep if information about | |
615 irreducible regions may become invalid as a result | |
616 of the transformation. | |
617 LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case | |
618 when we need to go into loop closed SSA form. */ | |
619 | |
620 static void | |
621 unloop_loops (bitmap loop_closed_ssa_invalidated, | |
622 bool *irred_invalidated) | |
623 { | |
624 while (loops_to_unloop.length ()) | |
625 { | |
626 struct loop *loop = loops_to_unloop.pop (); | |
627 int n_unroll = loops_to_unloop_nunroll.pop (); | |
628 basic_block latch = loop->latch; | |
629 edge latch_edge = loop_latch_edge (loop); | |
630 int flags = latch_edge->flags; | |
631 location_t locus = latch_edge->goto_locus; | |
632 gcall *stmt; | |
633 gimple_stmt_iterator gsi; | |
634 | |
635 remove_exits_and_undefined_stmts (loop, n_unroll); | |
636 | |
637 /* Unloop destroys the latch edge. */ | |
638 unloop (loop, irred_invalidated, loop_closed_ssa_invalidated); | |
639 | |
640 /* Create new basic block for the latch edge destination and wire | |
641 it in. */ | |
642 stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); | |
643 latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags); | |
644 latch_edge->probability = profile_probability::never (); | |
645 latch_edge->flags |= flags; | |
646 latch_edge->goto_locus = locus; | |
647 | |
648 add_bb_to_loop (latch_edge->dest, current_loops->tree_root); | |
649 latch_edge->dest->count = profile_count::zero (); | |
650 latch_edge->dest->frequency = 0; | |
651 set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); | |
652 | |
653 gsi = gsi_start_bb (latch_edge->dest); | |
654 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); | |
655 } | |
656 loops_to_unloop.release (); | |
657 loops_to_unloop_nunroll.release (); | |
658 | |
659 /* Remove edges in peeled copies. */ | |
660 unsigned i; | |
661 edge e; | |
662 FOR_EACH_VEC_ELT (edges_to_remove, i, e) | |
663 { | |
664 bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated); | |
665 gcc_assert (ok); | |
666 } | |
667 edges_to_remove.release (); | |
668 } | |
669 | |
0 | 670 /* Tries to unroll LOOP completely, i.e. NITER times. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
671 UL determines which loops we are allowed to unroll. |
111 | 672 EXIT is the exit of the loop that should be eliminated. |
673 MAXITER specfy bound on number of iterations, -1 if it is | |
674 not known or too large for HOST_WIDE_INT. The location | |
675 LOCUS corresponding to the loop is used when emitting | |
676 a summary of the unroll to the dump file. */ | |
0 | 677 |
678 static bool | |
679 try_unroll_loop_completely (struct loop *loop, | |
680 edge exit, tree niter, | |
111 | 681 enum unroll_level ul, |
682 HOST_WIDE_INT maxiter, | |
683 location_t locus) | |
0 | 684 { |
111 | 685 unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
686 struct loop_size size; |
111 | 687 bool n_unroll_found = false; |
688 edge edge_to_cancel = NULL; | |
689 dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS; | |
0 | 690 |
111 | 691 /* See if we proved number of iterations to be low constant. |
692 | |
693 EXIT is an edge that will be removed in all but last iteration of | |
694 the loop. | |
695 | |
696 EDGE_TO_CACNEL is an edge that will be removed from the last iteration | |
697 of the unrolled sequence and is expected to make the final loop not | |
698 rolling. | |
699 | |
700 If the number of execution of loop is determined by standard induction | |
701 variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving | |
702 from the iv test. */ | |
703 if (tree_fits_uhwi_p (niter)) | |
704 { | |
705 n_unroll = tree_to_uhwi (niter); | |
706 n_unroll_found = true; | |
707 edge_to_cancel = EDGE_SUCC (exit->src, 0); | |
708 if (edge_to_cancel == exit) | |
709 edge_to_cancel = EDGE_SUCC (exit->src, 1); | |
710 } | |
711 /* We do not know the number of iterations and thus we can not eliminate | |
712 the EXIT edge. */ | |
713 else | |
714 exit = NULL; | |
715 | |
716 /* See if we can improve our estimate by using recorded loop bounds. */ | |
717 if (maxiter >= 0 | |
718 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) | |
719 { | |
720 n_unroll = maxiter; | |
721 n_unroll_found = true; | |
722 /* Loop terminates before the IV variable test, so we can not | |
723 remove it in the last iteration. */ | |
724 edge_to_cancel = NULL; | |
725 } | |
726 | |
727 if (!n_unroll_found) | |
0 | 728 return false; |
729 | |
111 | 730 if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) |
731 { | |
732 if (dump_file && (dump_flags & TDF_DETAILS)) | |
733 fprintf (dump_file, "Not unrolling loop %d " | |
734 "(--param max-completely-peel-times limit reached).\n", | |
735 loop->num); | |
736 return false; | |
737 } | |
0 | 738 |
111 | 739 if (!edge_to_cancel) |
740 edge_to_cancel = loop_edge_to_cancel (loop); | |
0 | 741 |
742 if (n_unroll) | |
743 { | |
111 | 744 bool large; |
0 | 745 if (ul == UL_SINGLE_ITER) |
746 return false; | |
747 | |
111 | 748 /* EXIT can be removed only if we are sure it passes first N_UNROLL |
749 iterations. */ | |
750 bool remove_exit = (exit && niter | |
751 && TREE_CODE (niter) == INTEGER_CST | |
752 && wi::leu_p (n_unroll, wi::to_widest (niter))); | |
753 | |
754 large = tree_estimate_loop_size | |
755 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, | |
756 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
757 ninsns = size.overall; |
111 | 758 if (large) |
759 { | |
760 if (dump_file && (dump_flags & TDF_DETAILS)) | |
761 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", | |
762 loop->num); | |
763 return false; | |
764 } | |
0 | 765 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
766 unr_insns = estimated_unrolled_size (&size, n_unroll); |
0 | 767 if (dump_file && (dump_flags & TDF_DETAILS)) |
768 { | |
769 fprintf (dump_file, " Loop size: %d\n", (int) ninsns); | |
770 fprintf (dump_file, " Estimated size after unrolling: %d\n", | |
771 (int) unr_insns); | |
772 } | |
773 | |
111 | 774 /* If the code is going to shrink, we don't need to be extra cautious |
775 on guessing if the unrolling is going to be profitable. */ | |
776 if (unr_insns | |
777 /* If there is IV variable that will become constant, we save | |
778 one instruction in the loop prologue we do not account | |
779 otherwise. */ | |
780 <= ninsns + (size.constant_iv != false)) | |
781 ; | |
782 /* We unroll only inner loops, because we do not consider it profitable | |
783 otheriwse. We still can cancel loopback edge of not rolling loop; | |
784 this is always a good idea. */ | |
785 else if (ul == UL_NO_GROWTH) | |
786 { | |
787 if (dump_file && (dump_flags & TDF_DETAILS)) | |
788 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", | |
789 loop->num); | |
790 return false; | |
791 } | |
792 /* Outer loops tend to be less interesting candidates for complete | |
793 unrolling unless we can do a lot of propagation into the inner loop | |
794 body. For now we disable outer loop unrolling when the code would | |
795 grow. */ | |
796 else if (loop->inner) | |
797 { | |
798 if (dump_file && (dump_flags & TDF_DETAILS)) | |
799 fprintf (dump_file, "Not unrolling loop %d: " | |
800 "it is not innermost and code would grow.\n", | |
801 loop->num); | |
802 return false; | |
803 } | |
804 /* If there is call on a hot path through the loop, then | |
805 there is most probably not much to optimize. */ | |
806 else if (size.num_non_pure_calls_on_hot_path) | |
0 | 807 { |
808 if (dump_file && (dump_flags & TDF_DETAILS)) | |
111 | 809 fprintf (dump_file, "Not unrolling loop %d: " |
810 "contains call and code would grow.\n", | |
811 loop->num); | |
812 return false; | |
813 } | |
814 /* If there is pure/const call in the function, then we | |
815 can still optimize the unrolled loop body if it contains | |
816 some other interesting code than the calls and code | |
817 storing or cumulating the return value. */ | |
818 else if (size.num_pure_calls_on_hot_path | |
819 /* One IV increment, one test, one ivtmp store | |
820 and one useful stmt. That is about minimal loop | |
821 doing pure call. */ | |
822 && (size.non_call_stmts_on_hot_path | |
823 <= 3 + size.num_pure_calls_on_hot_path)) | |
824 { | |
825 if (dump_file && (dump_flags & TDF_DETAILS)) | |
826 fprintf (dump_file, "Not unrolling loop %d: " | |
827 "contains just pure calls and code would grow.\n", | |
828 loop->num); | |
829 return false; | |
830 } | |
831 /* Complete unrolling is a major win when control flow is removed and | |
832 one big basic block is created. If the loop contains control flow | |
833 the optimization may still be a win because of eliminating the loop | |
834 overhead but it also may blow the branch predictor tables. | |
835 Limit number of branches on the hot path through the peeled | |
836 sequence. */ | |
837 else if (size.num_branches_on_hot_path * (int)n_unroll | |
838 > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) | |
839 { | |
840 if (dump_file && (dump_flags & TDF_DETAILS)) | |
841 fprintf (dump_file, "Not unrolling loop %d: " | |
842 " number of branches on hot path in the unrolled sequence" | |
843 " reach --param max-peel-branches limit.\n", | |
844 loop->num); | |
845 return false; | |
846 } | |
847 else if (unr_insns | |
848 > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) | |
849 { | |
850 if (dump_file && (dump_flags & TDF_DETAILS)) | |
851 fprintf (dump_file, "Not unrolling loop %d: " | |
0 | 852 "(--param max-completely-peeled-insns limit reached).\n", |
853 loop->num); | |
854 return false; | |
855 } | |
111 | 856 if (!n_unroll) |
857 dump_printf_loc (report_flags, locus, | |
858 "loop turned into non-loop; it never loops.\n"); | |
0 | 859 |
860 initialize_original_copy_tables (); | |
111 | 861 auto_sbitmap wont_exit (n_unroll + 1); |
862 if (exit && niter | |
863 && TREE_CODE (niter) == INTEGER_CST | |
864 && wi::leu_p (n_unroll, wi::to_widest (niter))) | |
865 { | |
866 bitmap_ones (wont_exit); | |
867 if (wi::eq_p (wi::to_widest (niter), n_unroll) | |
868 || edge_to_cancel) | |
869 bitmap_clear_bit (wont_exit, 0); | |
870 } | |
871 else | |
872 { | |
873 exit = NULL; | |
874 bitmap_clear (wont_exit); | |
875 } | |
0 | 876 |
877 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
878 n_unroll, wont_exit, | |
111 | 879 exit, &edges_to_remove, |
0 | 880 DLTHE_FLAG_UPDATE_FREQ |
881 | DLTHE_FLAG_COMPLETTE_PEEL)) | |
882 { | |
883 free_original_copy_tables (); | |
111 | 884 if (dump_file && (dump_flags & TDF_DETAILS)) |
885 fprintf (dump_file, "Failed to duplicate the loop\n"); | |
0 | 886 return false; |
887 } | |
888 | |
889 free_original_copy_tables (); | |
890 } | |
891 | |
111 | 892 /* Remove the conditional from the last copy of the loop. */ |
893 if (edge_to_cancel) | |
894 { | |
895 gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src)); | |
896 force_edge_cold (edge_to_cancel, true); | |
897 if (edge_to_cancel->flags & EDGE_TRUE_VALUE) | |
898 gimple_cond_make_false (cond); | |
899 else | |
900 gimple_cond_make_true (cond); | |
901 update_stmt (cond); | |
902 /* Do not remove the path. Doing so may remove outer loop | |
903 and confuse bookkeeping code in tree_unroll_loops_completelly. */ | |
904 } | |
905 | |
906 /* Store the loop for later unlooping and exit removal. */ | |
907 loops_to_unloop.safe_push (loop); | |
908 loops_to_unloop_nunroll.safe_push (n_unroll); | |
909 | |
910 if (dump_enabled_p ()) | |
911 { | |
912 if (!n_unroll) | |
913 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, | |
914 "loop turned into non-loop; it never loops\n"); | |
915 else | |
916 { | |
917 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, | |
918 "loop with %d iterations completely unrolled", | |
919 (int) (n_unroll + 1)); | |
920 if (loop->header->count.initialized_p ()) | |
921 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, | |
922 " (header execution count %d)", | |
923 (int)loop->header->count.to_gcov_type ()); | |
924 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n"); | |
925 } | |
926 } | |
0 | 927 |
928 if (dump_file && (dump_flags & TDF_DETAILS)) | |
111 | 929 { |
930 if (exit) | |
931 fprintf (dump_file, "Exit condition of peeled iterations was " | |
932 "eliminated.\n"); | |
933 if (edge_to_cancel) | |
934 fprintf (dump_file, "Last iteration exit edge was proved true.\n"); | |
935 else | |
936 fprintf (dump_file, "Latch of last iteration was marked by " | |
937 "__builtin_unreachable ().\n"); | |
938 } | |
0 | 939 |
940 return true; | |
941 } | |
942 | |
111 | 943 /* Return number of instructions after peeling. */ |
944 static unsigned HOST_WIDE_INT | |
945 estimated_peeled_sequence_size (struct loop_size *size, | |
946 unsigned HOST_WIDE_INT npeel) | |
947 { | |
948 return MAX (npeel * (HOST_WIDE_INT) (size->overall | |
949 - size->eliminated_by_peeling), 1); | |
950 } | |
951 | |
952 /* If the loop is expected to iterate N times and is | |
953 small enough, duplicate the loop body N+1 times before | |
954 the loop itself. This way the hot path will never | |
955 enter the loop. | |
956 Parameters are the same as for try_unroll_loops_completely */ | |
957 | |
958 static bool | |
959 try_peel_loop (struct loop *loop, | |
960 edge exit, tree niter, | |
961 HOST_WIDE_INT maxiter) | |
962 { | |
963 HOST_WIDE_INT npeel; | |
964 struct loop_size size; | |
965 int peeled_size; | |
966 | |
967 if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 | |
968 || !peeled_loops) | |
969 return false; | |
970 | |
971 if (bitmap_bit_p (peeled_loops, loop->num)) | |
972 { | |
973 if (dump_file) | |
974 fprintf (dump_file, "Not peeling: loop is already peeled\n"); | |
975 return false; | |
976 } | |
977 | |
978 /* Peel only innermost loops. | |
979 While the code is perfectly capable of peeling non-innermost loops, | |
980 the heuristics would probably need some improvements. */ | |
981 if (loop->inner) | |
982 { | |
983 if (dump_file) | |
984 fprintf (dump_file, "Not peeling: outer loop\n"); | |
985 return false; | |
986 } | |
987 | |
988 if (!optimize_loop_for_speed_p (loop)) | |
989 { | |
990 if (dump_file) | |
991 fprintf (dump_file, "Not peeling: cold loop\n"); | |
992 return false; | |
993 } | |
994 | |
995 /* Check if there is an estimate on the number of iterations. */ | |
996 npeel = estimated_loop_iterations_int (loop); | |
997 if (npeel < 0) | |
998 npeel = likely_max_loop_iterations_int (loop); | |
999 if (npeel < 0) | |
1000 { | |
1001 if (dump_file) | |
1002 fprintf (dump_file, "Not peeling: number of iterations is not " | |
1003 "estimated\n"); | |
1004 return false; | |
1005 } | |
1006 if (maxiter >= 0 && maxiter <= npeel) | |
1007 { | |
1008 if (dump_file) | |
1009 fprintf (dump_file, "Not peeling: upper bound is known so can " | |
1010 "unroll completely\n"); | |
1011 return false; | |
1012 } | |
1013 | |
1014 /* We want to peel estimated number of iterations + 1 (so we never | |
1015 enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES | |
1016 and be sure to avoid overflows. */ | |
1017 if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) | |
1018 { | |
1019 if (dump_file) | |
1020 fprintf (dump_file, "Not peeling: rolls too much " | |
1021 "(%i + 1 > --param max-peel-times)\n", (int) npeel); | |
1022 return false; | |
1023 } | |
1024 npeel++; | |
1025 | |
1026 /* Check peeled loops size. */ | |
1027 tree_estimate_loop_size (loop, exit, NULL, &size, | |
1028 PARAM_VALUE (PARAM_MAX_PEELED_INSNS)); | |
1029 if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel)) | |
1030 > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) | |
1031 { | |
1032 if (dump_file) | |
1033 fprintf (dump_file, "Not peeling: peeled sequence size is too large " | |
1034 "(%i insns > --param max-peel-insns)", peeled_size); | |
1035 return false; | |
1036 } | |
1037 | |
1038 /* Duplicate possibly eliminating the exits. */ | |
1039 initialize_original_copy_tables (); | |
1040 auto_sbitmap wont_exit (npeel + 1); | |
1041 if (exit && niter | |
1042 && TREE_CODE (niter) == INTEGER_CST | |
1043 && wi::leu_p (npeel, wi::to_widest (niter))) | |
1044 { | |
1045 bitmap_ones (wont_exit); | |
1046 bitmap_clear_bit (wont_exit, 0); | |
1047 } | |
1048 else | |
1049 { | |
1050 exit = NULL; | |
1051 bitmap_clear (wont_exit); | |
1052 } | |
1053 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1054 npeel, wont_exit, | |
1055 exit, &edges_to_remove, | |
1056 DLTHE_FLAG_UPDATE_FREQ)) | |
1057 { | |
1058 free_original_copy_tables (); | |
1059 return false; | |
1060 } | |
1061 free_original_copy_tables (); | |
1062 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1063 { | |
1064 fprintf (dump_file, "Peeled loop %d, %i times.\n", | |
1065 loop->num, (int) npeel); | |
1066 } | |
1067 if (loop->any_estimate) | |
1068 { | |
1069 if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) | |
1070 loop->nb_iterations_estimate -= npeel; | |
1071 else | |
1072 loop->nb_iterations_estimate = 0; | |
1073 } | |
1074 if (loop->any_upper_bound) | |
1075 { | |
1076 if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound)) | |
1077 loop->nb_iterations_upper_bound -= npeel; | |
1078 else | |
1079 loop->nb_iterations_upper_bound = 0; | |
1080 } | |
1081 if (loop->any_likely_upper_bound) | |
1082 { | |
1083 if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound)) | |
1084 loop->nb_iterations_likely_upper_bound -= npeel; | |
1085 else | |
1086 { | |
1087 loop->any_estimate = true; | |
1088 loop->nb_iterations_estimate = 0; | |
1089 loop->nb_iterations_likely_upper_bound = 0; | |
1090 } | |
1091 } | |
1092 profile_count entry_count = profile_count::zero (); | |
1093 int entry_freq = 0; | |
1094 | |
1095 edge e; | |
1096 edge_iterator ei; | |
1097 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
1098 if (e->src != loop->latch) | |
1099 { | |
1100 if (e->src->count.initialized_p ()) | |
1101 entry_count = e->src->count + e->src->count; | |
1102 entry_freq += e->src->frequency; | |
1103 gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); | |
1104 } | |
1105 profile_probability p = profile_probability::very_unlikely (); | |
1106 if (loop->header->count > 0) | |
1107 p = entry_count.probability_in (loop->header->count); | |
1108 else if (loop->header->frequency) | |
1109 p = profile_probability::probability_in_gcov_type | |
1110 (entry_freq, loop->header->frequency); | |
1111 scale_loop_profile (loop, p, 0); | |
1112 bitmap_set_bit (peeled_loops, loop->num); | |
1113 return true; | |
1114 } | |
0 | 1115 /* Adds a canonical induction variable to LOOP if suitable. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1116 CREATE_IV is true if we may create a new iv. UL determines |
0 | 1117 which loops we are allowed to completely unroll. If TRY_EVAL is true, we try |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1118 to determine the number of iterations of a loop by direct evaluation. |
111 | 1119 Returns true if cfg is changed. */ |
0 | 1120 |
1121 static bool | |
1122 canonicalize_loop_induction_variables (struct loop *loop, | |
1123 bool create_iv, enum unroll_level ul, | |
1124 bool try_eval) | |
1125 { | |
1126 edge exit = NULL; | |
1127 tree niter; | |
111 | 1128 HOST_WIDE_INT maxiter; |
1129 bool modified = false; | |
1130 location_t locus = UNKNOWN_LOCATION; | |
0 | 1131 |
1132 niter = number_of_latch_executions (loop); | |
111 | 1133 exit = single_exit (loop); |
0 | 1134 if (TREE_CODE (niter) == INTEGER_CST) |
111 | 1135 locus = gimple_location (last_stmt (exit->src)); |
0 | 1136 else |
1137 { | |
1138 /* If the loop has more than one exit, try checking all of them | |
1139 for # of iterations determinable through scev. */ | |
111 | 1140 if (!exit) |
0 | 1141 niter = find_loop_niter (loop, &exit); |
1142 | |
1143 /* Finally if everything else fails, try brute force evaluation. */ | |
1144 if (try_eval | |
1145 && (chrec_contains_undetermined (niter) | |
1146 || TREE_CODE (niter) != INTEGER_CST)) | |
1147 niter = find_loop_niter_by_eval (loop, &exit); | |
1148 | |
111 | 1149 if (exit) |
1150 locus = gimple_location (last_stmt (exit->src)); | |
1151 | |
1152 if (TREE_CODE (niter) != INTEGER_CST) | |
1153 exit = NULL; | |
0 | 1154 } |
1155 | |
111 | 1156 /* We work exceptionally hard here to estimate the bound |
1157 by find_loop_niter_by_eval. Be sure to keep it for future. */ | |
1158 if (niter && TREE_CODE (niter) == INTEGER_CST) | |
1159 { | |
1160 record_niter_bound (loop, wi::to_widest (niter), | |
1161 exit == single_likely_exit (loop), true); | |
1162 } | |
1163 | |
1164 /* Force re-computation of loop bounds so we can remove redundant exits. */ | |
1165 maxiter = max_loop_iterations_int (loop); | |
1166 | |
1167 if (dump_file && (dump_flags & TDF_DETAILS) | |
1168 && TREE_CODE (niter) == INTEGER_CST) | |
0 | 1169 { |
1170 fprintf (dump_file, "Loop %d iterates ", loop->num); | |
1171 print_generic_expr (dump_file, niter, TDF_SLIM); | |
1172 fprintf (dump_file, " times.\n"); | |
1173 } | |
111 | 1174 if (dump_file && (dump_flags & TDF_DETAILS) |
1175 && maxiter >= 0) | |
1176 { | |
1177 fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num, | |
1178 (int)maxiter); | |
1179 } | |
1180 if (dump_file && (dump_flags & TDF_DETAILS) | |
1181 && likely_max_loop_iterations_int (loop) >= 0) | |
1182 { | |
1183 fprintf (dump_file, "Loop %d likely iterates at most %i times.\n", | |
1184 loop->num, (int)likely_max_loop_iterations_int (loop)); | |
1185 } | |
0 | 1186 |
111 | 1187 /* Remove exits that are known to be never taken based on loop bound. |
1188 Needs to be called after compilation of max_loop_iterations_int that | |
1189 populates the loop bounds. */ | |
1190 modified |= remove_redundant_iv_tests (loop); | |
1191 | |
1192 if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus)) | |
0 | 1193 return true; |
1194 | |
111 | 1195 if (create_iv |
1196 && niter && !chrec_contains_undetermined (niter) | |
1197 && exit && just_once_each_iteration_p (loop, exit->src)) | |
0 | 1198 create_canonical_iv (loop, exit, niter); |
1199 | |
111 | 1200 if (ul == UL_ALL) |
1201 modified |= try_peel_loop (loop, exit, niter, maxiter); | |
1202 | |
1203 return modified; | |
0 | 1204 } |
1205 | |
1206 /* The main entry point of the pass. Adds canonical induction variables | |
1207 to the suitable loops. */ | |
1208 | |
1209 unsigned int | |
1210 canonicalize_induction_variables (void) | |
1211 { | |
1212 struct loop *loop; | |
1213 bool changed = false; | |
111 | 1214 bool irred_invalidated = false; |
1215 bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1216 |
111 | 1217 estimate_numbers_of_iterations (cfun); |
1218 | |
1219 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | |
0 | 1220 { |
1221 changed |= canonicalize_loop_induction_variables (loop, | |
1222 true, UL_SINGLE_ITER, | |
1223 true); | |
1224 } | |
111 | 1225 gcc_assert (!need_ssa_update_p (cfun)); |
1226 | |
1227 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); | |
1228 if (irred_invalidated | |
1229 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
1230 mark_irreducible_loops (); | |
0 | 1231 |
1232 /* Clean up the information about numbers of iterations, since brute force | |
1233 evaluation could reveal new information. */ | |
111 | 1234 free_numbers_of_iterations_estimates (cfun); |
0 | 1235 scev_reset (); |
1236 | |
111 | 1237 if (!bitmap_empty_p (loop_closed_ssa_invalidated)) |
1238 { | |
1239 gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA)); | |
1240 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); | |
1241 } | |
1242 BITMAP_FREE (loop_closed_ssa_invalidated); | |
1243 | |
0 | 1244 if (changed) |
1245 return TODO_cleanup_cfg; | |
1246 return 0; | |
1247 } | |
1248 | |
111 | 1249 /* Propagate constant SSA_NAMEs defined in basic block BB. */ |
1250 | |
1251 static void | |
1252 propagate_constants_for_unrolling (basic_block bb) | |
1253 { | |
1254 /* Look for degenerate PHI nodes with constant argument. */ | |
1255 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); ) | |
1256 { | |
1257 gphi *phi = gsi.phi (); | |
1258 tree result = gimple_phi_result (phi); | |
1259 tree arg = gimple_phi_arg_def (phi, 0); | |
1260 | |
1261 if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result) | |
1262 && gimple_phi_num_args (phi) == 1 | |
1263 && CONSTANT_CLASS_P (arg)) | |
1264 { | |
1265 replace_uses_by (result, arg); | |
1266 gsi_remove (&gsi, true); | |
1267 release_ssa_name (result); | |
1268 } | |
1269 else | |
1270 gsi_next (&gsi); | |
1271 } | |
1272 | |
1273 /* Look for assignments to SSA names with constant RHS. */ | |
1274 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ) | |
1275 { | |
1276 gimple *stmt = gsi_stmt (gsi); | |
1277 tree lhs; | |
1278 | |
1279 if (is_gimple_assign (stmt) | |
1280 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant | |
1281 && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME) | |
1282 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) | |
1283 { | |
1284 replace_uses_by (lhs, gimple_assign_rhs1 (stmt)); | |
1285 gsi_remove (&gsi, true); | |
1286 release_ssa_name (lhs); | |
1287 } | |
1288 else | |
1289 gsi_next (&gsi); | |
1290 } | |
1291 } | |
1292 | |
1293 /* Process loops from innermost to outer, stopping at the innermost | |
1294 loop we unrolled. */ | |
1295 | |
1296 static bool | |
1297 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, | |
1298 bitmap father_bbs, struct loop *loop) | |
1299 { | |
1300 struct loop *loop_father; | |
1301 bool changed = false; | |
1302 struct loop *inner; | |
1303 enum unroll_level ul; | |
1304 | |
1305 /* Process inner loops first. */ | |
1306 for (inner = loop->inner; inner != NULL; inner = inner->next) | |
1307 changed |= tree_unroll_loops_completely_1 (may_increase_size, | |
1308 unroll_outer, father_bbs, | |
1309 inner); | |
1310 | |
1311 /* If we changed an inner loop we cannot process outer loops in this | |
1312 iteration because SSA form is not up-to-date. Continue with | |
1313 siblings of outer loops instead. */ | |
1314 if (changed) | |
1315 return true; | |
1316 | |
1317 /* Don't unroll #pragma omp simd loops until the vectorizer | |
1318 attempts to vectorize those. */ | |
1319 if (loop->force_vectorize) | |
1320 return false; | |
1321 | |
1322 /* Try to unroll this loop. */ | |
1323 loop_father = loop_outer (loop); | |
1324 if (!loop_father) | |
1325 return false; | |
1326 | |
1327 if (may_increase_size && optimize_loop_nest_for_speed_p (loop) | |
1328 /* Unroll outermost loops only if asked to do so or they do | |
1329 not cause code growth. */ | |
1330 && (unroll_outer || loop_outer (loop_father))) | |
1331 ul = UL_ALL; | |
1332 else | |
1333 ul = UL_NO_GROWTH; | |
1334 | |
1335 if (canonicalize_loop_induction_variables | |
1336 (loop, false, ul, !flag_tree_loop_ivcanon)) | |
1337 { | |
1338 /* If we'll continue unrolling, we need to propagate constants | |
1339 within the new basic blocks to fold away induction variable | |
1340 computations; otherwise, the size might blow up before the | |
1341 iteration is complete and the IR eventually cleaned up. */ | |
1342 if (loop_outer (loop_father)) | |
1343 bitmap_set_bit (father_bbs, loop_father->header->index); | |
1344 | |
1345 return true; | |
1346 } | |
1347 | |
1348 return false; | |
1349 } | |
1350 | |
0 | 1351 /* Unroll LOOPS completely if they iterate just few times. Unless |
1352 MAY_INCREASE_SIZE is true, perform the unrolling only if the | |
1353 size of the code does not increase. */ | |
1354 | |
1355 unsigned int | |
1356 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) | |
1357 { | |
111 | 1358 bitmap father_bbs = BITMAP_ALLOC (NULL); |
0 | 1359 bool changed; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1360 int iteration = 0; |
111 | 1361 bool irred_invalidated = false; |
1362 | |
1363 estimate_numbers_of_iterations (cfun); | |
0 | 1364 |
1365 do | |
1366 { | |
1367 changed = false; | |
111 | 1368 bitmap loop_closed_ssa_invalidated = NULL; |
0 | 1369 |
111 | 1370 if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) |
1371 loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); | |
0 | 1372 |
111 | 1373 free_numbers_of_iterations_estimates (cfun); |
1374 estimate_numbers_of_iterations (cfun); | |
1375 | |
1376 changed = tree_unroll_loops_completely_1 (may_increase_size, | |
1377 unroll_outer, father_bbs, | |
1378 current_loops->tree_root); | |
0 | 1379 if (changed) |
1380 { | |
111 | 1381 unsigned i; |
1382 | |
1383 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); | |
1384 | |
1385 /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */ | |
1386 if (loop_closed_ssa_invalidated | |
1387 && !bitmap_empty_p (loop_closed_ssa_invalidated)) | |
1388 rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated, | |
1389 TODO_update_ssa); | |
1390 else | |
1391 update_ssa (TODO_update_ssa); | |
1392 | |
1393 /* father_bbs is a bitmap of loop father header BB indices. | |
1394 Translate that to what non-root loops these BBs belong to now. */ | |
1395 bitmap_iterator bi; | |
1396 bitmap fathers = BITMAP_ALLOC (NULL); | |
1397 EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi) | |
1398 { | |
1399 basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i); | |
1400 if (! unrolled_loop_bb) | |
1401 continue; | |
1402 if (loop_outer (unrolled_loop_bb->loop_father)) | |
1403 bitmap_set_bit (fathers, | |
1404 unrolled_loop_bb->loop_father->num); | |
1405 } | |
1406 bitmap_clear (father_bbs); | |
1407 /* Propagate the constants within the new basic blocks. */ | |
1408 EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi) | |
1409 { | |
1410 loop_p father = get_loop (cfun, i); | |
1411 basic_block *body = get_loop_body_in_dom_order (father); | |
1412 for (unsigned j = 0; j < father->num_nodes; j++) | |
1413 propagate_constants_for_unrolling (body[j]); | |
1414 free (body); | |
1415 } | |
1416 BITMAP_FREE (fathers); | |
1417 | |
0 | 1418 /* This will take care of removing completely unrolled loops |
1419 from the loop structures so we can continue unrolling now | |
1420 innermost loops. */ | |
1421 if (cleanup_tree_cfg ()) | |
1422 update_ssa (TODO_update_ssa_only_virtuals); | |
1423 | |
1424 /* Clean up the information about numbers of iterations, since | |
1425 complete unrolling might have invalidated it. */ | |
1426 scev_reset (); | |
111 | 1427 if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA)) |
1428 verify_loop_closed_ssa (true); | |
0 | 1429 } |
111 | 1430 if (loop_closed_ssa_invalidated) |
1431 BITMAP_FREE (loop_closed_ssa_invalidated); | |
0 | 1432 } |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1433 while (changed |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1434 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS)); |
0 | 1435 |
111 | 1436 BITMAP_FREE (father_bbs); |
1437 | |
1438 if (irred_invalidated | |
1439 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
1440 mark_irreducible_loops (); | |
1441 | |
0 | 1442 return 0; |
1443 } | |
111 | 1444 |
1445 /* Canonical induction variable creation pass. */ | |
1446 | |
1447 namespace { | |
1448 | |
1449 const pass_data pass_data_iv_canon = | |
1450 { | |
1451 GIMPLE_PASS, /* type */ | |
1452 "ivcanon", /* name */ | |
1453 OPTGROUP_LOOP, /* optinfo_flags */ | |
1454 TV_TREE_LOOP_IVCANON, /* tv_id */ | |
1455 ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1456 0, /* properties_provided */ | |
1457 0, /* properties_destroyed */ | |
1458 0, /* todo_flags_start */ | |
1459 0, /* todo_flags_finish */ | |
1460 }; | |
1461 | |
1462 class pass_iv_canon : public gimple_opt_pass | |
1463 { | |
1464 public: | |
1465 pass_iv_canon (gcc::context *ctxt) | |
1466 : gimple_opt_pass (pass_data_iv_canon, ctxt) | |
1467 {} | |
1468 | |
1469 /* opt_pass methods: */ | |
1470 virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; } | |
1471 virtual unsigned int execute (function *fun); | |
1472 | |
1473 }; // class pass_iv_canon | |
1474 | |
1475 unsigned int | |
1476 pass_iv_canon::execute (function *fun) | |
1477 { | |
1478 if (number_of_loops (fun) <= 1) | |
1479 return 0; | |
1480 | |
1481 return canonicalize_induction_variables (); | |
1482 } | |
1483 | |
1484 } // anon namespace | |
1485 | |
1486 gimple_opt_pass * | |
1487 make_pass_iv_canon (gcc::context *ctxt) | |
1488 { | |
1489 return new pass_iv_canon (ctxt); | |
1490 } | |
1491 | |
1492 /* Complete unrolling of loops. */ | |
1493 | |
1494 namespace { | |
1495 | |
1496 const pass_data pass_data_complete_unroll = | |
1497 { | |
1498 GIMPLE_PASS, /* type */ | |
1499 "cunroll", /* name */ | |
1500 OPTGROUP_LOOP, /* optinfo_flags */ | |
1501 TV_COMPLETE_UNROLL, /* tv_id */ | |
1502 ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1503 0, /* properties_provided */ | |
1504 0, /* properties_destroyed */ | |
1505 0, /* todo_flags_start */ | |
1506 0, /* todo_flags_finish */ | |
1507 }; | |
1508 | |
1509 class pass_complete_unroll : public gimple_opt_pass | |
1510 { | |
1511 public: | |
1512 pass_complete_unroll (gcc::context *ctxt) | |
1513 : gimple_opt_pass (pass_data_complete_unroll, ctxt) | |
1514 {} | |
1515 | |
1516 /* opt_pass methods: */ | |
1517 virtual unsigned int execute (function *); | |
1518 | |
1519 }; // class pass_complete_unroll | |
1520 | |
1521 unsigned int | |
1522 pass_complete_unroll::execute (function *fun) | |
1523 { | |
1524 if (number_of_loops (fun) <= 1) | |
1525 return 0; | |
1526 | |
1527 /* If we ever decide to run loop peeling more than once, we will need to | |
1528 track loops already peeled in loop structures themselves to avoid | |
1529 re-peeling the same loop multiple times. */ | |
1530 if (flag_peel_loops) | |
1531 peeled_loops = BITMAP_ALLOC (NULL); | |
1532 int val = tree_unroll_loops_completely (flag_unroll_loops | |
1533 || flag_peel_loops | |
1534 || optimize >= 3, true); | |
1535 if (peeled_loops) | |
1536 { | |
1537 BITMAP_FREE (peeled_loops); | |
1538 peeled_loops = NULL; | |
1539 } | |
1540 return val; | |
1541 } | |
1542 | |
1543 } // anon namespace | |
1544 | |
1545 gimple_opt_pass * | |
1546 make_pass_complete_unroll (gcc::context *ctxt) | |
1547 { | |
1548 return new pass_complete_unroll (ctxt); | |
1549 } | |
1550 | |
1551 /* Complete unrolling of inner loops. */ | |
1552 | |
1553 namespace { | |
1554 | |
1555 const pass_data pass_data_complete_unrolli = | |
1556 { | |
1557 GIMPLE_PASS, /* type */ | |
1558 "cunrolli", /* name */ | |
1559 OPTGROUP_LOOP, /* optinfo_flags */ | |
1560 TV_COMPLETE_UNROLL, /* tv_id */ | |
1561 ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1562 0, /* properties_provided */ | |
1563 0, /* properties_destroyed */ | |
1564 0, /* todo_flags_start */ | |
1565 0, /* todo_flags_finish */ | |
1566 }; | |
1567 | |
1568 class pass_complete_unrolli : public gimple_opt_pass | |
1569 { | |
1570 public: | |
1571 pass_complete_unrolli (gcc::context *ctxt) | |
1572 : gimple_opt_pass (pass_data_complete_unrolli, ctxt) | |
1573 {} | |
1574 | |
1575 /* opt_pass methods: */ | |
1576 virtual bool gate (function *) { return optimize >= 2; } | |
1577 virtual unsigned int execute (function *); | |
1578 | |
1579 }; // class pass_complete_unrolli | |
1580 | |
1581 unsigned int | |
1582 pass_complete_unrolli::execute (function *fun) | |
1583 { | |
1584 unsigned ret = 0; | |
1585 | |
1586 loop_optimizer_init (LOOPS_NORMAL | |
1587 | LOOPS_HAVE_RECORDED_EXITS); | |
1588 if (number_of_loops (fun) > 1) | |
1589 { | |
1590 scev_initialize (); | |
1591 ret = tree_unroll_loops_completely (optimize >= 3, false); | |
1592 scev_finalize (); | |
1593 } | |
1594 loop_optimizer_finalize (); | |
1595 | |
1596 return ret; | |
1597 } | |
1598 | |
1599 } // anon namespace | |
1600 | |
1601 gimple_opt_pass * | |
1602 make_pass_complete_unrolli (gcc::context *ctxt) | |
1603 { | |
1604 return new pass_complete_unrolli (ctxt); | |
1605 } | |
1606 | |
1607 |