CbC_gcc: annotate gcc/tree-ssa-loop-ivcanon.c @ 136:4627f235cf2a ("fix c-next example")
author:   Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date:     Thu, 08 Nov 2018 14:11:56 +0900
parents:  84e7813d76e9
children: 1830386684a0
/* Induction variable canonicalization and loop peeling.
   Copyright (C) 2004-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear
   induction variables.  In that case the created optimization possibilities
   are likely to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop is rolling few enough
       times
     - simple peeling (i.e. copying few initial iterations prior to the loop)
       when the number of iterations estimate is known (typically from the
       profile info).  */
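
/* As an illustration (a sketch added for exposition; it is not part of the
   original sources): a counted loop whose iteration count N the pass can
   prove, such as

     for (i = 0; i != n; i++)
       body ();

   is rewritten so that a fresh down-counting IV carries the exit test,
   starting at the number of body executions N:

     ivtmp = N;
     do
       {
	 body ();
	 ivtmp = ivtmp - 1;
       }
     while (ivtmp != 0);

   Counting down to zero is the form that later RTL analyses (e.g. the
   doloop transformation) recognize most easily.  */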

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "profile.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "builtins.h"
#include "tree-ssa-sccvn.h"

/* Specifies types of loops that may be unrolled.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};

/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  The ssa versions of the new
   IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
   if they are not NULL.  */

void
create_canonical_iv (struct loop *loop, edge exit, tree niter,
		     tree *var_before = NULL, tree *var_after = NULL)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  cond = as_a <gcond *> (last_stmt (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     with a modulo arithmetics.  */

  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, var_before, &var);
  if (var_after)
    *var_after = var;

  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}

/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls in the loop.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};

/* Return true if OP in STMT will be constant after peeling LOOP.  */

static bool
constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
{
  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First take a fast look whether we see a constant array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants when defined in loop.  */
  if (loop_containing_stmt (stmt) != loop)
    return false;
  tree ev = analyze_scalar_evolution (loop, op);
  if (chrec_contains_undetermined (ev)
      || chrec_contains_symbols (ev))
    return false;
  return true;
}

/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
   iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of loop.
   Return results in SIZE, estimate benefits for complete unrolling exiting
   by EXIT.  Stop estimating after UPPER_BOUND is met.  Return true in this
   case.  */

static bool
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (!gimple_has_side_effects (stmt))
	    {
	      /* Exit conditional.  */
	      if (exit && body[i] == exit->src
		  && stmt == last_stmt (exit->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "   Exit condition will be eliminated "
			     "in peeled copies.\n");
		  likely_eliminated_peeled = true;
		}
	      if (edge_to_cancel && body[i] == edge_to_cancel->src
		  && stmt == last_stmt (edge_to_cancel->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "   Exit condition will be eliminated "
			     "in last copy.\n");
		  likely_eliminated_last = true;
		}
	      /* Sets of IV variables.  */
	      if (gimple_code (stmt) == GIMPLE_ASSIGN
		  && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "   Induction variable computation will"
			     " be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Assignments of IV variables.  */
	      else if (gimple_code (stmt) == GIMPLE_ASSIGN
		       && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		       && constant_after_peeling (gimple_assign_rhs1 (stmt),
						  stmt, loop)
		       && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
			   || constant_after_peeling (gimple_assign_rhs2 (stmt),
						      stmt, loop)))
		{
		  size->constant_iv = true;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "   Constant expression will be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Conditionals.  */
	      else if ((gimple_code (stmt) == GIMPLE_COND
			&& constant_after_peeling (gimple_cond_lhs (stmt), stmt,
						   loop)
			&& constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						   loop)
			/* We don't simplify all constant compares so make sure
			   they are not both constant already.  See PR70288.  */
			&& (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
			    || ! is_gimple_min_invariant
				   (gimple_cond_rhs (stmt))))
		       || (gimple_code (stmt) == GIMPLE_SWITCH
			   && constant_after_peeling (gimple_switch_index (
							as_a <gswitch *>
							  (stmt)),
						      stmt, loop)
			   && ! is_gimple_min_invariant
				  (gimple_switch_index
				     (as_a <gswitch *> (stmt)))))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "   Constant conditional.\n");
		  likely_eliminated = true;
		}
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      path.release ();
	      return true;
	    }
	}
    }
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
	    {
	      int flags = gimple_call_flags (stmt);
	      if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path ++;
	    }
	  /* Count inexpensive calls as non-calls, because they will likely
	     expand inline.  */
	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }
  path.release ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}

/* Estimate number of insns of completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body, taking into account
   the fact that in the last copy everything after the exit conditional
   is dead and that some instructions will be eliminated after
   peeling.

   The loop body is likely going to simplify further; this is difficult
   to guess, so we just decrease the result by 1/3.  */

static unsigned HOST_WIDE_INT
estimated_unrolled_size (struct loop_size *size,
			 unsigned HOST_WIDE_INT nunroll)
{
  HOST_WIDE_INT unr_insns = ((nunroll)
			     * (HOST_WIDE_INT) (size->overall
						- size->eliminated_by_peeling));
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;

  unr_insns = unr_insns * 2 / 3;
  if (unr_insns <= 0)
    unr_insns = 1;

  return unr_insns;
}
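
/* A worked example of the formula above (illustrative numbers only, not
   taken from the sources): with SIZE->overall = 10,
   SIZE->eliminated_by_peeling = 4, SIZE->last_iteration = 8,
   SIZE->last_iteration_eliminated_by_peeling = 6 and NUNROLL = 5,

     unr_insns = (5 * (10 - 4) + (8 - 6)) * 2 / 3 = 32 * 2 / 3 = 21 insns

   using integer division, i.e. the peeled copies contribute 30 insns, the
   last copy 2, and the whole estimate is then reduced by one third.  */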

/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit and at
   the same time it does not make any code potentially executed
   during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of the form

     int a[5];
     for (i=0;i<b;i++)
       a[i]=0;

   Here we prove the loop to iterate 5 times but we do not know
   it from induction variable.

   For now we handle only the simple case where there is an exit condition
   just before the latch block and the latch block contains no statements
   with side effect that may otherwise terminate the execution of the loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

static edge
loop_edge_to_cancel (struct loop *loop)
{
  vec<edge> exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We only can handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      exits.release ();

      /* Verify that the code in loop latch does nothing that may end program
	 execution without really reaching the exit.  This may include
	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  exits.release ();
  return NULL;
}

/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put gcc_unreachable before every statement
   known to not be executed.  */
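
/* For instance (an illustrative sketch, not from the original sources):
   with NPEELED = 4 and a recorded bound proving an exit is always taken
   within the first four iterations, the exit condition

     if (i < n) goto exit;    is folded to    if (1) goto exit;

   while a statement whose bound shows it can no longer execute gets a
   __builtin_unreachable () call inserted right before it.  */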

static bool
remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gcall *stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  exit_edge->probability = profile_probability::always ();
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

/* Remove all exits that are known to be never taken because of the loop bound
   discovered.  */

static bool
remove_redundant_iv_tests (struct loop *loop)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before loop reaches
	 upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  struct tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, we can remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

/* Stores loops that will be unlooped and edges that will be removed
   after we process whole loop tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;
static vec<edge> edges_to_remove;
/* Stores loops that have been peeled.  */
static bitmap peeled_loops;

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries trashing loop closed SSA form as well
   as SCEV info needed to be intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      struct loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = profile_probability::never ();
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
      latch_edge->dest->count = profile_count::zero ();
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();

  /* Remove edges in peeled copies.  Given that remove_path removes dominated
     regions, we need to cope with removal of already removed paths.  */
  unsigned i;
  edge e;
  auto_vec<int, 20> src_bbs;
  src_bbs.reserve_exact (edges_to_remove.length ());
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    src_bbs.quick_push (e->src->index);
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
      {
	bool ok = remove_path (e, irred_invalidated,
			       loop_closed_ssa_invalidated);
	gcc_assert (ok);
      }
  edges_to_remove.release ();
}

/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies a bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */
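
/* To illustrate (a sketch for exposition only, not from the sources): with
   NITER = 2 latch executions and EXIT being the test below, complete
   unrolling turns

     i = 0;
     do { a[i] = 0; i++; } while (i != 3);

   into the straight-line sequence

     a[0] = 0; a[1] = 0; a[2] = 0;

   where the peeled copies lose the exit test and the last copy has its
   loopback edge cancelled.  */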

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter, bool may_be_zero,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    dump_user_location_t locus, bool allow_peel)
{
  unsigned HOST_WIDE_INT n_unroll = 0;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;

  /* See if we proved number of iterations to be low constant.

     EXIT is an edge that will be removed in all but last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of executions of the loop is determined by standard
     induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
     leaving from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we can not eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
      && maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we can not
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (!loop->unroll
      && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peel-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      if (ul == UL_SINGLE_ITER)
	return false;

      if (loop->unroll)
	{
	  /* If the unrolling factor is too large, bail out.  */
	  if (n_unroll > (unsigned)loop->unroll)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "Not unrolling loop %d: "
			 "user didn't want it unrolled completely.\n",
			 loop->num);
	      return false;
	    }
	}
      else
	{
	  struct loop_size size;
	  /* EXIT can be removed only if we are sure it passes first N_UNROLL
	     iterations.  */
	  bool remove_exit = (exit && niter
			      && TREE_CODE (niter) == INTEGER_CST
			      && wi::leu_p (n_unroll, wi::to_widest (niter)));
	  bool large
	    = tree_estimate_loop_size
		(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
		 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
	  if (large)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
			 loop->num);
	      return false;
	    }

	  unsigned HOST_WIDE_INT ninsns = size.overall;
	  unsigned HOST_WIDE_INT unr_insns
	    = estimated_unrolled_size (&size, n_unroll);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		       (int) unr_insns);
	    }

	  /* If the code is going to shrink, we don't need to be extra
	     cautious on guessing if the unrolling is going to be
	     profitable.  */
	  if (unr_insns
	      /* If there is IV variable that will become constant, we
		 save one instruction in the loop prologue we do not
		 account otherwise.  */
	      <= ninsns + (size.constant_iv != false))
	    ;
	  /* We unroll only inner loops, because we do not consider it
	     profitable otherwise.  We still can cancel loopback edge
	     of not rolling loop; this is always a good idea.  */
	  else if (ul == UL_NO_GROWTH)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
			 loop->num);
	      return false;
	    }
	  /* Outer loops tend to be less interesting candidates for
	     complete unrolling unless we can do a lot of propagation
	     into the inner loop body.  For now we disable outer loop
	     unrolling when the code would grow.  */
	  else if (loop->inner)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "it is not innermost and code would grow.\n",
			 loop->num);
	      return false;
	    }
	  /* If there is call on a hot path through the loop, then
	     there is most probably not much to optimize.  */
	  else if (size.num_non_pure_calls_on_hot_path)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "contains call and code would grow.\n",
			 loop->num);
	      return false;
	    }
	  /* If there is pure/const call in the function, then we can
	     still optimize the unrolled loop body if it contains some
	     other interesting code than the calls and code storing or
	     cumulating the return value.  */
	  else if (size.num_pure_calls_on_hot_path
		   /* One IV increment, one test, one ivtmp store and
		      one useful stmt.  That is about minimal loop
		      doing pure call.  */
		   && (size.non_call_stmts_on_hot_path
		       <= 3 + size.num_pure_calls_on_hot_path))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "contains just pure calls and code would grow.\n",
			 loop->num);
	      return false;
	    }
	  /* Complete unrolling is a major win when control flow is
	     removed and one big basic block is created.  If the loop
	     contains control flow the optimization may still be a win
	     because of eliminating the loop overhead but it also may
	     blow the branch predictor tables.  Limit number of
	     branches on the hot path through the peeled sequence.  */
	  else if (size.num_branches_on_hot_path * (int)n_unroll
		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "number of branches on hot path in the unrolled "
			 "sequence reaches --param max-peel-branches limit.\n",
			 loop->num);
	      return false;
	    }
	  else if (unr_insns
		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "number of insns in the unrolled sequence reaches "
			 "--param max-completely-peeled-insns limit.\n",
			 loop->num);
	      return false;
	    }
	}

      initialize_original_copy_tables ();
      auto_sbitmap wont_exit (n_unroll + 1);
      if (exit && niter
	  && TREE_CODE (niter) == INTEGER_CST
	  && wi::leu_p (n_unroll, wi::to_widest (niter)))
	{
	  bitmap_ones (wont_exit);
	  if (wi::eq_p (wi::to_widest (niter), n_unroll)
	      || edge_to_cancel)
	    bitmap_clear_bit (wont_exit, 0);
	}
      else
	{
	  exit = NULL;
	  bitmap_clear (wont_exit);
	}
      if (may_be_zero)
	bitmap_clear_bit (wont_exit, 1);

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &edges_to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      free_original_copy_tables ();
    }

  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
      force_edge_cold (edge_to_cancel, true);
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
	gimple_cond_make_false (cond);
      else
	gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path, as doing so may remove outer loop and
	 confuse bookkeeping code in tree_unroll_loops_completely.  */
    }

  /* Store the loop for later unlooping and exit removal.  */
  loops_to_unloop.safe_push (loop);
  loops_to_unloop_nunroll.safe_push (n_unroll);

  if (dump_enabled_p ())
    {
      if (!n_unroll)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			 "loop turned into non-loop; it never loops\n");
      else
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			   "loop with %d iterations completely unrolled",
			   (int) n_unroll);
	  if (loop->header->count.initialized_p ())
	    dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
			 " (header execution count %d)",
			 (int)loop->header->count.to_gcov_type ());
	  dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exit)
	fprintf (dump_file, "Exit condition of peeled iterations was "
		 "eliminated.\n");
      if (edge_to_cancel)
	fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
	fprintf (dump_file, "Latch of last iteration was marked by "
		 "__builtin_unreachable ().\n");
    }

  return true;
}

/* Return number of instructions after peeling.  */
static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
				unsigned HOST_WIDE_INT npeel)
{
  return MAX (npeel * (HOST_WIDE_INT) (size->overall
				       - size->eliminated_by_peeling), 1);
}
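
/* For example (illustrative numbers only): with SIZE->overall = 10,
   SIZE->eliminated_by_peeling = 4 and NPEEL = 3 the peeled sequence is
   estimated at MAX (3 * (10 - 4), 1) = 18 insns.  */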

/* If the loop is expected to iterate N times and is
   small enough, duplicate the loop body N+1 times before
   the loop itself.  This way the hot path will never
   enter the loop.  A sketch of the effect follows this comment.
   Parameters are the same as for try_unroll_loops_completely.  */
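
/* Sketch of the effect (for exposition only, not from the sources): with
   an estimated N = 2 iterations,

     while (c ()) body ();

   is preceded by three peeled copies guarded by the exit test, so the
   expected executions never reach the loop's back edge:

     if (!c ()) goto done;  body ();
     if (!c ()) goto done;  body ();
     if (!c ()) goto done;  body ();
     while (c ()) body ();
   done:;  */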
987 | |
988 static bool | |
989 try_peel_loop (struct loop *loop, | |
131 | 990 edge exit, tree niter, bool may_be_zero, |
111 | 991 HOST_WIDE_INT maxiter) |
992 { | |
993 HOST_WIDE_INT npeel; | |
994 struct loop_size size; | |
995 int peeled_size; | |
996 | |
131 | 997 if (!flag_peel_loops |
998 || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 | |
111 | 999 || !peeled_loops) |
1000 return false; | |
1001 | |
1002 if (bitmap_bit_p (peeled_loops, loop->num)) | |
1003 { | |
1004 if (dump_file) | |
1005 fprintf (dump_file, "Not peeling: loop is already peeled\n"); | |
1006 return false; | |
1007 } | |
1008 | |
131 | 1009 /* We don't peel loops that will be unrolled as this can duplicate a |
1010 loop more times than the user requested. */ | |
1011 if (loop->unroll) | |
1012 { | |
1013 if (dump_file) | |
1014 fprintf (dump_file, "Not peeling: user didn't want it peeled.\n"); | |
1015 return false; | |
1016 } | |
1017 | |
111 | 1018 /* Peel only innermost loops. |
1019 While the code is perfectly capable of peeling non-innermost loops, | |
1020 the heuristics would probably need some improvements. */ | |
1021 if (loop->inner) | |
1022 { | |
1023 if (dump_file) | |
131 | 1024 fprintf (dump_file, "Not peeling: outer loop\n"); |
111 | 1025 return false; |
1026 } | |
1027 | |
1028 if (!optimize_loop_for_speed_p (loop)) | |
1029 { | |
1030 if (dump_file) | |
131 | 1031 fprintf (dump_file, "Not peeling: cold loop\n"); |
111 | 1032 return false; |
1033 } | |
1034 | |
1035 /* Check if there is an estimate on the number of iterations. */ | |
1036 npeel = estimated_loop_iterations_int (loop); | |
1037 if (npeel < 0) | |
1038 npeel = likely_max_loop_iterations_int (loop); | |
1039 if (npeel < 0) | |
1040 { | |
1041 if (dump_file) | |
1042 fprintf (dump_file, "Not peeling: number of iterations is not " | |
1043 "estimated\n"); | |
1044 return false; | |
1045 } | |
1046 if (maxiter >= 0 && maxiter <= npeel) | |
1047 { | |
1048 if (dump_file) | |
131 | 1049 fprintf (dump_file, "Not peeling: upper bound is known so can " |
111 | 1050 "unroll completely\n"); |
1051 return false; | |
1052 } | |
1053 | |
1054 /* We want to peel estimated number of iterations + 1 (so we never | |
1055 enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES | |
1056 and be sure to avoid overflows. */ | |
1057 if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) | |
1058 { | |
1059 if (dump_file) | |
131 | 1060 fprintf (dump_file, "Not peeling: rolls too much " |
111 | 1061 "(%i + 1 > --param max-peel-times)\n", (int) npeel); |
1062 return false; | |
1063 } | |
1064 npeel++; | |
1065 | |
1066 /* Check peeled loops size. */ | |
1067 tree_estimate_loop_size (loop, exit, NULL, &size, | |
1068 PARAM_VALUE (PARAM_MAX_PEELED_INSNS)); | |
1069 if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel)) | |
1070 > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) | |
1071 { | |
1072 if (dump_file) | |
131 | 1073 fprintf (dump_file, "Not peeling: peeled sequence size is too large " |
111 | 1074 "(%i insns > --param max-peel-insns)", peeled_size); |
1075 return false; | |
1076 } | |
1077 | |
1078 /* Duplicate possibly eliminating the exits. */ | |
1079 initialize_original_copy_tables (); | |
1080 auto_sbitmap wont_exit (npeel + 1); | |
1081 if (exit && niter | |
1082 && TREE_CODE (niter) == INTEGER_CST | |
1083 && wi::leu_p (npeel, wi::to_widest (niter))) | |
1084 { | |
1085 bitmap_ones (wont_exit); | |
1086 bitmap_clear_bit (wont_exit, 0); | |
1087 } | |
1088 else | |
1089 { | |
1090 exit = NULL; | |
1091 bitmap_clear (wont_exit); | |
1092 } | |
131 | 1093 if (may_be_zero) |
1094 bitmap_clear_bit (wont_exit, 1); | |
111 | 1095 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1096 npeel, wont_exit, | |
1097 exit, &edges_to_remove, | |
1098 DLTHE_FLAG_UPDATE_FREQ)) | |
1099 { | |
1100 free_original_copy_tables (); | |
1101 return false; | |
1102 } | |
1103 free_original_copy_tables (); | |
1104 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1105 { | |
1106 fprintf (dump_file, "Peeled loop %d, %i times.\n", | |
1107 loop->num, (int) npeel); | |
1108 } | |
1109 if (loop->any_estimate) | |
1110 { | |
1111 if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) | |
1112 loop->nb_iterations_estimate -= npeel; | |
1113 else | |
1114 loop->nb_iterations_estimate = 0; | |
1115 } | |
1116 if (loop->any_upper_bound) | |
1117 { | |
1118 if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound)) | |
1119 loop->nb_iterations_upper_bound -= npeel; | |
1120 else | |
1121 loop->nb_iterations_upper_bound = 0; | |
1122 } | |
1123 if (loop->any_likely_upper_bound) | |
1124 { | |
1125 if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound)) | |
1126 loop->nb_iterations_likely_upper_bound -= npeel; | |
1127 else | |
1128 { | |
1129 loop->any_estimate = true; | |
1130 loop->nb_iterations_estimate = 0; | |
1131 loop->nb_iterations_likely_upper_bound = 0; | |
1132 } | |
1133 } | |
1134 profile_count entry_count = profile_count::zero (); | |
1135 | |
1136 edge e; | |
1137 edge_iterator ei; | |
1138 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
1139 if (e->src != loop->latch) | |
1140 { | |
1141 if (e->src->count.initialized_p ()) | |
1142 entry_count = e->src->count + e->src->count; | |
1143 gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); | |
1144 } | |
1145 profile_probability p = profile_probability::very_unlikely (); | |
131 | 1146 p = entry_count.probability_in (loop->header->count); |
111 | 1147 scale_loop_profile (loop, p, 0); |
1148 bitmap_set_bit (peeled_loops, loop->num); | |
1149 return true; | |
1150 } | |
0 | 1151 /* Adds a canonical induction variable to LOOP if suitable. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true,
   we try to determine the number of iterations of the loop by direct
   evaluation.  ALLOW_PEEL is passed through to try_unroll_loop_completely.
   Returns true if the cfg is changed.  */
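/* For example (an illustrative sketch, not generated GIMPLE), given a
   loop known to iterate N times, create_canonical_iv below introduces a
   counter of the form

     ivtmp = N;
   loop:
     ...
     ivtmp = ivtmp - 1;
     if (ivtmp != 0) goto loop;

   replacing the original exit test.  */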

static bool
canonicalize_loop_induction_variables (struct loop *loop,
                                       bool create_iv, enum unroll_level ul,
                                       bool try_eval, bool allow_peel)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  dump_user_location_t locus;
  struct tree_niter_desc niter_desc;
  bool may_be_zero = false;

  /* For unrolling, allow a conditional constant or zero iteration count;
     to that end, perform loop-header copying on the fly.  */
  exit = single_exit (loop);
  niter = chrec_dont_know;
  if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
    {
      niter = niter_desc.niter;
      may_be_zero
        = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
    }
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = last_stmt (exit->src);
  else
    {
      /* For a non-constant niter, fold may_be_zero back into niter.  */
      if (may_be_zero)
        {
          if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
            niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
                                 niter_desc.may_be_zero,
                                 build_int_cst (TREE_TYPE (niter), 0), niter);
          else
            niter = chrec_dont_know;
          may_be_zero = false;
        }
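      /* Conceptually the COND_EXPR above builds
           niter = may_be_zero_cond ? 0 : niter;
         (names illustrative), so a loop that may be skipped entirely
         contributes an iteration count of zero.  */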

      /* If the loop has more than one exit, try checking all of them
         for # of iterations determinable through scev.  */
      if (!exit)
        niter = find_loop_niter (loop, &exit);

      /* Finally, if everything else fails, try brute-force evaluation.  */
      if (try_eval
          && (chrec_contains_undetermined (niter)
              || TREE_CODE (niter) != INTEGER_CST))
        niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
        locus = last_stmt (exit->src);

      if (TREE_CODE (niter) != INTEGER_CST)
        exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to record it for future use.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, wi::to_widest (niter),
                          exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
               (int) maxiter);
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && likely_max_loop_iterations_int (loop) >= 0)
    {
      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
               loop->num, (int) likely_max_loop_iterations_int (loop));
    }

  /* Remove exits that are known to be never taken based on the loop bound.
     Needs to be called after the computation of max_loop_iterations_int,
     which populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
                                  maxiter, locus, allow_peel))
    return true;

  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    {
      tree iv_niter = niter;
      if (may_be_zero)
        {
          if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
            iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
                                    niter_desc.may_be_zero,
                                    build_int_cst (TREE_TYPE (iv_niter), 0),
                                    iv_niter);
          else
            iv_niter = NULL_TREE;
        }
      if (iv_niter)
        create_canonical_iv (loop, exit, iv_niter);
    }

  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);

  return modified;
}

/* The main entry point of the pass.  Adds canonical induction variables
   to suitable loops.  */

unsigned int
canonicalize_induction_variables (void)
{
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

  estimate_numbers_of_iterations (cfun);

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      changed |= canonicalize_loop_induction_variables (loop,
                                                        true, UL_SINGLE_ITER,
                                                        true, false);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  free_numbers_of_iterations_estimates (cfun);
  scev_reset ();

  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}

/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
                                bitmap father_bbs, struct loop *loop)
{
  struct loop *loop_father;
  bool changed = false;
  struct loop *inner;
  enum unroll_level ul;
  unsigned num = number_of_loops (cfun);

  /* Process inner loops first.  Don't walk loops added by the recursive
     calls because SSA form is not up-to-date.  They can be handled in the
     next iteration.  */
  bitmap child_father_bbs = NULL;
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    if ((unsigned) inner->num < num)
      {
        if (!child_father_bbs)
          child_father_bbs = BITMAP_ALLOC (NULL);
        if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
                                            child_father_bbs, inner))
          {
            bitmap_ior_into (father_bbs, child_father_bbs);
            bitmap_clear (child_father_bbs);
            changed = true;
          }
      }
  if (child_father_bbs)
    BITMAP_FREE (child_father_bbs);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    {
      /* If this loop is recorded as a father, clear all other fathers,
         which are necessarily covered already, to avoid redundant work.  */
      if (bitmap_bit_p (father_bbs, loop->header->index))
        {
          bitmap_clear (father_bbs);
          bitmap_set_bit (father_bbs, loop->header->index);
        }
      return true;
    }

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vectorize)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  if (loop->unroll > 1)
    ul = UL_ALL;
  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
           /* Unroll outermost loops only if asked to do so or they do
              not cause code growth.  */
           && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;
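  /* (Recap of the unroll levels defined earlier in this file: UL_ALL
     permits unrolling even when it grows code size, UL_NO_GROWTH only
     when the unrolled form is no larger, and UL_SINGLE_ITER only for
     loops known to roll at most once.)  */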

  if (canonicalize_loop_induction_variables
        (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
    {
      /* If we'll continue unrolling, we need to propagate constants
         within the new basic blocks to fold away induction variable
         computations; otherwise, the size might blow up before the
         iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father))
        {
          /* Once we process our father we will have processed
             the fathers of our children as well, so avoid doing
             redundant work and clear the fathers we've gathered so far.  */
          bitmap_clear (father_bbs);
          bitmap_set_bit (father_bbs, loop_father->header->index);
        }

      return true;
    }

  return false;
}

/* Unroll LOOPS completely if they iterate just a few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  */

static unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  bitmap father_bbs = BITMAP_ALLOC (NULL);
  bool changed;
  int iteration = 0;
  bool irred_invalidated = false;

  estimate_numbers_of_iterations (cfun);

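  /* Illustration of the fixed-point scheme below: each round unrolls the
     then-innermost loops and cleans up, so a nest such as

         for (i = 0; i < 2; i++)
           for (j = 0; j < 2; j++)
             body;

     is flattened over successive rounds, bounded by
     --param max-unroll-iterations.  */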
  do
    {
      changed = false;
      bitmap loop_closed_ssa_invalidated = NULL;

      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
        loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

      free_numbers_of_iterations_estimates (cfun);
      estimate_numbers_of_iterations (cfun);

      changed = tree_unroll_loops_completely_1 (may_increase_size,
                                                unroll_outer, father_bbs,
                                                current_loops->tree_root);
      if (changed)
        {
          unsigned i;

          unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);

          /* We cannot use TODO_update_ssa_no_phi because VOPS gets
             confused.  */
          if (loop_closed_ssa_invalidated
              && !bitmap_empty_p (loop_closed_ssa_invalidated))
            rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
                                          TODO_update_ssa);
          else
            update_ssa (TODO_update_ssa);

          /* father_bbs is a bitmap of loop father header BB indices.
             Translate that to what non-root loops these BBs belong to now.  */
          bitmap_iterator bi;
          bitmap fathers = BITMAP_ALLOC (NULL);
          EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
            {
              basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
              if (! unrolled_loop_bb)
                continue;
              if (loop_outer (unrolled_loop_bb->loop_father))
                bitmap_set_bit (fathers,
                                unrolled_loop_bb->loop_father->num);
            }
          bitmap_clear (father_bbs);
          /* Propagate the constants within the new basic blocks.  */
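          /* (do_rpo_vn value-numbers only the region from each father's
             preheader edge up to the recorded exit blocks, so the IV
             computations exposed by unrolling fold away without
             rescanning the whole function.)  */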
          EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
            {
              loop_p father = get_loop (cfun, i);
              bitmap exit_bbs = BITMAP_ALLOC (NULL);
              loop_exit *exit = father->exits->next;
              while (exit->e)
                {
                  bitmap_set_bit (exit_bbs, exit->e->dest->index);
                  exit = exit->next;
                }
              do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
              /* Release the per-father exit set; do_rpo_vn does not take
                 ownership of it.  */
              BITMAP_FREE (exit_bbs);
            }
          BITMAP_FREE (fathers);

          /* This will take care of removing completely unrolled loops
             from the loop structures so we can continue unrolling now
             innermost loops.  */
          if (cleanup_tree_cfg ())
            update_ssa (TODO_update_ssa_only_virtuals);

          /* Clean up the information about numbers of iterations, since
             complete unrolling might have invalidated it.  */
          scev_reset ();
          if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
            verify_loop_closed_ssa (true);
        }
      if (loop_closed_ssa_invalidated)
        BITMAP_FREE (loop_closed_ssa_invalidated);
    }
  while (changed
         && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  BITMAP_FREE (father_bbs);

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return 0;
}

/* Canonical induction variable creation pass.  */

namespace {

const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
  virtual unsigned int execute (function *fun);

}; // class pass_iv_canon

unsigned int
pass_iv_canon::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;
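  /* (number_of_loops counts the function's root "loop", so a value of 1
     means there are no real loops to visit.)  */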

  return canonicalize_induction_variables ();
}

} // anon namespace

gimple_opt_pass *
make_pass_iv_canon (gcc::context *ctxt)
{
  return new pass_iv_canon (ctxt);
}

/* Complete unrolling of loops.  */

namespace {

const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_complete_unroll

unsigned int
pass_complete_unroll::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  /* If we ever decide to run loop peeling more than once, we will need to
     track loops already peeled in loop structures themselves to avoid
     re-peeling the same loop multiple times.  */
  if (flag_peel_loops)
    peeled_loops = BITMAP_ALLOC (NULL);
  unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
                                                   || flag_peel_loops
                                                   || optimize >= 3, true);
  if (peeled_loops)
    {
      BITMAP_FREE (peeled_loops);
      peeled_loops = NULL;
    }
  return val;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unroll (gcc::context *ctxt)
{
  return new pass_complete_unroll (ctxt);
}
/* Complete unrolling of inner loops.  */
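/* (Note: unlike "cunroll" above, this early pass calls
   tree_unroll_loops_completely with UNROLL_OUTER == false, so outermost
   loops are held to UL_NO_GROWTH, and peeling is off here since
   PEELED_LOOPS is only allocated by the "cunroll" pass; the intent is to
   expose simple inner bodies early without growing code.)  */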

namespace {

const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unrolli : public gimple_opt_pass
{
public:
  pass_complete_unrolli (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize >= 2; }
  virtual unsigned int execute (function *);

}; // class pass_complete_unrolli

unsigned int
pass_complete_unrolli::execute (function *fun)
{
  unsigned ret = 0;

  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (fun) > 1)
    {
      scev_initialize ();
      ret = tree_unroll_loops_completely (optimize >= 3, false);
      scev_finalize ();
    }
  loop_optimizer_finalize ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unrolli (gcc::context *ctxt)
{
  return new pass_complete_unrolli (ctxt);
}
