Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-ssa-loop-ivcanon.c @ 158:494b0b89df80 default tip
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 18:13:55 +0900 |
parents | 1830386684a0 |
children |
rev | line source |
---|---|
111 | 1 /* Induction variable canonicalization and loop peeling. |
145 | 2 Copyright (C) 2004-2020 Free Software Foundation, Inc. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
3 |
0 | 4 This file is part of GCC. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
5 |
0 | 6 GCC is free software; you can redistribute it and/or modify it |
7 under the terms of the GNU General Public License as published by the | |
8 Free Software Foundation; either version 3, or (at your option) any | |
9 later version. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
10 |
0 | 11 GCC is distributed in the hope that it will be useful, but WITHOUT |
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 for more details. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
15 |
0 | 16 You should have received a copy of the GNU General Public License |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 /* This pass detects the loops that iterate a constant number of times, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
21 adds a canonical induction variable (step -1, tested against 0) |
0 | 22 and replaces the exit test. This enables the less powerful rtl |
23 level analysis to use this information. | |
24 | |
25 This might spoil the code in some cases (by increasing register pressure). | |
26 Note that in the case the new variable is not needed, ivopts will get rid | |
27 of it, so it might only be a problem when there are no other linear induction | |
28 variables. In that case the created optimization possibilities are likely | |
29 to pay up. | |
30 | |
111 | 31 We also perform |
32 - complete unrolling (or peeling) when the loops is rolling few enough | |
33 times | |
34 - simple peeling (i.e. copying few initial iterations prior the loop) | |
35 when number of iteration estimate is known (typically by the profile | |
36 info). */ | |
0 | 37 |
38 #include "config.h" | |
39 #include "system.h" | |
40 #include "coretypes.h" | |
111 | 41 #include "backend.h" |
0 | 42 #include "tree.h" |
111 | 43 #include "gimple.h" |
44 #include "cfghooks.h" | |
45 #include "tree-pass.h" | |
46 #include "ssa.h" | |
47 #include "cgraph.h" | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
48 #include "gimple-pretty-print.h" |
111 | 49 #include "fold-const.h" |
50 #include "profile.h" | |
51 #include "gimple-fold.h" | |
52 #include "tree-eh.h" | |
53 #include "gimple-iterator.h" | |
54 #include "tree-cfg.h" | |
55 #include "tree-ssa-loop-manip.h" | |
56 #include "tree-ssa-loop-niter.h" | |
57 #include "tree-ssa-loop.h" | |
58 #include "tree-into-ssa.h" | |
0 | 59 #include "cfgloop.h" |
60 #include "tree-chrec.h" | |
61 #include "tree-scalar-evolution.h" | |
62 #include "tree-inline.h" | |
111 | 63 #include "tree-cfgcleanup.h" |
64 #include "builtins.h" | |
131 | 65 #include "tree-ssa-sccvn.h" |
145 | 66 #include "dbgcnt.h" |
0 | 67 |
68 /* Specifies types of loops that may be unrolled. */ | |
69 | |
70 enum unroll_level | |
71 { | |
72 UL_SINGLE_ITER, /* Only loops that exit immediately in the first | |
73 iteration. */ | |
74 UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase | |
75 of code size. */ | |
76 UL_ALL /* All suitable loops. */ | |
77 }; | |
78 | |
79 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT | |
131 | 80 is the exit edge whose condition is replaced. The ssa versions of the new |
81 IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER | |
82 if they are not NULL. */ | |
0 | 83 |
131 | 84 void |
145 | 85 create_canonical_iv (class loop *loop, edge exit, tree niter, |
131 | 86 tree *var_before = NULL, tree *var_after = NULL) |
0 | 87 { |
88 edge in; | |
89 tree type, var; | |
111 | 90 gcond *cond; |
0 | 91 gimple_stmt_iterator incr_at; |
92 enum tree_code cmp; | |
93 | |
94 if (dump_file && (dump_flags & TDF_DETAILS)) | |
95 { | |
96 fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num); | |
97 print_generic_expr (dump_file, niter, TDF_SLIM); | |
98 fprintf (dump_file, " iterations.\n"); | |
99 } | |
100 | |
111 | 101 cond = as_a <gcond *> (last_stmt (exit->src)); |
0 | 102 in = EDGE_SUCC (exit->src, 0); |
103 if (in == exit) | |
104 in = EDGE_SUCC (exit->src, 1); | |
105 | |
106 /* Note that we do not need to worry about overflows, since | |
107 type of niter is always unsigned and all comparisons are | |
108 just for equality/nonequality -- i.e. everything works | |
109 with a modulo arithmetics. */ | |
110 | |
111 type = TREE_TYPE (niter); | |
112 niter = fold_build2 (PLUS_EXPR, type, | |
113 niter, | |
114 build_int_cst (type, 1)); | |
115 incr_at = gsi_last_bb (in->src); | |
116 create_iv (niter, | |
117 build_int_cst (type, -1), | |
118 NULL_TREE, loop, | |
131 | 119 &incr_at, false, var_before, &var); |
120 if (var_after) | |
121 *var_after = var; | |
0 | 122 |
123 cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; | |
124 gimple_cond_set_code (cond, cmp); | |
125 gimple_cond_set_lhs (cond, var); | |
126 gimple_cond_set_rhs (cond, build_int_cst (type, 0)); | |
127 update_stmt (cond); | |
128 } | |
129 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
130 /* Describe size of loop as detected by tree_estimate_loop_size. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
131 struct loop_size |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
132 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
133 /* Number of instructions in the loop. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
134 int overall; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
135 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
136 /* Number of instructions that will be likely optimized out in |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
137 peeled iterations of loop (i.e. computation based on induction |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
138 variable where induction variable starts at known constant.) */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
139 int eliminated_by_peeling; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
140 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
141 /* Same statistics for last iteration of loop: it is smaller because |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
142 instructions after exit are not executed. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
143 int last_iteration; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
144 int last_iteration_eliminated_by_peeling; |
111 | 145 |
146 /* If some IV computation will become constant. */ | |
147 bool constant_iv; | |
148 | |
149 /* Number of call stmts that are not a builtin and are pure or const | |
150 present on the hot path. */ | |
151 int num_pure_calls_on_hot_path; | |
152 /* Number of call stmts that are not a builtin and are not pure nor const | |
153 present on the hot path. */ | |
154 int num_non_pure_calls_on_hot_path; | |
155 /* Number of statements other than calls in the loop. */ | |
156 int non_call_stmts_on_hot_path; | |
157 /* Number of branches seen on the hot path. */ | |
158 int num_branches_on_hot_path; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
159 }; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
160 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
161 /* Return true if OP in STMT will be constant after peeling LOOP. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
162 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
163 static bool |
145 | 164 constant_after_peeling (tree op, gimple *stmt, class loop *loop) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
165 { |
145 | 166 if (CONSTANT_CLASS_P (op)) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
167 return true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
168 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
169 /* We can still fold accesses to constant arrays when index is known. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
170 if (TREE_CODE (op) != SSA_NAME) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
171 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
172 tree base = op; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
173 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
174 /* First make fast look if we see constant array inside. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
175 while (handled_component_p (base)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
176 base = TREE_OPERAND (base, 0); |
111 | 177 if ((DECL_P (base) |
178 && ctor_for_folding (base) != error_mark_node) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
179 || CONSTANT_CLASS_P (base)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
180 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
181 /* If so, see if we understand all the indices. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
182 base = op; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
183 while (handled_component_p (base)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
184 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
185 if (TREE_CODE (base) == ARRAY_REF |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
186 && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
187 return false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
188 base = TREE_OPERAND (base, 0); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
189 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
190 return true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
191 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
192 return false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
193 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
194 |
111 | 195 /* Induction variables are constants when defined in loop. */ |
196 if (loop_containing_stmt (stmt) != loop) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
197 return false; |
111 | 198 tree ev = analyze_scalar_evolution (loop, op); |
199 if (chrec_contains_undetermined (ev) | |
200 || chrec_contains_symbols (ev)) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
201 return false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
202 return true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
203 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
204 |
111 | 205 /* Computes an estimated number of insns in LOOP. |
206 EXIT (if non-NULL) is an exite edge that will be eliminated in all but last | |
207 iteration of the loop. | |
208 EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration | |
209 of loop. | |
210 Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. | |
211 Stop estimating after UPPER_BOUND is met. Return true in this case. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
212 |
111 | 213 static bool |
145 | 214 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel, |
111 | 215 struct loop_size *size, int upper_bound) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
216 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
217 basic_block *body = get_loop_body (loop); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
218 gimple_stmt_iterator gsi; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
219 unsigned int i; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
220 bool after_exit; |
111 | 221 vec<basic_block> path = get_loop_hot_path (loop); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
222 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
223 size->overall = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
224 size->eliminated_by_peeling = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
225 size->last_iteration = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
226 size->last_iteration_eliminated_by_peeling = 0; |
111 | 227 size->num_pure_calls_on_hot_path = 0; |
228 size->num_non_pure_calls_on_hot_path = 0; | |
229 size->non_call_stmts_on_hot_path = 0; | |
230 size->num_branches_on_hot_path = 0; | |
231 size->constant_iv = 0; | |
0 | 232 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
233 if (dump_file && (dump_flags & TDF_DETAILS)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
234 fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
235 for (i = 0; i < loop->num_nodes; i++) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
236 { |
111 | 237 if (edge_to_cancel && body[i] != edge_to_cancel->src |
238 && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src)) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
239 after_exit = true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
240 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
241 after_exit = false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
242 if (dump_file && (dump_flags & TDF_DETAILS)) |
111 | 243 fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, |
244 after_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
245 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
246 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
247 { |
111 | 248 gimple *stmt = gsi_stmt (gsi); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
249 int num = estimate_num_insns (stmt, &eni_size_weights); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
250 bool likely_eliminated = false; |
111 | 251 bool likely_eliminated_last = false; |
252 bool likely_eliminated_peeled = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
253 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
254 if (dump_file && (dump_flags & TDF_DETAILS)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
255 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
256 fprintf (dump_file, " size: %3i ", num); |
111 | 257 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
258 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
259 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
260 /* Look for reasons why we might optimize this stmt away. */ |
0 | 261 |
111 | 262 if (!gimple_has_side_effects (stmt)) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
263 { |
111 | 264 /* Exit conditional. */ |
265 if (exit && body[i] == exit->src | |
266 && stmt == last_stmt (exit->src)) | |
267 { | |
268 if (dump_file && (dump_flags & TDF_DETAILS)) | |
269 fprintf (dump_file, " Exit condition will be eliminated " | |
270 "in peeled copies.\n"); | |
271 likely_eliminated_peeled = true; | |
272 } | |
273 if (edge_to_cancel && body[i] == edge_to_cancel->src | |
274 && stmt == last_stmt (edge_to_cancel->src)) | |
275 { | |
276 if (dump_file && (dump_flags & TDF_DETAILS)) | |
277 fprintf (dump_file, " Exit condition will be eliminated " | |
278 "in last copy.\n"); | |
279 likely_eliminated_last = true; | |
280 } | |
281 /* Sets of IV variables */ | |
282 if (gimple_code (stmt) == GIMPLE_ASSIGN | |
283 && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop)) | |
284 { | |
285 if (dump_file && (dump_flags & TDF_DETAILS)) | |
286 fprintf (dump_file, " Induction variable computation will" | |
287 " be folded away.\n"); | |
288 likely_eliminated = true; | |
289 } | |
290 /* Assignments of IV variables. */ | |
291 else if (gimple_code (stmt) == GIMPLE_ASSIGN | |
292 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME | |
293 && constant_after_peeling (gimple_assign_rhs1 (stmt), | |
294 stmt, loop) | |
295 && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS | |
296 || constant_after_peeling (gimple_assign_rhs2 (stmt), | |
145 | 297 stmt, loop)) |
298 && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS) | |
111 | 299 { |
300 size->constant_iv = true; | |
301 if (dump_file && (dump_flags & TDF_DETAILS)) | |
302 fprintf (dump_file, | |
303 " Constant expression will be folded away.\n"); | |
304 likely_eliminated = true; | |
305 } | |
306 /* Conditionals. */ | |
307 else if ((gimple_code (stmt) == GIMPLE_COND | |
308 && constant_after_peeling (gimple_cond_lhs (stmt), stmt, | |
309 loop) | |
310 && constant_after_peeling (gimple_cond_rhs (stmt), stmt, | |
311 loop) | |
312 /* We don't simplify all constant compares so make sure | |
313 they are not both constant already. See PR70288. */ | |
314 && (! is_gimple_min_invariant (gimple_cond_lhs (stmt)) | |
315 || ! is_gimple_min_invariant | |
316 (gimple_cond_rhs (stmt)))) | |
317 || (gimple_code (stmt) == GIMPLE_SWITCH | |
318 && constant_after_peeling (gimple_switch_index ( | |
319 as_a <gswitch *> | |
320 (stmt)), | |
321 stmt, loop) | |
322 && ! is_gimple_min_invariant | |
323 (gimple_switch_index | |
324 (as_a <gswitch *> (stmt))))) | |
325 { | |
326 if (dump_file && (dump_flags & TDF_DETAILS)) | |
327 fprintf (dump_file, " Constant conditional.\n"); | |
328 likely_eliminated = true; | |
329 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
330 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
331 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
332 size->overall += num; |
111 | 333 if (likely_eliminated || likely_eliminated_peeled) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
334 size->eliminated_by_peeling += num; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
335 if (!after_exit) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
336 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
337 size->last_iteration += num; |
111 | 338 if (likely_eliminated || likely_eliminated_last) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
339 size->last_iteration_eliminated_by_peeling += num; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
340 } |
111 | 341 if ((size->overall * 3 / 2 - size->eliminated_by_peeling |
342 - size->last_iteration_eliminated_by_peeling) > upper_bound) | |
343 { | |
344 free (body); | |
345 path.release (); | |
346 return true; | |
347 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
348 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
349 } |
111 | 350 while (path.length ()) |
351 { | |
352 basic_block bb = path.pop (); | |
353 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
354 { | |
355 gimple *stmt = gsi_stmt (gsi); | |
356 if (gimple_code (stmt) == GIMPLE_CALL | |
357 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))) | |
358 { | |
359 int flags = gimple_call_flags (stmt); | |
360 if (flags & (ECF_PURE | ECF_CONST)) | |
361 size->num_pure_calls_on_hot_path++; | |
362 else | |
363 size->num_non_pure_calls_on_hot_path++; | |
364 size->num_branches_on_hot_path ++; | |
365 } | |
366 /* Count inexpensive calls as non-calls, because they will likely | |
367 expand inline. */ | |
368 else if (gimple_code (stmt) != GIMPLE_DEBUG) | |
369 size->non_call_stmts_on_hot_path++; | |
370 if (((gimple_code (stmt) == GIMPLE_COND | |
371 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) | |
131 | 372 || !constant_after_peeling (gimple_cond_rhs (stmt), stmt, |
373 loop))) | |
111 | 374 || (gimple_code (stmt) == GIMPLE_SWITCH |
375 && !constant_after_peeling (gimple_switch_index ( | |
376 as_a <gswitch *> (stmt)), | |
377 stmt, loop))) | |
378 && (!exit || bb != exit->src)) | |
379 size->num_branches_on_hot_path++; | |
380 } | |
381 } | |
382 path.release (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
383 if (dump_file && (dump_flags & TDF_DETAILS)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
384 fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
385 size->eliminated_by_peeling, size->last_iteration, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
386 size->last_iteration_eliminated_by_peeling); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
387 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
388 free (body); |
111 | 389 return false; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
390 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
391 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
392 /* Estimate number of insns of completely unrolled loop. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
393 It is (NUNROLL + 1) * size of loop body with taking into account |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
394 the fact that in last copy everything after exit conditional |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
395 is dead and that some instructions will be eliminated after |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
396 peeling. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
397 |
111 | 398 Loop body is likely going to simplify further, this is difficult |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
399 to guess, we just decrease the result by 1/3. */ |
0 | 400 |
401 static unsigned HOST_WIDE_INT | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
402 estimated_unrolled_size (struct loop_size *size, |
0 | 403 unsigned HOST_WIDE_INT nunroll) |
404 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
405 HOST_WIDE_INT unr_insns = ((nunroll) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
406 * (HOST_WIDE_INT) (size->overall |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
407 - size->eliminated_by_peeling)); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
408 if (!nunroll) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
409 unr_insns = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
410 unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
411 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
412 unr_insns = unr_insns * 2 / 3; |
0 | 413 if (unr_insns <= 0) |
414 unr_insns = 1; | |
415 | |
416 return unr_insns; | |
417 } | |
418 | |
111 | 419 /* Loop LOOP is known to not loop. See if there is an edge in the loop |
420 body that can be remove to make the loop to always exit and at | |
421 the same time it does not make any code potentially executed | |
422 during the last iteration dead. | |
423 | |
424 After complete unrolling we still may get rid of the conditional | |
425 on the exit in the last copy even if we have no idea what it does. | |
426 This is quite common case for loops of form | |
427 | |
428 int a[5]; | |
429 for (i=0;i<b;i++) | |
430 a[i]=0; | |
431 | |
432 Here we prove the loop to iterate 5 times but we do not know | |
433 it from induction variable. | |
434 | |
435 For now we handle only simple case where there is exit condition | |
436 just before the latch block and the latch block contains no statements | |
437 with side effect that may otherwise terminate the execution of loop | |
438 (such as by EH or by terminating the program or longjmp). | |
439 | |
440 In the general case we may want to cancel the paths leading to statements | |
441 loop-niter identified as having undefined effect in the last iteration. | |
442 The other cases are hopefully rare and will be cleaned up later. */ | |
443 | |
444 static edge | |
145 | 445 loop_edge_to_cancel (class loop *loop) |
111 | 446 { |
447 vec<edge> exits; | |
448 unsigned i; | |
449 edge edge_to_cancel; | |
450 gimple_stmt_iterator gsi; | |
451 | |
452 /* We want only one predecestor of the loop. */ | |
453 if (EDGE_COUNT (loop->latch->preds) > 1) | |
454 return NULL; | |
455 | |
456 exits = get_loop_exit_edges (loop); | |
457 | |
458 FOR_EACH_VEC_ELT (exits, i, edge_to_cancel) | |
459 { | |
460 /* Find the other edge than the loop exit | |
461 leaving the conditoinal. */ | |
462 if (EDGE_COUNT (edge_to_cancel->src->succs) != 2) | |
463 continue; | |
464 if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel) | |
465 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1); | |
466 else | |
467 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0); | |
468 | |
469 /* We only can handle conditionals. */ | |
470 if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) | |
471 continue; | |
472 | |
473 /* We should never have conditionals in the loop latch. */ | |
474 gcc_assert (edge_to_cancel->dest != loop->header); | |
475 | |
476 /* Check that it leads to loop latch. */ | |
477 if (edge_to_cancel->dest != loop->latch) | |
478 continue; | |
479 | |
480 exits.release (); | |
481 | |
482 /* Verify that the code in loop latch does nothing that may end program | |
483 execution without really reaching the exit. This may include | |
484 non-pure/const function calls, EH statements, volatile ASMs etc. */ | |
485 for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi)) | |
486 if (gimple_has_side_effects (gsi_stmt (gsi))) | |
487 return NULL; | |
488 return edge_to_cancel; | |
489 } | |
490 exits.release (); | |
491 return NULL; | |
492 } | |
493 | |
494 /* Remove all tests for exits that are known to be taken after LOOP was | |
495 peeled NPEELED times. Put gcc_unreachable before every statement | |
496 known to not be executed. */ | |
497 | |
498 static bool | |
145 | 499 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled) |
111 | 500 { |
145 | 501 class nb_iter_bound *elt; |
111 | 502 bool changed = false; |
503 | |
504 for (elt = loop->bounds; elt; elt = elt->next) | |
505 { | |
506 /* If statement is known to be undefined after peeling, turn it | |
507 into unreachable (or trap when debugging experience is supposed | |
508 to be good). */ | |
509 if (!elt->is_exit | |
510 && wi::ltu_p (elt->bound, npeeled)) | |
511 { | |
512 gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt); | |
513 gcall *stmt = gimple_build_call | |
514 (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); | |
515 gimple_set_location (stmt, gimple_location (elt->stmt)); | |
516 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); | |
517 split_block (gimple_bb (stmt), stmt); | |
518 changed = true; | |
519 if (dump_file && (dump_flags & TDF_DETAILS)) | |
520 { | |
521 fprintf (dump_file, "Forced statement unreachable: "); | |
522 print_gimple_stmt (dump_file, elt->stmt, 0); | |
523 } | |
524 } | |
525 /* If we know the exit will be taken after peeling, update. */ | |
526 else if (elt->is_exit | |
527 && wi::leu_p (elt->bound, npeeled)) | |
528 { | |
529 basic_block bb = gimple_bb (elt->stmt); | |
530 edge exit_edge = EDGE_SUCC (bb, 0); | |
531 | |
532 if (dump_file && (dump_flags & TDF_DETAILS)) | |
533 { | |
534 fprintf (dump_file, "Forced exit to be taken: "); | |
535 print_gimple_stmt (dump_file, elt->stmt, 0); | |
536 } | |
537 if (!loop_exit_edge_p (loop, exit_edge)) | |
538 exit_edge = EDGE_SUCC (bb, 1); | |
539 exit_edge->probability = profile_probability::always (); | |
540 gcc_checking_assert (loop_exit_edge_p (loop, exit_edge)); | |
541 gcond *cond_stmt = as_a <gcond *> (elt->stmt); | |
542 if (exit_edge->flags & EDGE_TRUE_VALUE) | |
543 gimple_cond_make_true (cond_stmt); | |
544 else | |
545 gimple_cond_make_false (cond_stmt); | |
546 update_stmt (cond_stmt); | |
547 changed = true; | |
548 } | |
549 } | |
550 return changed; | |
551 } | |
552 | |
553 /* Remove all exits that are known to be never taken because of the loop bound | |
554 discovered. */ | |
555 | |
556 static bool | |
145 | 557 remove_redundant_iv_tests (class loop *loop) |
111 | 558 { |
145 | 559 class nb_iter_bound *elt; |
111 | 560 bool changed = false; |
561 | |
562 if (!loop->any_upper_bound) | |
563 return false; | |
564 for (elt = loop->bounds; elt; elt = elt->next) | |
565 { | |
566 /* Exit is pointless if it won't be taken before loop reaches | |
567 upper bound. */ | |
568 if (elt->is_exit && loop->any_upper_bound | |
569 && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound)) | |
570 { | |
571 basic_block bb = gimple_bb (elt->stmt); | |
572 edge exit_edge = EDGE_SUCC (bb, 0); | |
145 | 573 class tree_niter_desc niter; |
111 | 574 |
575 if (!loop_exit_edge_p (loop, exit_edge)) | |
576 exit_edge = EDGE_SUCC (bb, 1); | |
577 | |
578 /* Only when we know the actual number of iterations, not | |
579 just a bound, we can remove the exit. */ | |
580 if (!number_of_iterations_exit (loop, exit_edge, | |
581 &niter, false, false) | |
582 || !integer_onep (niter.assumptions) | |
583 || !integer_zerop (niter.may_be_zero) | |
584 || !niter.niter | |
585 || TREE_CODE (niter.niter) != INTEGER_CST | |
586 || !wi::ltu_p (loop->nb_iterations_upper_bound, | |
587 wi::to_widest (niter.niter))) | |
588 continue; | |
589 | |
590 if (dump_file && (dump_flags & TDF_DETAILS)) | |
591 { | |
592 fprintf (dump_file, "Removed pointless exit: "); | |
593 print_gimple_stmt (dump_file, elt->stmt, 0); | |
594 } | |
595 gcond *cond_stmt = as_a <gcond *> (elt->stmt); | |
596 if (exit_edge->flags & EDGE_TRUE_VALUE) | |
597 gimple_cond_make_false (cond_stmt); | |
598 else | |
599 gimple_cond_make_true (cond_stmt); | |
600 update_stmt (cond_stmt); | |
601 changed = true; | |
602 } | |
603 } | |
604 return changed; | |
605 } | |
606 | |
607 /* Stores loops that will be unlooped and edges that will be removed | |
608 after we process whole loop tree. */ | |
609 static vec<loop_p> loops_to_unloop; | |
610 static vec<int> loops_to_unloop_nunroll; | |
611 static vec<edge> edges_to_remove; | |
612 /* Stores loops that has been peeled. */ | |
613 static bitmap peeled_loops; | |
614 | |
615 /* Cancel all fully unrolled loops by putting __builtin_unreachable | |
616 on the latch edge. | |
617 We do it after all unrolling since unlooping moves basic blocks | |
618 across loop boundaries trashing loop closed SSA form as well | |
619 as SCEV info needed to be intact during unrolling. | |
620 | |
621 IRRED_INVALIDATED is used to bookkeep if information about | |
622 irreducible regions may become invalid as a result | |
623 of the transformation. | |
624 LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case | |
625 when we need to go into loop closed SSA form. */ | |
626 | |
627 static void | |
628 unloop_loops (bitmap loop_closed_ssa_invalidated, | |
629 bool *irred_invalidated) | |
630 { | |
631 while (loops_to_unloop.length ()) | |
632 { | |
145 | 633 class loop *loop = loops_to_unloop.pop (); |
111 | 634 int n_unroll = loops_to_unloop_nunroll.pop (); |
635 basic_block latch = loop->latch; | |
636 edge latch_edge = loop_latch_edge (loop); | |
637 int flags = latch_edge->flags; | |
638 location_t locus = latch_edge->goto_locus; | |
639 gcall *stmt; | |
640 gimple_stmt_iterator gsi; | |
641 | |
642 remove_exits_and_undefined_stmts (loop, n_unroll); | |
643 | |
644 /* Unloop destroys the latch edge. */ | |
645 unloop (loop, irred_invalidated, loop_closed_ssa_invalidated); | |
646 | |
647 /* Create new basic block for the latch edge destination and wire | |
648 it in. */ | |
649 stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); | |
650 latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags); | |
651 latch_edge->probability = profile_probability::never (); | |
652 latch_edge->flags |= flags; | |
653 latch_edge->goto_locus = locus; | |
654 | |
655 add_bb_to_loop (latch_edge->dest, current_loops->tree_root); | |
656 latch_edge->dest->count = profile_count::zero (); | |
657 set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); | |
658 | |
659 gsi = gsi_start_bb (latch_edge->dest); | |
660 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); | |
661 } | |
662 loops_to_unloop.release (); | |
663 loops_to_unloop_nunroll.release (); | |
664 | |
131 | 665 /* Remove edges in peeled copies. Given remove_path removes dominated |
666 regions we need to cope with removal of already removed paths. */ | |
111 | 667 unsigned i; |
668 edge e; | |
131 | 669 auto_vec<int, 20> src_bbs; |
670 src_bbs.reserve_exact (edges_to_remove.length ()); | |
111 | 671 FOR_EACH_VEC_ELT (edges_to_remove, i, e) |
131 | 672 src_bbs.quick_push (e->src->index); |
673 FOR_EACH_VEC_ELT (edges_to_remove, i, e) | |
674 if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i])) | |
675 { | |
676 bool ok = remove_path (e, irred_invalidated, | |
677 loop_closed_ssa_invalidated); | |
678 gcc_assert (ok); | |
679 } | |
111 | 680 edges_to_remove.release (); |
681 } | |
682 | |
0 | 683 /* Tries to unroll LOOP completely, i.e. NITER times. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
684 UL determines which loops we are allowed to unroll. |
111 | 685 EXIT is the exit of the loop that should be eliminated. |
686 MAXITER specfy bound on number of iterations, -1 if it is | |
687 not known or too large for HOST_WIDE_INT. The location | |
688 LOCUS corresponding to the loop is used when emitting | |
689 a summary of the unroll to the dump file. */ | |
0 | 690 |
691 static bool | |
145 | 692 try_unroll_loop_completely (class loop *loop, |
131 | 693 edge exit, tree niter, bool may_be_zero, |
111 | 694 enum unroll_level ul, |
695 HOST_WIDE_INT maxiter, | |
131 | 696 dump_user_location_t locus, bool allow_peel) |
0 | 697 { |
131 | 698 unsigned HOST_WIDE_INT n_unroll = 0; |
111 | 699 bool n_unroll_found = false; |
700 edge edge_to_cancel = NULL; | |
0 | 701 |
111 | 702 /* See if we proved number of iterations to be low constant. |
703 | |
704 EXIT is an edge that will be removed in all but last iteration of | |
705 the loop. | |
706 | |
707 EDGE_TO_CACNEL is an edge that will be removed from the last iteration | |
708 of the unrolled sequence and is expected to make the final loop not | |
709 rolling. | |
710 | |
711 If the number of execution of loop is determined by standard induction | |
712 variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving | |
713 from the iv test. */ | |
714 if (tree_fits_uhwi_p (niter)) | |
715 { | |
716 n_unroll = tree_to_uhwi (niter); | |
717 n_unroll_found = true; | |
718 edge_to_cancel = EDGE_SUCC (exit->src, 0); | |
719 if (edge_to_cancel == exit) | |
720 edge_to_cancel = EDGE_SUCC (exit->src, 1); | |
721 } | |
145 | 722 /* We do not know the number of iterations and thus we cannot eliminate |
111 | 723 the EXIT edge. */ |
724 else | |
725 exit = NULL; | |
726 | |
727 /* See if we can improve our estimate by using recorded loop bounds. */ | |
131 | 728 if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH) |
729 && maxiter >= 0 | |
111 | 730 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) |
731 { | |
732 n_unroll = maxiter; | |
733 n_unroll_found = true; | |
145 | 734 /* Loop terminates before the IV variable test, so we cannot |
111 | 735 remove it in the last iteration. */ |
736 edge_to_cancel = NULL; | |
737 } | |
738 | |
739 if (!n_unroll_found) | |
0 | 740 return false; |
741 | |
131 | 742 if (!loop->unroll |
145 | 743 && n_unroll > (unsigned) param_max_completely_peel_times) |
111 | 744 { |
745 if (dump_file && (dump_flags & TDF_DETAILS)) | |
746 fprintf (dump_file, "Not unrolling loop %d " | |
747 "(--param max-completely-peel-times limit reached).\n", | |
748 loop->num); | |
749 return false; | |
750 } | |
0 | 751 |
111 | 752 if (!edge_to_cancel) |
753 edge_to_cancel = loop_edge_to_cancel (loop); | |
0 | 754 |
755 if (n_unroll) | |
756 { | |
757 if (ul == UL_SINGLE_ITER) | |
758 return false; | |
759 | |
131 | 760 if (loop->unroll) |
0 | 761 { |
131 | 762 /* If the unrolling factor is too large, bail out. */ |
763 if (n_unroll > (unsigned)loop->unroll) | |
764 { | |
765 if (dump_file && (dump_flags & TDF_DETAILS)) | |
766 fprintf (dump_file, | |
767 "Not unrolling loop %d: " | |
768 "user didn't want it unrolled completely.\n", | |
769 loop->num); | |
770 return false; | |
771 } | |
111 | 772 } |
131 | 773 else |
0 | 774 { |
131 | 775 struct loop_size size; |
776 /* EXIT can be removed only if we are sure it passes first N_UNROLL | |
777 iterations. */ | |
778 bool remove_exit = (exit && niter | |
779 && TREE_CODE (niter) == INTEGER_CST | |
780 && wi::leu_p (n_unroll, wi::to_widest (niter))); | |
781 bool large | |
782 = tree_estimate_loop_size | |
783 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, | |
145 | 784 param_max_completely_peeled_insns); |
131 | 785 if (large) |
786 { | |
787 if (dump_file && (dump_flags & TDF_DETAILS)) | |
788 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", | |
789 loop->num); | |
790 return false; | |
791 } | |
792 | |
793 unsigned HOST_WIDE_INT ninsns = size.overall; | |
794 unsigned HOST_WIDE_INT unr_insns | |
795 = estimated_unrolled_size (&size, n_unroll); | |
0 | 796 if (dump_file && (dump_flags & TDF_DETAILS)) |
131 | 797 { |
798 fprintf (dump_file, " Loop size: %d\n", (int) ninsns); | |
799 fprintf (dump_file, " Estimated size after unrolling: %d\n", | |
800 (int) unr_insns); | |
801 } | |
802 | |
803 /* If the code is going to shrink, we don't need to be extra | |
804 cautious on guessing if the unrolling is going to be | |
805 profitable. */ | |
806 if (unr_insns | |
807 /* If there is IV variable that will become constant, we | |
808 save one instruction in the loop prologue we do not | |
809 account otherwise. */ | |
810 <= ninsns + (size.constant_iv != false)) | |
811 ; | |
812 /* We unroll only inner loops, because we do not consider it | |
813 profitable otheriwse. We still can cancel loopback edge | |
814 of not rolling loop; this is always a good idea. */ | |
815 else if (ul == UL_NO_GROWTH) | |
816 { | |
817 if (dump_file && (dump_flags & TDF_DETAILS)) | |
818 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", | |
819 loop->num); | |
820 return false; | |
821 } | |
822 /* Outer loops tend to be less interesting candidates for | |
823 complete unrolling unless we can do a lot of propagation | |
824 into the inner loop body. For now we disable outer loop | |
825 unrolling when the code would grow. */ | |
826 else if (loop->inner) | |
827 { | |
828 if (dump_file && (dump_flags & TDF_DETAILS)) | |
829 fprintf (dump_file, "Not unrolling loop %d: " | |
830 "it is not innermost and code would grow.\n", | |
831 loop->num); | |
832 return false; | |
833 } | |
834 /* If there is call on a hot path through the loop, then | |
835 there is most probably not much to optimize. */ | |
836 else if (size.num_non_pure_calls_on_hot_path) | |
837 { | |
838 if (dump_file && (dump_flags & TDF_DETAILS)) | |
839 fprintf (dump_file, "Not unrolling loop %d: " | |
840 "contains call and code would grow.\n", | |
841 loop->num); | |
842 return false; | |
843 } | |
844 /* If there is pure/const call in the function, then we can | |
845 still optimize the unrolled loop body if it contains some | |
846 other interesting code than the calls and code storing or | |
847 cumulating the return value. */ | |
848 else if (size.num_pure_calls_on_hot_path | |
849 /* One IV increment, one test, one ivtmp store and | |
850 one useful stmt. That is about minimal loop | |
851 doing pure call. */ | |
852 && (size.non_call_stmts_on_hot_path | |
853 <= 3 + size.num_pure_calls_on_hot_path)) | |
854 { | |
855 if (dump_file && (dump_flags & TDF_DETAILS)) | |
856 fprintf (dump_file, "Not unrolling loop %d: " | |
857 "contains just pure calls and code would grow.\n", | |
858 loop->num); | |
859 return false; | |
860 } | |
861 /* Complete unrolling is major win when control flow is | |
862 removed and one big basic block is created. If the loop | |
863 contains control flow the optimization may still be a win | |
864 because of eliminating the loop overhead but it also may | |
865 blow the branch predictor tables. Limit number of | |
866 branches on the hot path through the peeled sequence. */ | |
867 else if (size.num_branches_on_hot_path * (int)n_unroll | |
145 | 868 > param_max_peel_branches) |
131 | 869 { |
870 if (dump_file && (dump_flags & TDF_DETAILS)) | |
871 fprintf (dump_file, "Not unrolling loop %d: " | |
872 "number of branches on hot path in the unrolled " | |
873 "sequence reaches --param max-peel-branches limit.\n", | |
874 loop->num); | |
875 return false; | |
876 } | |
877 else if (unr_insns | |
145 | 878 > (unsigned) param_max_completely_peeled_insns) |
131 | 879 { |
880 if (dump_file && (dump_flags & TDF_DETAILS)) | |
881 fprintf (dump_file, "Not unrolling loop %d: " | |
882 "number of insns in the unrolled sequence reaches " | |
883 "--param max-completely-peeled-insns limit.\n", | |
884 loop->num); | |
885 return false; | |
886 } | |
111 | 887 } |
0 | 888 |
145 | 889 if (!dbg_cnt (gimple_unroll)) |
890 return false; | |
891 | |
0 | 892 initialize_original_copy_tables (); |
111 | 893 auto_sbitmap wont_exit (n_unroll + 1); |
894 if (exit && niter | |
895 && TREE_CODE (niter) == INTEGER_CST | |
896 && wi::leu_p (n_unroll, wi::to_widest (niter))) | |
897 { | |
898 bitmap_ones (wont_exit); | |
899 if (wi::eq_p (wi::to_widest (niter), n_unroll) | |
900 || edge_to_cancel) | |
901 bitmap_clear_bit (wont_exit, 0); | |
902 } | |
903 else | |
904 { | |
905 exit = NULL; | |
906 bitmap_clear (wont_exit); | |
907 } | |
131 | 908 if (may_be_zero) |
909 bitmap_clear_bit (wont_exit, 1); | |
0 | 910 |
911 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
912 n_unroll, wont_exit, | |
111 | 913 exit, &edges_to_remove, |
0 | 914 DLTHE_FLAG_UPDATE_FREQ |
915 | DLTHE_FLAG_COMPLETTE_PEEL)) | |
916 { | |
917 free_original_copy_tables (); | |
111 | 918 if (dump_file && (dump_flags & TDF_DETAILS)) |
919 fprintf (dump_file, "Failed to duplicate the loop\n"); | |
0 | 920 return false; |
921 } | |
922 | |
923 free_original_copy_tables (); | |
924 } | |
925 | |
111 | 926 /* Remove the conditional from the last copy of the loop. */ |
927 if (edge_to_cancel) | |
928 { | |
929 gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src)); | |
930 force_edge_cold (edge_to_cancel, true); | |
931 if (edge_to_cancel->flags & EDGE_TRUE_VALUE) | |
932 gimple_cond_make_false (cond); | |
933 else | |
934 gimple_cond_make_true (cond); | |
935 update_stmt (cond); | |
131 | 936 /* Do not remove the path, as doing so may remove outer loop and |
937 confuse bookkeeping code in tree_unroll_loops_completely. */ | |
111 | 938 } |
939 | |
940 /* Store the loop for later unlooping and exit removal. */ | |
941 loops_to_unloop.safe_push (loop); | |
942 loops_to_unloop_nunroll.safe_push (n_unroll); | |
943 | |
944 if (dump_enabled_p ()) | |
945 { | |
946 if (!n_unroll) | |
947 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, | |
948 "loop turned into non-loop; it never loops\n"); | |
949 else | |
950 { | |
951 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, | |
952 "loop with %d iterations completely unrolled", | |
131 | 953 (int) n_unroll); |
111 | 954 if (loop->header->count.initialized_p ()) |
955 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, | |
956 " (header execution count %d)", | |
957 (int)loop->header->count.to_gcov_type ()); | |
958 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n"); | |
959 } | |
960 } | |
0 | 961 |
962 if (dump_file && (dump_flags & TDF_DETAILS)) | |
111 | 963 { |
964 if (exit) | |
965 fprintf (dump_file, "Exit condition of peeled iterations was " | |
966 "eliminated.\n"); | |
967 if (edge_to_cancel) | |
968 fprintf (dump_file, "Last iteration exit edge was proved true.\n"); | |
969 else | |
970 fprintf (dump_file, "Latch of last iteration was marked by " | |
971 "__builtin_unreachable ().\n"); | |
972 } | |
0 | 973 |
974 return true; | |
975 } | |
976 | |
111 | 977 /* Return number of instructions after peeling. */ |
978 static unsigned HOST_WIDE_INT | |
979 estimated_peeled_sequence_size (struct loop_size *size, | |
980 unsigned HOST_WIDE_INT npeel) | |
981 { | |
982 return MAX (npeel * (HOST_WIDE_INT) (size->overall | |
983 - size->eliminated_by_peeling), 1); | |
984 } | |
985 | |
986 /* If the loop is expected to iterate N times and is | |
987 small enough, duplicate the loop body N+1 times before | |
988 the loop itself. This way the hot path will never | |
989 enter the loop. | |
990 Parameters are the same as for try_unroll_loops_completely */ | |
991 | |
992 static bool | |
145 | 993 try_peel_loop (class loop *loop, |
131 | 994 edge exit, tree niter, bool may_be_zero, |
111 | 995 HOST_WIDE_INT maxiter) |
996 { | |
997 HOST_WIDE_INT npeel; | |
998 struct loop_size size; | |
999 int peeled_size; | |
1000 | |
131 | 1001 if (!flag_peel_loops |
145 | 1002 || param_max_peel_times <= 0 |
111 | 1003 || !peeled_loops) |
1004 return false; | |
1005 | |
1006 if (bitmap_bit_p (peeled_loops, loop->num)) | |
1007 { | |
1008 if (dump_file) | |
1009 fprintf (dump_file, "Not peeling: loop is already peeled\n"); | |
1010 return false; | |
1011 } | |
1012 | |
131 | 1013 /* We don't peel loops that will be unrolled as this can duplicate a |
1014 loop more times than the user requested. */ | |
1015 if (loop->unroll) | |
1016 { | |
1017 if (dump_file) | |
1018 fprintf (dump_file, "Not peeling: user didn't want it peeled.\n"); | |
1019 return false; | |
1020 } | |
1021 | |
111 | 1022 /* Peel only innermost loops. |
1023 While the code is perfectly capable of peeling non-innermost loops, | |
1024 the heuristics would probably need some improvements. */ | |
1025 if (loop->inner) | |
1026 { | |
1027 if (dump_file) | |
131 | 1028 fprintf (dump_file, "Not peeling: outer loop\n"); |
111 | 1029 return false; |
1030 } | |
1031 | |
1032 if (!optimize_loop_for_speed_p (loop)) | |
1033 { | |
1034 if (dump_file) | |
131 | 1035 fprintf (dump_file, "Not peeling: cold loop\n"); |
111 | 1036 return false; |
1037 } | |
1038 | |
1039 /* Check if there is an estimate on the number of iterations. */ | |
1040 npeel = estimated_loop_iterations_int (loop); | |
1041 if (npeel < 0) | |
1042 npeel = likely_max_loop_iterations_int (loop); | |
1043 if (npeel < 0) | |
1044 { | |
1045 if (dump_file) | |
1046 fprintf (dump_file, "Not peeling: number of iterations is not " | |
1047 "estimated\n"); | |
1048 return false; | |
1049 } | |
1050 if (maxiter >= 0 && maxiter <= npeel) | |
1051 { | |
1052 if (dump_file) | |
131 | 1053 fprintf (dump_file, "Not peeling: upper bound is known so can " |
111 | 1054 "unroll completely\n"); |
1055 return false; | |
1056 } | |
1057 | |
1058 /* We want to peel estimated number of iterations + 1 (so we never | |
1059 enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES | |
1060 and be sure to avoid overflows. */ | |
145 | 1061 if (npeel > param_max_peel_times - 1) |
111 | 1062 { |
1063 if (dump_file) | |
131 | 1064 fprintf (dump_file, "Not peeling: rolls too much " |
111 | 1065 "(%i + 1 > --param max-peel-times)\n", (int) npeel); |
1066 return false; | |
1067 } | |
1068 npeel++; | |
1069 | |
1070 /* Check peeled loops size. */ | |
1071 tree_estimate_loop_size (loop, exit, NULL, &size, | |
145 | 1072 param_max_peeled_insns); |
111 | 1073 if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel)) |
145 | 1074 > param_max_peeled_insns) |
111 | 1075 { |
1076 if (dump_file) | |
131 | 1077 fprintf (dump_file, "Not peeling: peeled sequence size is too large " |
111 | 1078 "(%i insns > --param max-peel-insns)", peeled_size); |
1079 return false; | |
1080 } | |
1081 | |
145 | 1082 if (!dbg_cnt (gimple_unroll)) |
1083 return false; | |
1084 | |
111 | 1085 /* Duplicate possibly eliminating the exits. */ |
1086 initialize_original_copy_tables (); | |
1087 auto_sbitmap wont_exit (npeel + 1); | |
1088 if (exit && niter | |
1089 && TREE_CODE (niter) == INTEGER_CST | |
1090 && wi::leu_p (npeel, wi::to_widest (niter))) | |
1091 { | |
1092 bitmap_ones (wont_exit); | |
1093 bitmap_clear_bit (wont_exit, 0); | |
1094 } | |
1095 else | |
1096 { | |
1097 exit = NULL; | |
1098 bitmap_clear (wont_exit); | |
1099 } | |
131 | 1100 if (may_be_zero) |
1101 bitmap_clear_bit (wont_exit, 1); | |
111 | 1102 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1103 npeel, wont_exit, | |
1104 exit, &edges_to_remove, | |
1105 DLTHE_FLAG_UPDATE_FREQ)) | |
1106 { | |
1107 free_original_copy_tables (); | |
1108 return false; | |
1109 } | |
1110 free_original_copy_tables (); | |
1111 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1112 { | |
1113 fprintf (dump_file, "Peeled loop %d, %i times.\n", | |
1114 loop->num, (int) npeel); | |
1115 } | |
1116 if (loop->any_estimate) | |
1117 { | |
1118 if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) | |
1119 loop->nb_iterations_estimate -= npeel; | |
1120 else | |
1121 loop->nb_iterations_estimate = 0; | |
1122 } | |
1123 if (loop->any_upper_bound) | |
1124 { | |
1125 if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound)) | |
1126 loop->nb_iterations_upper_bound -= npeel; | |
1127 else | |
1128 loop->nb_iterations_upper_bound = 0; | |
1129 } | |
1130 if (loop->any_likely_upper_bound) | |
1131 { | |
1132 if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound)) | |
1133 loop->nb_iterations_likely_upper_bound -= npeel; | |
1134 else | |
1135 { | |
1136 loop->any_estimate = true; | |
1137 loop->nb_iterations_estimate = 0; | |
1138 loop->nb_iterations_likely_upper_bound = 0; | |
1139 } | |
1140 } | |
1141 profile_count entry_count = profile_count::zero (); | |
1142 | |
1143 edge e; | |
1144 edge_iterator ei; | |
1145 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
1146 if (e->src != loop->latch) | |
1147 { | |
1148 if (e->src->count.initialized_p ()) | |
145 | 1149 entry_count += e->src->count; |
111 | 1150 gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); |
1151 } | |
145 | 1152 profile_probability p; |
131 | 1153 p = entry_count.probability_in (loop->header->count); |
111 | 1154 scale_loop_profile (loop, p, 0); |
1155 bitmap_set_bit (peeled_loops, loop->num); | |
1156 return true; | |
1157 } | |
0 | 1158 /* Adds a canonical induction variable to LOOP if suitable. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1159 CREATE_IV is true if we may create a new iv. UL determines |
0 | 1160 which loops we are allowed to completely unroll. If TRY_EVAL is true, we try |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1161 to determine the number of iterations of a loop by direct evaluation. |
111 | 1162 Returns true if cfg is changed. */ |
0 | 1163 |
1164 static bool | |
145 | 1165 canonicalize_loop_induction_variables (class loop *loop, |
0 | 1166 bool create_iv, enum unroll_level ul, |
131 | 1167 bool try_eval, bool allow_peel) |
0 | 1168 { |
1169 edge exit = NULL; | |
1170 tree niter; | |
111 | 1171 HOST_WIDE_INT maxiter; |
1172 bool modified = false; | |
131 | 1173 dump_user_location_t locus; |
145 | 1174 class tree_niter_desc niter_desc; |
131 | 1175 bool may_be_zero = false; |
0 | 1176 |
131 | 1177 /* For unrolling allow conditional constant or zero iterations, thus |
1178 perform loop-header copying on-the-fly. */ | |
111 | 1179 exit = single_exit (loop); |
131 | 1180 niter = chrec_dont_know; |
1181 if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false)) | |
1182 { | |
1183 niter = niter_desc.niter; | |
1184 may_be_zero | |
1185 = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero); | |
1186 } | |
0 | 1187 if (TREE_CODE (niter) == INTEGER_CST) |
131 | 1188 locus = last_stmt (exit->src); |
0 | 1189 else |
1190 { | |
131 | 1191 /* For non-constant niter fold may_be_zero into niter again. */ |
1192 if (may_be_zero) | |
1193 { | |
1194 if (COMPARISON_CLASS_P (niter_desc.may_be_zero)) | |
1195 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), | |
1196 niter_desc.may_be_zero, | |
1197 build_int_cst (TREE_TYPE (niter), 0), niter); | |
1198 else | |
1199 niter = chrec_dont_know; | |
1200 may_be_zero = false; | |
1201 } | |
1202 | |
0 | 1203 /* If the loop has more than one exit, try checking all of them |
1204 for # of iterations determinable through scev. */ | |
111 | 1205 if (!exit) |
0 | 1206 niter = find_loop_niter (loop, &exit); |
1207 | |
1208 /* Finally if everything else fails, try brute force evaluation. */ | |
1209 if (try_eval | |
1210 && (chrec_contains_undetermined (niter) | |
1211 || TREE_CODE (niter) != INTEGER_CST)) | |
1212 niter = find_loop_niter_by_eval (loop, &exit); | |
1213 | |
111 | 1214 if (exit) |
131 | 1215 locus = last_stmt (exit->src); |
111 | 1216 |
1217 if (TREE_CODE (niter) != INTEGER_CST) | |
1218 exit = NULL; | |
0 | 1219 } |
1220 | |
111 | 1221 /* We work exceptionally hard here to estimate the bound |
1222 by find_loop_niter_by_eval. Be sure to keep it for future. */ | |
1223 if (niter && TREE_CODE (niter) == INTEGER_CST) | |
1224 { | |
145 | 1225 vec<edge> exits = get_loop_exit_edges (loop); |
111 | 1226 record_niter_bound (loop, wi::to_widest (niter), |
145 | 1227 exit == single_likely_exit (loop, exits), true); |
1228 exits.release (); | |
111 | 1229 } |
1230 | |
1231 /* Force re-computation of loop bounds so we can remove redundant exits. */ | |
1232 maxiter = max_loop_iterations_int (loop); | |
1233 | |
1234 if (dump_file && (dump_flags & TDF_DETAILS) | |
1235 && TREE_CODE (niter) == INTEGER_CST) | |
0 | 1236 { |
1237 fprintf (dump_file, "Loop %d iterates ", loop->num); | |
1238 print_generic_expr (dump_file, niter, TDF_SLIM); | |
1239 fprintf (dump_file, " times.\n"); | |
1240 } | |
111 | 1241 if (dump_file && (dump_flags & TDF_DETAILS) |
1242 && maxiter >= 0) | |
1243 { | |
1244 fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num, | |
1245 (int)maxiter); | |
1246 } | |
1247 if (dump_file && (dump_flags & TDF_DETAILS) | |
1248 && likely_max_loop_iterations_int (loop) >= 0) | |
1249 { | |
1250 fprintf (dump_file, "Loop %d likely iterates at most %i times.\n", | |
1251 loop->num, (int)likely_max_loop_iterations_int (loop)); | |
1252 } | |
0 | 1253 |
111 | 1254 /* Remove exits that are known to be never taken based on loop bound. |
1255 Needs to be called after compilation of max_loop_iterations_int that | |
1256 populates the loop bounds. */ | |
1257 modified |= remove_redundant_iv_tests (loop); | |
1258 | |
131 | 1259 if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul, |
1260 maxiter, locus, allow_peel)) | |
0 | 1261 return true; |
1262 | |
111 | 1263 if (create_iv |
1264 && niter && !chrec_contains_undetermined (niter) | |
1265 && exit && just_once_each_iteration_p (loop, exit->src)) | |
131 | 1266 { |
1267 tree iv_niter = niter; | |
1268 if (may_be_zero) | |
1269 { | |
1270 if (COMPARISON_CLASS_P (niter_desc.may_be_zero)) | |
1271 iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter), | |
1272 niter_desc.may_be_zero, | |
1273 build_int_cst (TREE_TYPE (iv_niter), 0), | |
1274 iv_niter); | |
1275 else | |
1276 iv_niter = NULL_TREE; | |
1277 } | |
1278 if (iv_niter) | |
1279 create_canonical_iv (loop, exit, iv_niter); | |
1280 } | |
0 | 1281 |
111 | 1282 if (ul == UL_ALL) |
131 | 1283 modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter); |
111 | 1284 |
1285 return modified; | |
0 | 1286 } |
1287 | |
1288 /* The main entry point of the pass. Adds canonical induction variables | |
1289 to the suitable loops. */ | |
1290 | |
1291 unsigned int | |
1292 canonicalize_induction_variables (void) | |
1293 { | |
145 | 1294 class loop *loop; |
0 | 1295 bool changed = false; |
111 | 1296 bool irred_invalidated = false; |
1297 bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1298 |
111 | 1299 estimate_numbers_of_iterations (cfun); |
1300 | |
1301 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | |
0 | 1302 { |
1303 changed |= canonicalize_loop_induction_variables (loop, | |
1304 true, UL_SINGLE_ITER, | |
131 | 1305 true, false); |
0 | 1306 } |
111 | 1307 gcc_assert (!need_ssa_update_p (cfun)); |
1308 | |
1309 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); | |
1310 if (irred_invalidated | |
1311 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
1312 mark_irreducible_loops (); | |
0 | 1313 |
1314 /* Clean up the information about numbers of iterations, since brute force | |
1315 evaluation could reveal new information. */ | |
111 | 1316 free_numbers_of_iterations_estimates (cfun); |
0 | 1317 scev_reset (); |
1318 | |
111 | 1319 if (!bitmap_empty_p (loop_closed_ssa_invalidated)) |
1320 { | |
1321 gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA)); | |
1322 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); | |
1323 } | |
1324 BITMAP_FREE (loop_closed_ssa_invalidated); | |
1325 | |
0 | 1326 if (changed) |
1327 return TODO_cleanup_cfg; | |
1328 return 0; | |
1329 } | |
1330 | |
111 | 1331 /* Process loops from innermost to outer, stopping at the innermost |
1332 loop we unrolled. */ | |
1333 | |
1334 static bool | |
1335 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, | |
145 | 1336 bitmap father_bbs, class loop *loop) |
111 | 1337 { |
145 | 1338 class loop *loop_father; |
111 | 1339 bool changed = false; |
145 | 1340 class loop *inner; |
111 | 1341 enum unroll_level ul; |
131 | 1342 unsigned num = number_of_loops (cfun); |
111 | 1343 |
131 | 1344 /* Process inner loops first. Don't walk loops added by the recursive |
1345 calls because SSA form is not up-to-date. They can be handled in the | |
1346 next iteration. */ | |
1347 bitmap child_father_bbs = NULL; | |
111 | 1348 for (inner = loop->inner; inner != NULL; inner = inner->next) |
131 | 1349 if ((unsigned) inner->num < num) |
1350 { | |
1351 if (!child_father_bbs) | |
1352 child_father_bbs = BITMAP_ALLOC (NULL); | |
1353 if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer, | |
1354 child_father_bbs, inner)) | |
1355 { | |
1356 bitmap_ior_into (father_bbs, child_father_bbs); | |
1357 bitmap_clear (child_father_bbs); | |
1358 changed = true; | |
1359 } | |
1360 } | |
1361 if (child_father_bbs) | |
1362 BITMAP_FREE (child_father_bbs); | |
1363 | |
111 | 1364 /* If we changed an inner loop we cannot process outer loops in this |
1365 iteration because SSA form is not up-to-date. Continue with | |
1366 siblings of outer loops instead. */ | |
1367 if (changed) | |
131 | 1368 { |
1369 /* If we are recorded as father clear all other fathers that | |
1370 are necessarily covered already to avoid redundant work. */ | |
1371 if (bitmap_bit_p (father_bbs, loop->header->index)) | |
1372 { | |
1373 bitmap_clear (father_bbs); | |
1374 bitmap_set_bit (father_bbs, loop->header->index); | |
1375 } | |
1376 return true; | |
1377 } | |
111 | 1378 |
1379 /* Don't unroll #pragma omp simd loops until the vectorizer | |
1380 attempts to vectorize those. */ | |
1381 if (loop->force_vectorize) | |
1382 return false; | |
1383 | |
1384 /* Try to unroll this loop. */ | |
1385 loop_father = loop_outer (loop); | |
1386 if (!loop_father) | |
1387 return false; | |
1388 | |
131 | 1389 if (loop->unroll > 1) |
1390 ul = UL_ALL; | |
1391 else if (may_increase_size && optimize_loop_nest_for_speed_p (loop) | |
111 | 1392 /* Unroll outermost loops only if asked to do so or they do |
1393 not cause code growth. */ | |
1394 && (unroll_outer || loop_outer (loop_father))) | |
1395 ul = UL_ALL; | |
1396 else | |
1397 ul = UL_NO_GROWTH; | |
1398 | |
1399 if (canonicalize_loop_induction_variables | |
131 | 1400 (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer)) |
111 | 1401 { |
1402 /* If we'll continue unrolling, we need to propagate constants | |
1403 within the new basic blocks to fold away induction variable | |
1404 computations; otherwise, the size might blow up before the | |
1405 iteration is complete and the IR eventually cleaned up. */ | |
1406 if (loop_outer (loop_father)) | |
131 | 1407 { |
1408 /* Once we process our father we will have processed | |
1409 the fathers of our children as well, so avoid doing | |
1410 redundant work and clear fathers we've gathered sofar. */ | |
1411 bitmap_clear (father_bbs); | |
1412 bitmap_set_bit (father_bbs, loop_father->header->index); | |
1413 } | |
111 | 1414 |
1415 return true; | |
1416 } | |
1417 | |
1418 return false; | |
1419 } | |
1420 | |
0 | 1421 /* Unroll LOOPS completely if they iterate just few times. Unless |
1422 MAY_INCREASE_SIZE is true, perform the unrolling only if the | |
1423 size of the code does not increase. */ | |
1424 | |
131 | 1425 static unsigned int |
0 | 1426 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) |
1427 { | |
111 | 1428 bitmap father_bbs = BITMAP_ALLOC (NULL); |
0 | 1429 bool changed; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1430 int iteration = 0; |
111 | 1431 bool irred_invalidated = false; |
1432 | |
1433 estimate_numbers_of_iterations (cfun); | |
0 | 1434 |
1435 do | |
1436 { | |
1437 changed = false; | |
111 | 1438 bitmap loop_closed_ssa_invalidated = NULL; |
0 | 1439 |
111 | 1440 if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) |
1441 loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); | |
0 | 1442 |
111 | 1443 free_numbers_of_iterations_estimates (cfun); |
1444 estimate_numbers_of_iterations (cfun); | |
1445 | |
1446 changed = tree_unroll_loops_completely_1 (may_increase_size, | |
1447 unroll_outer, father_bbs, | |
1448 current_loops->tree_root); | |
0 | 1449 if (changed) |
1450 { | |
111 | 1451 unsigned i; |
1452 | |
1453 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); | |
1454 | |
145 | 1455 /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */ |
111 | 1456 if (loop_closed_ssa_invalidated |
1457 && !bitmap_empty_p (loop_closed_ssa_invalidated)) | |
1458 rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated, | |
1459 TODO_update_ssa); | |
1460 else | |
1461 update_ssa (TODO_update_ssa); | |
1462 | |
1463 /* father_bbs is a bitmap of loop father header BB indices. | |
1464 Translate that to what non-root loops these BBs belong to now. */ | |
1465 bitmap_iterator bi; | |
1466 bitmap fathers = BITMAP_ALLOC (NULL); | |
1467 EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi) | |
1468 { | |
1469 basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i); | |
1470 if (! unrolled_loop_bb) | |
1471 continue; | |
1472 if (loop_outer (unrolled_loop_bb->loop_father)) | |
1473 bitmap_set_bit (fathers, | |
1474 unrolled_loop_bb->loop_father->num); | |
1475 } | |
1476 bitmap_clear (father_bbs); | |
1477 /* Propagate the constants within the new basic blocks. */ | |
1478 EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi) | |
1479 { | |
1480 loop_p father = get_loop (cfun, i); | |
131 | 1481 bitmap exit_bbs = BITMAP_ALLOC (NULL); |
1482 loop_exit *exit = father->exits->next; | |
1483 while (exit->e) | |
1484 { | |
1485 bitmap_set_bit (exit_bbs, exit->e->dest->index); | |
1486 exit = exit->next; | |
1487 } | |
1488 do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs); | |
111 | 1489 } |
1490 BITMAP_FREE (fathers); | |
1491 | |
0 | 1492 /* This will take care of removing completely unrolled loops |
1493 from the loop structures so we can continue unrolling now | |
1494 innermost loops. */ | |
1495 if (cleanup_tree_cfg ()) | |
1496 update_ssa (TODO_update_ssa_only_virtuals); | |
1497 | |
1498 /* Clean up the information about numbers of iterations, since | |
1499 complete unrolling might have invalidated it. */ | |
1500 scev_reset (); | |
111 | 1501 if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA)) |
1502 verify_loop_closed_ssa (true); | |
0 | 1503 } |
111 | 1504 if (loop_closed_ssa_invalidated) |
1505 BITMAP_FREE (loop_closed_ssa_invalidated); | |
0 | 1506 } |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1507 while (changed |
145 | 1508 && ++iteration <= param_max_unroll_iterations); |
0 | 1509 |
111 | 1510 BITMAP_FREE (father_bbs); |
1511 | |
1512 if (irred_invalidated | |
1513 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
1514 mark_irreducible_loops (); | |
1515 | |
0 | 1516 return 0; |
1517 } | |
111 | 1518 |
1519 /* Canonical induction variable creation pass. */ | |
1520 | |
1521 namespace { | |
1522 | |
1523 const pass_data pass_data_iv_canon = | |
1524 { | |
1525 GIMPLE_PASS, /* type */ | |
1526 "ivcanon", /* name */ | |
1527 OPTGROUP_LOOP, /* optinfo_flags */ | |
1528 TV_TREE_LOOP_IVCANON, /* tv_id */ | |
1529 ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1530 0, /* properties_provided */ | |
1531 0, /* properties_destroyed */ | |
1532 0, /* todo_flags_start */ | |
1533 0, /* todo_flags_finish */ | |
1534 }; | |
1535 | |
1536 class pass_iv_canon : public gimple_opt_pass | |
1537 { | |
1538 public: | |
1539 pass_iv_canon (gcc::context *ctxt) | |
1540 : gimple_opt_pass (pass_data_iv_canon, ctxt) | |
1541 {} | |
1542 | |
1543 /* opt_pass methods: */ | |
1544 virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; } | |
1545 virtual unsigned int execute (function *fun); | |
1546 | |
1547 }; // class pass_iv_canon | |
1548 | |
1549 unsigned int | |
1550 pass_iv_canon::execute (function *fun) | |
1551 { | |
1552 if (number_of_loops (fun) <= 1) | |
1553 return 0; | |
1554 | |
1555 return canonicalize_induction_variables (); | |
1556 } | |
1557 | |
1558 } // anon namespace | |
1559 | |
1560 gimple_opt_pass * | |
1561 make_pass_iv_canon (gcc::context *ctxt) | |
1562 { | |
1563 return new pass_iv_canon (ctxt); | |
1564 } | |
1565 | |
1566 /* Complete unrolling of loops. */ | |
1567 | |
1568 namespace { | |
1569 | |
1570 const pass_data pass_data_complete_unroll = | |
1571 { | |
1572 GIMPLE_PASS, /* type */ | |
1573 "cunroll", /* name */ | |
1574 OPTGROUP_LOOP, /* optinfo_flags */ | |
1575 TV_COMPLETE_UNROLL, /* tv_id */ | |
1576 ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1577 0, /* properties_provided */ | |
1578 0, /* properties_destroyed */ | |
1579 0, /* todo_flags_start */ | |
1580 0, /* todo_flags_finish */ | |
1581 }; | |
1582 | |
1583 class pass_complete_unroll : public gimple_opt_pass | |
1584 { | |
1585 public: | |
1586 pass_complete_unroll (gcc::context *ctxt) | |
1587 : gimple_opt_pass (pass_data_complete_unroll, ctxt) | |
1588 {} | |
1589 | |
1590 /* opt_pass methods: */ | |
1591 virtual unsigned int execute (function *); | |
1592 | |
1593 }; // class pass_complete_unroll | |
1594 | |
1595 unsigned int | |
1596 pass_complete_unroll::execute (function *fun) | |
1597 { | |
1598 if (number_of_loops (fun) <= 1) | |
1599 return 0; | |
1600 | |
1601 /* If we ever decide to run loop peeling more than once, we will need to | |
1602 track loops already peeled in loop structures themselves to avoid | |
1603 re-peeling the same loop multiple times. */ | |
1604 if (flag_peel_loops) | |
1605 peeled_loops = BITMAP_ALLOC (NULL); | |
131 | 1606 unsigned int val = tree_unroll_loops_completely (flag_unroll_loops |
1607 || flag_peel_loops | |
1608 || optimize >= 3, true); | |
111 | 1609 if (peeled_loops) |
1610 { | |
1611 BITMAP_FREE (peeled_loops); | |
1612 peeled_loops = NULL; | |
1613 } | |
1614 return val; | |
1615 } | |
1616 | |
1617 } // anon namespace | |
1618 | |
1619 gimple_opt_pass * | |
1620 make_pass_complete_unroll (gcc::context *ctxt) | |
1621 { | |
1622 return new pass_complete_unroll (ctxt); | |
1623 } | |
1624 | |
1625 /* Complete unrolling of inner loops. */ | |
1626 | |
1627 namespace { | |
1628 | |
1629 const pass_data pass_data_complete_unrolli = | |
1630 { | |
1631 GIMPLE_PASS, /* type */ | |
1632 "cunrolli", /* name */ | |
1633 OPTGROUP_LOOP, /* optinfo_flags */ | |
1634 TV_COMPLETE_UNROLL, /* tv_id */ | |
1635 ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1636 0, /* properties_provided */ | |
1637 0, /* properties_destroyed */ | |
1638 0, /* todo_flags_start */ | |
1639 0, /* todo_flags_finish */ | |
1640 }; | |
1641 | |
1642 class pass_complete_unrolli : public gimple_opt_pass | |
1643 { | |
1644 public: | |
1645 pass_complete_unrolli (gcc::context *ctxt) | |
1646 : gimple_opt_pass (pass_data_complete_unrolli, ctxt) | |
1647 {} | |
1648 | |
1649 /* opt_pass methods: */ | |
1650 virtual bool gate (function *) { return optimize >= 2; } | |
1651 virtual unsigned int execute (function *); | |
1652 | |
1653 }; // class pass_complete_unrolli | |
1654 | |
1655 unsigned int | |
1656 pass_complete_unrolli::execute (function *fun) | |
1657 { | |
1658 unsigned ret = 0; | |
1659 | |
131 | 1660 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); |
111 | 1661 if (number_of_loops (fun) > 1) |
1662 { | |
1663 scev_initialize (); | |
1664 ret = tree_unroll_loops_completely (optimize >= 3, false); | |
1665 scev_finalize (); | |
1666 } | |
1667 loop_optimizer_finalize (); | |
1668 | |
1669 return ret; | |
1670 } | |
1671 | |
1672 } // anon namespace | |
1673 | |
1674 gimple_opt_pass * | |
1675 make_pass_complete_unrolli (gcc::context *ctxt) | |
1676 { | |
1677 return new pass_complete_unrolli (ctxt); | |
1678 } | |
1679 | |
1680 |