Mercurial > hg > CbC > CbC_gcc
comparison gcc/loop-unroll.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Loop unrolling and peeling. | 1 /* Loop unrolling. |
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010 | 2 Copyright (C) 2002-2017 Free Software Foundation, Inc. |
3 Free Software Foundation, Inc. | |
4 | 3 |
5 This file is part of GCC. | 4 This file is part of GCC. |
6 | 5 |
7 GCC is free software; you can redistribute it and/or modify it under | 6 GCC is free software; you can redistribute it and/or modify it under |
8 the terms of the GNU General Public License as published by the Free | 7 the terms of the GNU General Public License as published by the Free |
19 <http://www.gnu.org/licenses/>. */ | 18 <http://www.gnu.org/licenses/>. */ |
20 | 19 |
21 #include "config.h" | 20 #include "config.h" |
22 #include "system.h" | 21 #include "system.h" |
23 #include "coretypes.h" | 22 #include "coretypes.h" |
24 #include "tm.h" | 23 #include "backend.h" |
24 #include "target.h" | |
25 #include "rtl.h" | 25 #include "rtl.h" |
26 #include "hard-reg-set.h" | 26 #include "tree.h" |
27 #include "obstack.h" | 27 #include "cfghooks.h" |
28 #include "basic-block.h" | 28 #include "memmodel.h" |
29 #include "optabs.h" | |
30 #include "emit-rtl.h" | |
31 #include "recog.h" | |
32 #include "profile.h" | |
33 #include "cfgrtl.h" | |
29 #include "cfgloop.h" | 34 #include "cfgloop.h" |
30 #include "cfglayout.h" | |
31 #include "params.h" | 35 #include "params.h" |
32 #include "output.h" | 36 #include "dojump.h" |
33 #include "expr.h" | 37 #include "expr.h" |
34 #include "hashtab.h" | 38 #include "dumpfile.h" |
35 #include "recog.h" | 39 |
36 #include "target.h" | 40 /* This pass performs loop unrolling. We only perform this |
37 | 41 optimization on innermost loops (with single exception) because |
38 /* This pass performs loop unrolling and peeling. We only perform these | |
39 optimizations on innermost loops (with single exception) because | |
40 the impact on performance is greatest here, and we want to avoid | 42 the impact on performance is greatest here, and we want to avoid |
41 unnecessary code size growth. The gain is caused by greater sequentiality | 43 unnecessary code size growth. The gain is caused by greater sequentiality |
42 of code, better code to optimize for further passes and in some cases | 44 of code, better code to optimize for further passes and in some cases |
43 by fewer testings of exit conditions. The main problem is code growth, | 45 by fewer testings of exit conditions. The main problem is code growth, |
44 that impacts performance negatively due to effect of caches. | 46 that impacts performance negatively due to effect of caches. |
45 | 47 |
46 What we do: | 48 What we do: |
47 | 49 |
48 -- complete peeling of once-rolling loops; this is the above mentioned | |
49 exception, as this causes loop to be cancelled completely and | |
50 does not cause code growth | |
51 -- complete peeling of loops that roll (small) constant times. | |
52 -- simple peeling of first iterations of loops that do not roll much | |
53 (according to profile feedback) | |
54 -- unrolling of loops that roll constant times; this is almost always | 50 -- unrolling of loops that roll constant times; this is almost always |
55 win, as we get rid of exit condition tests. | 51 win, as we get rid of exit condition tests. |
56 -- unrolling of loops that roll number of times that we can compute | 52 -- unrolling of loops that roll number of times that we can compute |
57 in runtime; we also get rid of exit condition tests here, but there | 53 in runtime; we also get rid of exit condition tests here, but there |
58 is the extra expense for calculating the number of iterations | 54 is the extra expense for calculating the number of iterations |
61 it may even slow down the code | 57 it may even slow down the code |
62 For more detailed descriptions of each of those, see comments at | 58 For more detailed descriptions of each of those, see comments at |
63 appropriate function below. | 59 appropriate function below. |
64 | 60 |
65 There is a lot of parameters (defined and described in params.def) that | 61 There is a lot of parameters (defined and described in params.def) that |
66 control how much we unroll/peel. | 62 control how much we unroll. |
67 | 63 |
68 ??? A great problem is that we don't have a good way how to determine | 64 ??? A great problem is that we don't have a good way how to determine |
69 how many times we should unroll the loop; the experiments I have made | 65 how many times we should unroll the loop; the experiments I have made |
70 showed that this choice may affect performance in order of several %. | 66 showed that this choice may affect performance in order of several %. |
71 */ | 67 */ |
72 | 68 |
73 /* Information about induction variables to split. */ | 69 /* Information about induction variables to split. */ |
74 | 70 |
75 struct iv_to_split | 71 struct iv_to_split |
76 { | 72 { |
77 rtx insn; /* The insn in that the induction variable occurs. */ | 73 rtx_insn *insn; /* The insn in that the induction variable occurs. */ |
74 rtx orig_var; /* The variable (register) for the IV before split. */ | |
78 rtx base_var; /* The variable on that the values in the further | 75 rtx base_var; /* The variable on that the values in the further |
79 iterations are based. */ | 76 iterations are based. */ |
80 rtx step; /* Step of the induction variable. */ | 77 rtx step; /* Step of the induction variable. */ |
81 struct iv_to_split *next; /* Next entry in walking order. */ | 78 struct iv_to_split *next; /* Next entry in walking order. */ |
82 unsigned n_loc; | |
83 unsigned loc[3]; /* Location where the definition of the induction | |
84 variable occurs in the insn. For example if | |
85 N_LOC is 2, the expression is located at | |
86 XEXP (XEXP (single_set, loc[0]), loc[1]). */ | |
87 }; | 79 }; |
88 | 80 |
89 /* Information about accumulators to expand. */ | 81 /* Information about accumulators to expand. */ |
90 | 82 |
91 struct var_to_expand | 83 struct var_to_expand |
92 { | 84 { |
93 rtx insn; /* The insn in that the variable expansion occurs. */ | 85 rtx_insn *insn; /* The insn in that the variable expansion occurs. */ |
94 rtx reg; /* The accumulator which is expanded. */ | 86 rtx reg; /* The accumulator which is expanded. */ |
95 VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */ | 87 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */ |
96 struct var_to_expand *next; /* Next entry in walking order. */ | 88 struct var_to_expand *next; /* Next entry in walking order. */ |
97 enum rtx_code op; /* The type of the accumulation - addition, subtraction | 89 enum rtx_code op; /* The type of the accumulation - addition, subtraction |
98 or multiplication. */ | 90 or multiplication. */ |
99 int expansion_count; /* Count the number of expansions generated so far. */ | 91 int expansion_count; /* Count the number of expansions generated so far. */ |
100 int reuse_expansion; /* The expansion we intend to reuse to expand | 92 int reuse_expansion; /* The expansion we intend to reuse to expand |
101 the accumulator. If REUSE_EXPANSION is 0 reuse | 93 the accumulator. If REUSE_EXPANSION is 0 reuse |
102 the original accumulator. Else use | 94 the original accumulator. Else use |
103 var_expansions[REUSE_EXPANSION - 1]. */ | 95 var_expansions[REUSE_EXPANSION - 1]. */ |
104 unsigned accum_pos; /* The position in which the accumulator is placed in | |
105 the insn src. For example in x = x + something | |
106 accum_pos is 0 while in x = something + x accum_pos | |
107 is 1. */ | |
108 }; | 96 }; |
97 | |
98 /* Hashtable helper for iv_to_split. */ | |
99 | |
100 struct iv_split_hasher : free_ptr_hash <iv_to_split> | |
101 { | |
102 static inline hashval_t hash (const iv_to_split *); | |
103 static inline bool equal (const iv_to_split *, const iv_to_split *); | |
104 }; | |
105 | |
106 | |
107 /* A hash function for information about insns to split. */ | |
108 | |
109 inline hashval_t | |
110 iv_split_hasher::hash (const iv_to_split *ivts) | |
111 { | |
112 return (hashval_t) INSN_UID (ivts->insn); | |
113 } | |
114 | |
115 /* An equality functions for information about insns to split. */ | |
116 | |
117 inline bool | |
118 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2) | |
119 { | |
120 return i1->insn == i2->insn; | |
121 } | |
122 | |
123 /* Hashtable helper for iv_to_split. */ | |
124 | |
125 struct var_expand_hasher : free_ptr_hash <var_to_expand> | |
126 { | |
127 static inline hashval_t hash (const var_to_expand *); | |
128 static inline bool equal (const var_to_expand *, const var_to_expand *); | |
129 }; | |
130 | |
131 /* Return a hash for VES. */ | |
132 | |
133 inline hashval_t | |
134 var_expand_hasher::hash (const var_to_expand *ves) | |
135 { | |
136 return (hashval_t) INSN_UID (ves->insn); | |
137 } | |
138 | |
139 /* Return true if I1 and I2 refer to the same instruction. */ | |
140 | |
141 inline bool | |
142 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2) | |
143 { | |
144 return i1->insn == i2->insn; | |
145 } | |
109 | 146 |
110 /* Information about optimization applied in | 147 /* Information about optimization applied in |
111 the unrolled loop. */ | 148 the unrolled loop. */ |
112 | 149 |
113 struct opt_info | 150 struct opt_info |
114 { | 151 { |
115 htab_t insns_to_split; /* A hashtable of insns to split. */ | 152 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to |
153 split. */ | |
116 struct iv_to_split *iv_to_split_head; /* The first iv to split. */ | 154 struct iv_to_split *iv_to_split_head; /* The first iv to split. */ |
117 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */ | 155 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */ |
118 htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators | 156 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of |
119 to expand. */ | 157 insns with accumulators to expand. */ |
120 struct var_to_expand *var_to_expand_head; /* The first var to expand. */ | 158 struct var_to_expand *var_to_expand_head; /* The first var to expand. */ |
121 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */ | 159 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */ |
122 unsigned first_new_block; /* The first basic block that was | 160 unsigned first_new_block; /* The first basic block that was |
123 duplicated. */ | 161 duplicated. */ |
124 basic_block loop_exit; /* The loop exit basic block. */ | 162 basic_block loop_exit; /* The loop exit basic block. */ |
125 basic_block loop_preheader; /* The loop preheader basic block. */ | 163 basic_block loop_preheader; /* The loop preheader basic block. */ |
126 }; | 164 }; |
127 | 165 |
128 static void decide_unrolling_and_peeling (int); | |
129 static void peel_loops_completely (int); | |
130 static void decide_peel_simple (struct loop *, int); | |
131 static void decide_peel_once_rolling (struct loop *, int); | |
132 static void decide_peel_completely (struct loop *, int); | |
133 static void decide_unroll_stupid (struct loop *, int); | 166 static void decide_unroll_stupid (struct loop *, int); |
134 static void decide_unroll_constant_iterations (struct loop *, int); | 167 static void decide_unroll_constant_iterations (struct loop *, int); |
135 static void decide_unroll_runtime_iterations (struct loop *, int); | 168 static void decide_unroll_runtime_iterations (struct loop *, int); |
136 static void peel_loop_simple (struct loop *); | |
137 static void peel_loop_completely (struct loop *); | |
138 static void unroll_loop_stupid (struct loop *); | 169 static void unroll_loop_stupid (struct loop *); |
170 static void decide_unrolling (int); | |
139 static void unroll_loop_constant_iterations (struct loop *); | 171 static void unroll_loop_constant_iterations (struct loop *); |
140 static void unroll_loop_runtime_iterations (struct loop *); | 172 static void unroll_loop_runtime_iterations (struct loop *); |
141 static struct opt_info *analyze_insns_in_loop (struct loop *); | 173 static struct opt_info *analyze_insns_in_loop (struct loop *); |
142 static void opt_info_start_duplication (struct opt_info *); | 174 static void opt_info_start_duplication (struct opt_info *); |
143 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); | 175 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); |
144 static void free_opt_info (struct opt_info *); | 176 static void free_opt_info (struct opt_info *); |
145 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx); | 177 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *); |
146 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *); | 178 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *); |
147 static struct iv_to_split *analyze_iv_to_split_insn (rtx); | 179 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *); |
148 static void expand_var_during_unrolling (struct var_to_expand *, rtx); | 180 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *); |
149 static void insert_var_expansion_initialization (struct var_to_expand *, | 181 static void insert_var_expansion_initialization (struct var_to_expand *, |
150 basic_block); | 182 basic_block); |
151 static void combine_var_copies_in_loop_exit (struct var_to_expand *, | 183 static void combine_var_copies_in_loop_exit (struct var_to_expand *, |
152 basic_block); | 184 basic_block); |
153 static rtx get_expansion (struct var_to_expand *); | 185 static rtx get_expansion (struct var_to_expand *); |
154 | 186 |
155 /* Unroll and/or peel (depending on FLAGS) LOOPS. */ | 187 /* Emit a message summarizing the unroll that will be |
156 void | 188 performed for LOOP, along with the loop's location LOCUS, if |
157 unroll_and_peel_loops (int flags) | 189 appropriate given the dump or -fopt-info settings. */ |
190 | |
191 static void | |
192 report_unroll (struct loop *loop, location_t locus) | |
193 { | |
194 dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS; | |
195 | |
196 if (loop->lpt_decision.decision == LPT_NONE) | |
197 return; | |
198 | |
199 if (!dump_enabled_p ()) | |
200 return; | |
201 | |
202 dump_printf_loc (report_flags, locus, | |
203 "loop unrolled %d times", | |
204 loop->lpt_decision.times); | |
205 if (profile_info && loop->header->count.initialized_p ()) | |
206 dump_printf (report_flags, | |
207 " (header execution count %d)", | |
208 (int)loop->header->count.to_gcov_type ()); | |
209 | |
210 dump_printf (report_flags, "\n"); | |
211 } | |
212 | |
213 /* Decide whether unroll loops and how much. */ | |
214 static void | |
215 decide_unrolling (int flags) | |
158 { | 216 { |
159 struct loop *loop; | 217 struct loop *loop; |
160 bool check; | |
161 loop_iterator li; | |
162 | |
163 /* First perform complete loop peeling (it is almost surely a win, | |
164 and affects parameters for further decision a lot). */ | |
165 peel_loops_completely (flags); | |
166 | |
167 /* Now decide rest of unrolling and peeling. */ | |
168 decide_unrolling_and_peeling (flags); | |
169 | 218 |
170 /* Scan the loops, inner ones first. */ | 219 /* Scan the loops, inner ones first. */ |
171 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | 220 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
172 { | |
173 check = true; | |
174 /* And perform the appropriate transformations. */ | |
175 switch (loop->lpt_decision.decision) | |
176 { | |
177 case LPT_PEEL_COMPLETELY: | |
178 /* Already done. */ | |
179 gcc_unreachable (); | |
180 case LPT_PEEL_SIMPLE: | |
181 peel_loop_simple (loop); | |
182 break; | |
183 case LPT_UNROLL_CONSTANT: | |
184 unroll_loop_constant_iterations (loop); | |
185 break; | |
186 case LPT_UNROLL_RUNTIME: | |
187 unroll_loop_runtime_iterations (loop); | |
188 break; | |
189 case LPT_UNROLL_STUPID: | |
190 unroll_loop_stupid (loop); | |
191 break; | |
192 case LPT_NONE: | |
193 check = false; | |
194 break; | |
195 default: | |
196 gcc_unreachable (); | |
197 } | |
198 if (check) | |
199 { | |
200 #ifdef ENABLE_CHECKING | |
201 verify_dominators (CDI_DOMINATORS); | |
202 verify_loop_structure (); | |
203 #endif | |
204 } | |
205 } | |
206 | |
207 iv_analysis_done (); | |
208 } | |
209 | |
210 /* Check whether exit of the LOOP is at the end of loop body. */ | |
211 | |
212 static bool | |
213 loop_exit_at_end_p (struct loop *loop) | |
214 { | |
215 struct niter_desc *desc = get_simple_loop_desc (loop); | |
216 rtx insn; | |
217 | |
218 if (desc->in_edge->dest != loop->latch) | |
219 return false; | |
220 | |
221 /* Check that the latch is empty. */ | |
222 FOR_BB_INSNS (loop->latch, insn) | |
223 { | |
224 if (INSN_P (insn)) | |
225 return false; | |
226 } | |
227 | |
228 return true; | |
229 } | |
230 | |
231 /* Depending on FLAGS, check whether to peel loops completely and do so. */ | |
232 static void | |
233 peel_loops_completely (int flags) | |
234 { | |
235 struct loop *loop; | |
236 loop_iterator li; | |
237 | |
238 /* Scan the loops, the inner ones first. */ | |
239 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
240 { | 221 { |
241 loop->lpt_decision.decision = LPT_NONE; | 222 loop->lpt_decision.decision = LPT_NONE; |
242 | 223 location_t locus = get_loop_location (loop); |
243 if (dump_file) | 224 |
244 fprintf (dump_file, | 225 if (dump_enabled_p ()) |
245 "\n;; *** Considering loop %d for complete peeling ***\n", | 226 dump_printf_loc (MSG_NOTE, locus, |
246 loop->num); | 227 ";; *** Considering loop %d at BB %d for " |
247 | 228 "unrolling ***\n", |
248 loop->ninsns = num_loop_insns (loop); | 229 loop->num, loop->header->index); |
249 | |
250 decide_peel_once_rolling (loop, flags); | |
251 if (loop->lpt_decision.decision == LPT_NONE) | |
252 decide_peel_completely (loop, flags); | |
253 | |
254 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY) | |
255 { | |
256 peel_loop_completely (loop); | |
257 #ifdef ENABLE_CHECKING | |
258 verify_dominators (CDI_DOMINATORS); | |
259 verify_loop_structure (); | |
260 #endif | |
261 } | |
262 } | |
263 } | |
264 | |
265 /* Decide whether unroll or peel loops (depending on FLAGS) and how much. */ | |
266 static void | |
267 decide_unrolling_and_peeling (int flags) | |
268 { | |
269 struct loop *loop; | |
270 loop_iterator li; | |
271 | |
272 /* Scan the loops, inner ones first. */ | |
273 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
274 { | |
275 loop->lpt_decision.decision = LPT_NONE; | |
276 | |
277 if (dump_file) | |
278 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num); | |
279 | 230 |
280 /* Do not peel cold areas. */ | 231 /* Do not peel cold areas. */ |
281 if (optimize_loop_for_size_p (loop)) | 232 if (optimize_loop_for_size_p (loop)) |
282 { | 233 { |
283 if (dump_file) | 234 if (dump_file) |
311 decide_unroll_constant_iterations (loop, flags); | 262 decide_unroll_constant_iterations (loop, flags); |
312 if (loop->lpt_decision.decision == LPT_NONE) | 263 if (loop->lpt_decision.decision == LPT_NONE) |
313 decide_unroll_runtime_iterations (loop, flags); | 264 decide_unroll_runtime_iterations (loop, flags); |
314 if (loop->lpt_decision.decision == LPT_NONE) | 265 if (loop->lpt_decision.decision == LPT_NONE) |
315 decide_unroll_stupid (loop, flags); | 266 decide_unroll_stupid (loop, flags); |
316 if (loop->lpt_decision.decision == LPT_NONE) | 267 |
317 decide_peel_simple (loop, flags); | 268 report_unroll (loop, locus); |
318 } | 269 } |
319 } | 270 } |
320 | 271 |
321 /* Decide whether the LOOP is once rolling and suitable for complete | 272 /* Unroll LOOPS. */ |
322 peeling. */ | 273 void |
323 static void | 274 unroll_loops (int flags) |
324 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) | 275 { |
325 { | 276 struct loop *loop; |
326 struct niter_desc *desc; | 277 bool changed = false; |
327 | 278 |
328 if (dump_file) | 279 /* Now decide rest of unrolling. */ |
329 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n"); | 280 decide_unrolling (flags); |
330 | 281 |
331 /* Is the loop small enough? */ | 282 /* Scan the loops, inner ones first. */ |
332 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) | 283 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
333 { | 284 { |
334 if (dump_file) | 285 /* And perform the appropriate transformations. */ |
335 fprintf (dump_file, ";; Not considering loop, is too big\n"); | 286 switch (loop->lpt_decision.decision) |
336 return; | |
337 } | |
338 | |
339 /* Check for simple loops. */ | |
340 desc = get_simple_loop_desc (loop); | |
341 | |
342 /* Check number of iterations. */ | |
343 if (!desc->simple_p | |
344 || desc->assumptions | |
345 || desc->infinite | |
346 || !desc->const_iter | |
347 || desc->niter != 0) | |
348 { | |
349 if (dump_file) | |
350 fprintf (dump_file, | |
351 ";; Unable to prove that the loop rolls exactly once\n"); | |
352 return; | |
353 } | |
354 | |
355 /* Success. */ | |
356 if (dump_file) | |
357 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n"); | |
358 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; | |
359 } | |
360 | |
361 /* Decide whether the LOOP is suitable for complete peeling. */ | |
362 static void | |
363 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) | |
364 { | |
365 unsigned npeel; | |
366 struct niter_desc *desc; | |
367 | |
368 if (dump_file) | |
369 fprintf (dump_file, "\n;; Considering peeling completely\n"); | |
370 | |
371 /* Skip non-innermost loops. */ | |
372 if (loop->inner) | |
373 { | |
374 if (dump_file) | |
375 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); | |
376 return; | |
377 } | |
378 | |
379 /* Do not peel cold areas. */ | |
380 if (optimize_loop_for_size_p (loop)) | |
381 { | |
382 if (dump_file) | |
383 fprintf (dump_file, ";; Not considering loop, cold area\n"); | |
384 return; | |
385 } | |
386 | |
387 /* Can the loop be manipulated? */ | |
388 if (!can_duplicate_loop_p (loop)) | |
389 { | |
390 if (dump_file) | |
391 fprintf (dump_file, | |
392 ";; Not considering loop, cannot duplicate\n"); | |
393 return; | |
394 } | |
395 | |
396 /* npeel = number of iterations to peel. */ | |
397 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns; | |
398 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) | |
399 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); | |
400 | |
401 /* Is the loop small enough? */ | |
402 if (!npeel) | |
403 { | |
404 if (dump_file) | |
405 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
406 return; | |
407 } | |
408 | |
409 /* Check for simple loops. */ | |
410 desc = get_simple_loop_desc (loop); | |
411 | |
412 /* Check number of iterations. */ | |
413 if (!desc->simple_p | |
414 || desc->assumptions | |
415 || !desc->const_iter | |
416 || desc->infinite) | |
417 { | |
418 if (dump_file) | |
419 fprintf (dump_file, | |
420 ";; Unable to prove that the loop iterates constant times\n"); | |
421 return; | |
422 } | |
423 | |
424 if (desc->niter > npeel - 1) | |
425 { | |
426 if (dump_file) | |
427 { | 287 { |
428 fprintf (dump_file, | 288 case LPT_UNROLL_CONSTANT: |
429 ";; Not peeling loop completely, rolls too much ("); | 289 unroll_loop_constant_iterations (loop); |
430 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); | 290 changed = true; |
431 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel); | 291 break; |
292 case LPT_UNROLL_RUNTIME: | |
293 unroll_loop_runtime_iterations (loop); | |
294 changed = true; | |
295 break; | |
296 case LPT_UNROLL_STUPID: | |
297 unroll_loop_stupid (loop); | |
298 changed = true; | |
299 break; | |
300 case LPT_NONE: | |
301 break; | |
302 default: | |
303 gcc_unreachable (); | |
432 } | 304 } |
433 return; | 305 } |
434 } | 306 |
435 | 307 if (changed) |
436 /* Success. */ | 308 { |
437 if (dump_file) | 309 calculate_dominance_info (CDI_DOMINATORS); |
438 fprintf (dump_file, ";; Decided to peel loop completely\n"); | 310 fix_loop_structure (NULL); |
439 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; | 311 } |
440 } | 312 |
441 | 313 iv_analysis_done (); |
442 /* Peel all iterations of LOOP, remove exit edges and cancel the loop | 314 } |
443 completely. The transformation done: | 315 |
444 | 316 /* Check whether exit of the LOOP is at the end of loop body. */ |
445 for (i = 0; i < 4; i++) | 317 |
446 body; | 318 static bool |
447 | 319 loop_exit_at_end_p (struct loop *loop) |
448 ==> | 320 { |
449 | |
450 i = 0; | |
451 body; i++; | |
452 body; i++; | |
453 body; i++; | |
454 body; i++; | |
455 */ | |
456 static void | |
457 peel_loop_completely (struct loop *loop) | |
458 { | |
459 sbitmap wont_exit; | |
460 unsigned HOST_WIDE_INT npeel; | |
461 unsigned i; | |
462 VEC (edge, heap) *remove_edges; | |
463 edge ein; | |
464 struct niter_desc *desc = get_simple_loop_desc (loop); | 321 struct niter_desc *desc = get_simple_loop_desc (loop); |
465 struct opt_info *opt_info = NULL; | 322 rtx_insn *insn; |
466 | 323 |
467 npeel = desc->niter; | 324 /* We should never have conditional in latch block. */ |
468 | 325 gcc_assert (desc->in_edge->dest != loop->header); |
469 if (npeel) | 326 |
470 { | 327 if (desc->in_edge->dest != loop->latch) |
471 bool ok; | 328 return false; |
472 | 329 |
473 wont_exit = sbitmap_alloc (npeel + 1); | 330 /* Check that the latch is empty. */ |
474 sbitmap_ones (wont_exit); | 331 FOR_BB_INSNS (loop->latch, insn) |
475 RESET_BIT (wont_exit, 0); | 332 { |
476 if (desc->noloop_assumptions) | 333 if (INSN_P (insn) && active_insn_p (insn)) |
477 RESET_BIT (wont_exit, 1); | 334 return false; |
478 | 335 } |
479 remove_edges = NULL; | 336 |
480 | 337 return true; |
481 if (flag_split_ivs_in_unroller) | |
482 opt_info = analyze_insns_in_loop (loop); | |
483 | |
484 opt_info_start_duplication (opt_info); | |
485 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
486 npeel, | |
487 wont_exit, desc->out_edge, | |
488 &remove_edges, | |
489 DLTHE_FLAG_UPDATE_FREQ | |
490 | DLTHE_FLAG_COMPLETTE_PEEL | |
491 | (opt_info | |
492 ? DLTHE_RECORD_COPY_NUMBER : 0)); | |
493 gcc_assert (ok); | |
494 | |
495 free (wont_exit); | |
496 | |
497 if (opt_info) | |
498 { | |
499 apply_opt_in_copies (opt_info, npeel, false, true); | |
500 free_opt_info (opt_info); | |
501 } | |
502 | |
503 /* Remove the exit edges. */ | |
504 FOR_EACH_VEC_ELT (edge, remove_edges, i, ein) | |
505 remove_path (ein); | |
506 VEC_free (edge, heap, remove_edges); | |
507 } | |
508 | |
509 ein = desc->in_edge; | |
510 free_simple_loop_desc (loop); | |
511 | |
512 /* Now remove the unreachable part of the last iteration and cancel | |
513 the loop. */ | |
514 remove_path (ein); | |
515 | |
516 if (dump_file) | |
517 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel); | |
518 } | 338 } |
519 | 339 |
520 /* Decide whether to unroll LOOP iterating constant number of times | 340 /* Decide whether to unroll LOOP iterating constant number of times |
521 and how much. */ | 341 and how much. */ |
522 | 342 |
523 static void | 343 static void |
524 decide_unroll_constant_iterations (struct loop *loop, int flags) | 344 decide_unroll_constant_iterations (struct loop *loop, int flags) |
525 { | 345 { |
526 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; | 346 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; |
527 struct niter_desc *desc; | 347 struct niter_desc *desc; |
348 widest_int iterations; | |
528 | 349 |
529 if (!(flags & UAP_UNROLL)) | 350 if (!(flags & UAP_UNROLL)) |
530 { | 351 { |
531 /* We were not asked to, just return back silently. */ | 352 /* We were not asked to, just return back silently. */ |
532 return; | 353 return; |
545 if (nunroll > nunroll_by_av) | 366 if (nunroll > nunroll_by_av) |
546 nunroll = nunroll_by_av; | 367 nunroll = nunroll_by_av; |
547 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | 368 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) |
548 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | 369 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); |
549 | 370 |
371 if (targetm.loop_unroll_adjust) | |
372 nunroll = targetm.loop_unroll_adjust (nunroll, loop); | |
373 | |
550 /* Skip big loops. */ | 374 /* Skip big loops. */ |
551 if (nunroll <= 1) | 375 if (nunroll <= 1) |
552 { | 376 { |
553 if (dump_file) | 377 if (dump_file) |
554 fprintf (dump_file, ";; Not considering loop, is too big\n"); | 378 fprintf (dump_file, ";; Not considering loop, is too big\n"); |
565 fprintf (dump_file, | 389 fprintf (dump_file, |
566 ";; Unable to prove that the loop iterates constant times\n"); | 390 ";; Unable to prove that the loop iterates constant times\n"); |
567 return; | 391 return; |
568 } | 392 } |
569 | 393 |
570 /* Check whether the loop rolls enough to consider. */ | 394 /* Check whether the loop rolls enough to consider. |
571 if (desc->niter < 2 * nunroll) | 395 Consult also loop bounds and profile; in the case the loop has more |
396 than one exit it may well loop less than determined maximal number | |
397 of iterations. */ | |
398 if (desc->niter < 2 * nunroll | |
399 || ((get_estimated_loop_iterations (loop, &iterations) | |
400 || get_likely_max_loop_iterations (loop, &iterations)) | |
401 && wi::ltu_p (iterations, 2 * nunroll))) | |
572 { | 402 { |
573 if (dump_file) | 403 if (dump_file) |
574 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | 404 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); |
575 return; | 405 return; |
576 } | 406 } |
602 best_copies = n_copies; | 432 best_copies = n_copies; |
603 best_unroll = i; | 433 best_unroll = i; |
604 } | 434 } |
605 } | 435 } |
606 | 436 |
607 if (dump_file) | |
608 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n", | |
609 best_unroll + 1, best_copies, nunroll); | |
610 | |
611 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; | 437 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; |
612 loop->lpt_decision.times = best_unroll; | 438 loop->lpt_decision.times = best_unroll; |
613 | 439 } |
614 if (dump_file) | 440 |
615 fprintf (dump_file, | 441 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times. |
616 ";; Decided to unroll the constant times rolling loop, %d times.\n", | 442 The transformation does this: |
617 loop->lpt_decision.times); | |
618 } | |
619 | |
620 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1 | |
621 times. The transformation does this: | |
622 | 443 |
623 for (i = 0; i < 102; i++) | 444 for (i = 0; i < 102; i++) |
624 body; | 445 body; |
625 | 446 |
626 ==> | 447 ==> (LOOP->LPT_DECISION.TIMES == 3) |
627 | 448 |
628 i = 0; | 449 i = 0; |
629 body; i++; | 450 body; i++; |
630 body; i++; | 451 body; i++; |
631 while (i < 102) | 452 while (i < 102) |
639 static void | 460 static void |
640 unroll_loop_constant_iterations (struct loop *loop) | 461 unroll_loop_constant_iterations (struct loop *loop) |
641 { | 462 { |
642 unsigned HOST_WIDE_INT niter; | 463 unsigned HOST_WIDE_INT niter; |
643 unsigned exit_mod; | 464 unsigned exit_mod; |
644 sbitmap wont_exit; | |
645 unsigned i; | 465 unsigned i; |
646 VEC (edge, heap) *remove_edges; | |
647 edge e; | 466 edge e; |
648 unsigned max_unroll = loop->lpt_decision.times; | 467 unsigned max_unroll = loop->lpt_decision.times; |
649 struct niter_desc *desc = get_simple_loop_desc (loop); | 468 struct niter_desc *desc = get_simple_loop_desc (loop); |
650 bool exit_at_end = loop_exit_at_end_p (loop); | 469 bool exit_at_end = loop_exit_at_end_p (loop); |
651 struct opt_info *opt_info = NULL; | 470 struct opt_info *opt_info = NULL; |
656 /* Should not get here (such loop should be peeled instead). */ | 475 /* Should not get here (such loop should be peeled instead). */ |
657 gcc_assert (niter > max_unroll + 1); | 476 gcc_assert (niter > max_unroll + 1); |
658 | 477 |
659 exit_mod = niter % (max_unroll + 1); | 478 exit_mod = niter % (max_unroll + 1); |
660 | 479 |
661 wont_exit = sbitmap_alloc (max_unroll + 1); | 480 auto_sbitmap wont_exit (max_unroll + 1); |
662 sbitmap_ones (wont_exit); | 481 bitmap_ones (wont_exit); |
663 | 482 |
664 remove_edges = NULL; | 483 auto_vec<edge> remove_edges; |
665 if (flag_split_ivs_in_unroller | 484 if (flag_split_ivs_in_unroller |
666 || flag_variable_expansion_in_unroller) | 485 || flag_variable_expansion_in_unroller) |
667 opt_info = analyze_insns_in_loop (loop); | 486 opt_info = analyze_insns_in_loop (loop); |
668 | 487 |
669 if (!exit_at_end) | 488 if (!exit_at_end) |
671 /* The exit is not at the end of the loop; leave exit test | 490 /* The exit is not at the end of the loop; leave exit test |
672 in the first copy, so that the loops that start with test | 491 in the first copy, so that the loops that start with test |
673 of exit condition have continuous body after unrolling. */ | 492 of exit condition have continuous body after unrolling. */ |
674 | 493 |
675 if (dump_file) | 494 if (dump_file) |
676 fprintf (dump_file, ";; Condition on beginning of loop.\n"); | 495 fprintf (dump_file, ";; Condition at beginning of loop.\n"); |
677 | 496 |
678 /* Peel exit_mod iterations. */ | 497 /* Peel exit_mod iterations. */ |
679 RESET_BIT (wont_exit, 0); | 498 bitmap_clear_bit (wont_exit, 0); |
680 if (desc->noloop_assumptions) | 499 if (desc->noloop_assumptions) |
681 RESET_BIT (wont_exit, 1); | 500 bitmap_clear_bit (wont_exit, 1); |
682 | 501 |
683 if (exit_mod) | 502 if (exit_mod) |
684 { | 503 { |
685 opt_info_start_duplication (opt_info); | 504 opt_info_start_duplication (opt_info); |
686 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | 505 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
696 if (opt_info && exit_mod > 1) | 515 if (opt_info && exit_mod > 1) |
697 apply_opt_in_copies (opt_info, exit_mod, false, false); | 516 apply_opt_in_copies (opt_info, exit_mod, false, false); |
698 | 517 |
699 desc->noloop_assumptions = NULL_RTX; | 518 desc->noloop_assumptions = NULL_RTX; |
700 desc->niter -= exit_mod; | 519 desc->niter -= exit_mod; |
701 desc->niter_max -= exit_mod; | 520 loop->nb_iterations_upper_bound -= exit_mod; |
521 if (loop->any_estimate | |
522 && wi::leu_p (exit_mod, loop->nb_iterations_estimate)) | |
523 loop->nb_iterations_estimate -= exit_mod; | |
524 else | |
525 loop->any_estimate = false; | |
526 if (loop->any_likely_upper_bound | |
527 && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound)) | |
528 loop->nb_iterations_likely_upper_bound -= exit_mod; | |
529 else | |
530 loop->any_likely_upper_bound = false; | |
702 } | 531 } |
703 | 532 |
704 SET_BIT (wont_exit, 1); | 533 bitmap_set_bit (wont_exit, 1); |
705 } | 534 } |
706 else | 535 else |
707 { | 536 { |
708 /* Leave exit test in last copy, for the same reason as above if | 537 /* Leave exit test in last copy, for the same reason as above if |
709 the loop tests the condition at the end of loop body. */ | 538 the loop tests the condition at the end of loop body. */ |
710 | 539 |
711 if (dump_file) | 540 if (dump_file) |
712 fprintf (dump_file, ";; Condition on end of loop.\n"); | 541 fprintf (dump_file, ";; Condition at end of loop.\n"); |
713 | 542 |
714 /* We know that niter >= max_unroll + 2; so we do not need to care of | 543 /* We know that niter >= max_unroll + 2; so we do not need to care of |
715 case when we would exit before reaching the loop. So just peel | 544 case when we would exit before reaching the loop. So just peel |
716 exit_mod + 1 iterations. */ | 545 exit_mod + 1 iterations. */ |
717 if (exit_mod != max_unroll | 546 if (exit_mod != max_unroll |
718 || desc->noloop_assumptions) | 547 || desc->noloop_assumptions) |
719 { | 548 { |
720 RESET_BIT (wont_exit, 0); | 549 bitmap_clear_bit (wont_exit, 0); |
721 if (desc->noloop_assumptions) | 550 if (desc->noloop_assumptions) |
722 RESET_BIT (wont_exit, 1); | 551 bitmap_clear_bit (wont_exit, 1); |
723 | 552 |
724 opt_info_start_duplication (opt_info); | 553 opt_info_start_duplication (opt_info); |
725 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | 554 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
726 exit_mod + 1, | 555 exit_mod + 1, |
727 wont_exit, desc->out_edge, | 556 wont_exit, desc->out_edge, |
734 | 563 |
735 if (opt_info && exit_mod > 0) | 564 if (opt_info && exit_mod > 0) |
736 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); | 565 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); |
737 | 566 |
738 desc->niter -= exit_mod + 1; | 567 desc->niter -= exit_mod + 1; |
739 desc->niter_max -= exit_mod + 1; | 568 loop->nb_iterations_upper_bound -= exit_mod + 1; |
569 if (loop->any_estimate | |
570 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate)) | |
571 loop->nb_iterations_estimate -= exit_mod + 1; | |
572 else | |
573 loop->any_estimate = false; | |
574 if (loop->any_likely_upper_bound | |
575 && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound)) | |
576 loop->nb_iterations_likely_upper_bound -= exit_mod + 1; | |
577 else | |
578 loop->any_likely_upper_bound = false; | |
740 desc->noloop_assumptions = NULL_RTX; | 579 desc->noloop_assumptions = NULL_RTX; |
741 | 580 |
742 SET_BIT (wont_exit, 0); | 581 bitmap_set_bit (wont_exit, 0); |
743 SET_BIT (wont_exit, 1); | 582 bitmap_set_bit (wont_exit, 1); |
744 } | 583 } |
745 | 584 |
746 RESET_BIT (wont_exit, max_unroll); | 585 bitmap_clear_bit (wont_exit, max_unroll); |
747 } | 586 } |
748 | 587 |
749 /* Now unroll the loop. */ | 588 /* Now unroll the loop. */ |
750 | 589 |
751 opt_info_start_duplication (opt_info); | 590 opt_info_start_duplication (opt_info); |
763 { | 602 { |
764 apply_opt_in_copies (opt_info, max_unroll, true, true); | 603 apply_opt_in_copies (opt_info, max_unroll, true, true); |
765 free_opt_info (opt_info); | 604 free_opt_info (opt_info); |
766 } | 605 } |
767 | 606 |
768 free (wont_exit); | |
769 | |
770 if (exit_at_end) | 607 if (exit_at_end) |
771 { | 608 { |
772 basic_block exit_block = get_bb_copy (desc->in_edge->src); | 609 basic_block exit_block = get_bb_copy (desc->in_edge->src); |
773 /* Find a new in and out edge; they are in the last copy we have made. */ | 610 /* Find a new in and out edge; they are in the last copy we have made. */ |
774 | 611 |
783 desc->in_edge = EDGE_SUCC (exit_block, 0); | 620 desc->in_edge = EDGE_SUCC (exit_block, 0); |
784 } | 621 } |
785 } | 622 } |
786 | 623 |
787 desc->niter /= max_unroll + 1; | 624 desc->niter /= max_unroll + 1; |
788 desc->niter_max /= max_unroll + 1; | 625 loop->nb_iterations_upper_bound |
626 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1); | |
627 if (loop->any_estimate) | |
628 loop->nb_iterations_estimate | |
629 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1); | |
630 if (loop->any_likely_upper_bound) | |
631 loop->nb_iterations_likely_upper_bound | |
632 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1); | |
789 desc->niter_expr = GEN_INT (desc->niter); | 633 desc->niter_expr = GEN_INT (desc->niter); |
790 | 634 |
791 /* Remove the edges. */ | 635 /* Remove the edges. */ |
792 FOR_EACH_VEC_ELT (edge, remove_edges, i, e) | 636 FOR_EACH_VEC_ELT (remove_edges, i, e) |
793 remove_path (e); | 637 remove_path (e); |
794 VEC_free (edge, heap, remove_edges); | |
795 | 638 |
796 if (dump_file) | 639 if (dump_file) |
797 fprintf (dump_file, | 640 fprintf (dump_file, |
798 ";; Unrolled loop %d times, constant # of iterations %i insns\n", | 641 ";; Unrolled loop %d times, constant # of iterations %i insns\n", |
799 max_unroll, num_loop_insns (loop)); | 642 max_unroll, num_loop_insns (loop)); |
804 static void | 647 static void |
805 decide_unroll_runtime_iterations (struct loop *loop, int flags) | 648 decide_unroll_runtime_iterations (struct loop *loop, int flags) |
806 { | 649 { |
807 unsigned nunroll, nunroll_by_av, i; | 650 unsigned nunroll, nunroll_by_av, i; |
808 struct niter_desc *desc; | 651 struct niter_desc *desc; |
652 widest_int iterations; | |
809 | 653 |
810 if (!(flags & UAP_UNROLL)) | 654 if (!(flags & UAP_UNROLL)) |
811 { | 655 { |
812 /* We were not asked to, just return back silently. */ | 656 /* We were not asked to, just return back silently. */ |
813 return; | 657 return; |
856 if (dump_file) | 700 if (dump_file) |
857 fprintf (dump_file, ";; Loop iterates constant times\n"); | 701 fprintf (dump_file, ";; Loop iterates constant times\n"); |
858 return; | 702 return; |
859 } | 703 } |
860 | 704 |
861 /* If we have profile feedback, check whether the loop rolls. */ | 705 /* Check whether the loop rolls. */ |
862 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) | 706 if ((get_estimated_loop_iterations (loop, &iterations) |
707 || get_likely_max_loop_iterations (loop, &iterations)) | |
708 && wi::ltu_p (iterations, 2 * nunroll)) | |
863 { | 709 { |
864 if (dump_file) | 710 if (dump_file) |
865 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | 711 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); |
866 return; | 712 return; |
867 } | 713 } |
871 for (i = 1; 2 * i <= nunroll; i *= 2) | 717 for (i = 1; 2 * i <= nunroll; i *= 2) |
872 continue; | 718 continue; |
873 | 719 |
874 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; | 720 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; |
875 loop->lpt_decision.times = i - 1; | 721 loop->lpt_decision.times = i - 1; |
876 | |
877 if (dump_file) | |
878 fprintf (dump_file, | |
879 ";; Decided to unroll the runtime computable " | |
880 "times rolling loop, %d times.\n", | |
881 loop->lpt_decision.times); | |
882 } | 722 } |
883 | 723 |
884 /* Splits edge E and inserts the sequence of instructions INSNS on it, and | 724 /* Splits edge E and inserts the sequence of instructions INSNS on it, and |
885 returns the newly created block. If INSNS is NULL_RTX, nothing is changed | 725 returns the newly created block. If INSNS is NULL_RTX, nothing is changed |
886 and NULL is returned instead. */ | 726 and NULL is returned instead. */ |
887 | 727 |
888 basic_block | 728 basic_block |
889 split_edge_and_insert (edge e, rtx insns) | 729 split_edge_and_insert (edge e, rtx_insn *insns) |
890 { | 730 { |
891 basic_block bb; | 731 basic_block bb; |
892 | 732 |
893 if (!insns) | 733 if (!insns) |
894 return NULL; | 734 return NULL; |
898 /* ??? We used to assume that INSNS can contain control flow insns, and | 738 /* ??? We used to assume that INSNS can contain control flow insns, and |
899 that we had to try to find sub basic blocks in BB to maintain a valid | 739 that we had to try to find sub basic blocks in BB to maintain a valid |
900 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB | 740 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB |
901 and call break_superblocks when going out of cfglayout mode. But it | 741 and call break_superblocks when going out of cfglayout mode. But it |
902 turns out that this never happens; and that if it does ever happen, | 742 turns out that this never happens; and that if it does ever happen, |
903 the TODO_verify_flow at the end of the RTL loop passes would fail. | 743 the verify_flow_info at the end of the RTL loop passes would fail. |
904 | 744 |
905 There are two reasons why we expected we could have control flow insns | 745 There are two reasons why we expected we could have control flow insns |
906 in INSNS. The first is when a comparison has to be done in parts, and | 746 in INSNS. The first is when a comparison has to be done in parts, and |
907 the second is when the number of iterations is computed for loops with | 747 the second is when the number of iterations is computed for loops with |
908 the number of iterations known at runtime. In both cases, test cases | 748 the number of iterations known at runtime. In both cases, test cases |
926 fix it. */ | 766 fix it. */ |
927 | 767 |
928 return bb; | 768 return bb; |
929 } | 769 } |
930 | 770 |
931 /* Unroll LOOP for that we are able to count number of iterations in runtime | 771 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if |
932 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some | 772 true, with probability PROB. If CINSN is not NULL, it is the insn to copy |
773 in order to create a jump. */ | |
774 | |
775 static rtx_insn * | |
776 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, | |
777 rtx_code_label *label, profile_probability prob, | |
778 rtx_insn *cinsn) | |
779 { | |
780 rtx_insn *seq; | |
781 rtx_jump_insn *jump; | |
782 rtx cond; | |
783 machine_mode mode; | |
784 | |
785 mode = GET_MODE (op0); | |
786 if (mode == VOIDmode) | |
787 mode = GET_MODE (op1); | |
788 | |
789 start_sequence (); | |
790 if (GET_MODE_CLASS (mode) == MODE_CC) | |
791 { | |
792 /* A hack -- there seems to be no easy generic way how to make a | |
793 conditional jump from a ccmode comparison. */ | |
794 gcc_assert (cinsn); | |
795 cond = XEXP (SET_SRC (pc_set (cinsn)), 0); | |
796 gcc_assert (GET_CODE (cond) == comp); | |
797 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); | |
798 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); | |
799 emit_jump_insn (copy_insn (PATTERN (cinsn))); | |
800 jump = as_a <rtx_jump_insn *> (get_last_insn ()); | |
801 JUMP_LABEL (jump) = JUMP_LABEL (cinsn); | |
802 LABEL_NUSES (JUMP_LABEL (jump))++; | |
803 redirect_jump (jump, label, 0); | |
804 } | |
805 else | |
806 { | |
807 gcc_assert (!cinsn); | |
808 | |
809 op0 = force_operand (op0, NULL_RTX); | |
810 op1 = force_operand (op1, NULL_RTX); | |
811 do_compare_rtx_and_jump (op0, op1, comp, 0, | |
812 mode, NULL_RTX, NULL, label, | |
813 profile_probability::uninitialized ()); | |
814 jump = as_a <rtx_jump_insn *> (get_last_insn ()); | |
815 jump->set_jump_target (label); | |
816 LABEL_NUSES (label)++; | |
817 } | |
818 if (prob.initialized_p ()) | |
819 add_reg_br_prob_note (jump, prob); | |
820 | |
821 seq = get_insns (); | |
822 end_sequence (); | |
823 | |
824 return seq; | |
825 } | |
826 | |
827 /* Unroll LOOP for which we are able to count number of iterations in runtime | |
828 LOOP->LPT_DECISION.TIMES times. The transformation does this (with some | |
933 extra care for case n < 0): | 829 extra care for case n < 0): |
934 | 830 |
935 for (i = 0; i < n; i++) | 831 for (i = 0; i < n; i++) |
936 body; | 832 body; |
937 | 833 |
938 ==> | 834 ==> (LOOP->LPT_DECISION.TIMES == 3) |
939 | 835 |
940 i = 0; | 836 i = 0; |
941 mod = n % 4; | 837 mod = n % 4; |
942 | 838 |
943 switch (mod) | 839 switch (mod) |
960 } | 856 } |
961 */ | 857 */ |
962 static void | 858 static void |
963 unroll_loop_runtime_iterations (struct loop *loop) | 859 unroll_loop_runtime_iterations (struct loop *loop) |
964 { | 860 { |
965 rtx old_niter, niter, init_code, branch_code, tmp; | 861 rtx old_niter, niter, tmp; |
966 unsigned i, j, p; | 862 rtx_insn *init_code, *branch_code; |
967 basic_block preheader, *body, swtch, ezc_swtch; | 863 unsigned i, j; |
968 VEC (basic_block, heap) *dom_bbs; | 864 profile_probability p; |
969 sbitmap wont_exit; | 865 basic_block preheader, *body, swtch, ezc_swtch = NULL; |
970 int may_exit_copy; | 866 int may_exit_copy, iter_freq, new_freq; |
867 profile_count iter_count, new_count; | |
971 unsigned n_peel; | 868 unsigned n_peel; |
972 VEC (edge, heap) *remove_edges; | |
973 edge e; | 869 edge e; |
974 bool extra_zero_check, last_may_exit; | 870 bool extra_zero_check, last_may_exit; |
975 unsigned max_unroll = loop->lpt_decision.times; | 871 unsigned max_unroll = loop->lpt_decision.times; |
976 struct niter_desc *desc = get_simple_loop_desc (loop); | 872 struct niter_desc *desc = get_simple_loop_desc (loop); |
977 bool exit_at_end = loop_exit_at_end_p (loop); | 873 bool exit_at_end = loop_exit_at_end_p (loop); |
981 if (flag_split_ivs_in_unroller | 877 if (flag_split_ivs_in_unroller |
982 || flag_variable_expansion_in_unroller) | 878 || flag_variable_expansion_in_unroller) |
983 opt_info = analyze_insns_in_loop (loop); | 879 opt_info = analyze_insns_in_loop (loop); |
984 | 880 |
985 /* Remember blocks whose dominators will have to be updated. */ | 881 /* Remember blocks whose dominators will have to be updated. */ |
986 dom_bbs = NULL; | 882 auto_vec<basic_block> dom_bbs; |
987 | 883 |
988 body = get_loop_body (loop); | 884 body = get_loop_body (loop); |
989 for (i = 0; i < loop->num_nodes; i++) | 885 for (i = 0; i < loop->num_nodes; i++) |
990 { | 886 { |
991 VEC (basic_block, heap) *ldom; | 887 vec<basic_block> ldom; |
992 basic_block bb; | 888 basic_block bb; |
993 | 889 |
994 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); | 890 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); |
995 FOR_EACH_VEC_ELT (basic_block, ldom, j, bb) | 891 FOR_EACH_VEC_ELT (ldom, j, bb) |
996 if (!flow_bb_inside_loop_p (loop, bb)) | 892 if (!flow_bb_inside_loop_p (loop, bb)) |
997 VEC_safe_push (basic_block, heap, dom_bbs, bb); | 893 dom_bbs.safe_push (bb); |
998 | 894 |
999 VEC_free (basic_block, heap, ldom); | 895 ldom.release (); |
1000 } | 896 } |
1001 free (body); | 897 free (body); |
1002 | 898 |
1003 if (!exit_at_end) | 899 if (!exit_at_end) |
1004 { | 900 { |
1024 old_niter = niter = gen_reg_rtx (desc->mode); | 920 old_niter = niter = gen_reg_rtx (desc->mode); |
1025 tmp = force_operand (copy_rtx (desc->niter_expr), niter); | 921 tmp = force_operand (copy_rtx (desc->niter_expr), niter); |
1026 if (tmp != niter) | 922 if (tmp != niter) |
1027 emit_move_insn (niter, tmp); | 923 emit_move_insn (niter, tmp); |
1028 | 924 |
925 /* For loops that exit at end and whose number of iterations is reliable, | |
926 add one to niter to account for first pass through loop body before | |
927 reaching exit test. */ | |
928 if (exit_at_end && !desc->noloop_assumptions) | |
929 { | |
930 niter = expand_simple_binop (desc->mode, PLUS, | |
931 niter, const1_rtx, | |
932 NULL_RTX, 0, OPTAB_LIB_WIDEN); | |
933 old_niter = niter; | |
934 } | |
935 | |
1029 /* Count modulo by ANDing it with max_unroll; we use the fact that | 936 /* Count modulo by ANDing it with max_unroll; we use the fact that |
1030 the number of unrollings is a power of two, and thus this is correct | 937 the number of unrollings is a power of two, and thus this is correct |
1031 even if there is overflow in the computation. */ | 938 even if there is overflow in the computation. */ |
1032 niter = expand_simple_binop (desc->mode, AND, | 939 niter = expand_simple_binop (desc->mode, AND, |
1033 niter, | 940 niter, gen_int_mode (max_unroll, desc->mode), |
1034 GEN_INT (max_unroll), | |
1035 NULL_RTX, 0, OPTAB_LIB_WIDEN); | 941 NULL_RTX, 0, OPTAB_LIB_WIDEN); |
1036 | 942 |
1037 init_code = get_insns (); | 943 init_code = get_insns (); |
1038 end_sequence (); | 944 end_sequence (); |
1039 unshare_all_rtl_in_chain (init_code); | 945 unshare_all_rtl_in_chain (init_code); |
1040 | 946 |
1041 /* Precondition the loop. */ | 947 /* Precondition the loop. */ |
1042 split_edge_and_insert (loop_preheader_edge (loop), init_code); | 948 split_edge_and_insert (loop_preheader_edge (loop), init_code); |
1043 | 949 |
1044 remove_edges = NULL; | 950 auto_vec<edge> remove_edges; |
1045 | 951 |
1046 wont_exit = sbitmap_alloc (max_unroll + 2); | 952 auto_sbitmap wont_exit (max_unroll + 2); |
1047 | 953 |
1048 /* Peel the first copy of loop body (almost always we must leave exit test | 954 if (extra_zero_check || desc->noloop_assumptions) |
1049 here; the only exception is when we have extra zero check and the number | 955 { |
1050 of iterations is reliable. Also record the place of (possible) extra | 956 /* Peel the first copy of loop body. Leave the exit test if the number |
1051 zero check. */ | 957 of iterations is not reliable. Also record the place of the extra zero |
1052 sbitmap_zero (wont_exit); | 958 check. */ |
1053 if (extra_zero_check | 959 bitmap_clear (wont_exit); |
1054 && !desc->noloop_assumptions) | 960 if (!desc->noloop_assumptions) |
1055 SET_BIT (wont_exit, 1); | 961 bitmap_set_bit (wont_exit, 1); |
1056 ezc_swtch = loop_preheader_edge (loop)->src; | 962 ezc_swtch = loop_preheader_edge (loop)->src; |
1057 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1058 1, wont_exit, desc->out_edge, | |
1059 &remove_edges, | |
1060 DLTHE_FLAG_UPDATE_FREQ); | |
1061 gcc_assert (ok); | |
1062 | |
1063 /* Record the place where switch will be built for preconditioning. */ | |
1064 swtch = split_edge (loop_preheader_edge (loop)); | |
1065 | |
1066 for (i = 0; i < n_peel; i++) | |
1067 { | |
1068 /* Peel the copy. */ | |
1069 sbitmap_zero (wont_exit); | |
1070 if (i != n_peel - 1 || !last_may_exit) | |
1071 SET_BIT (wont_exit, 1); | |
1072 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | 963 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1073 1, wont_exit, desc->out_edge, | 964 1, wont_exit, desc->out_edge, |
1074 &remove_edges, | 965 &remove_edges, |
1075 DLTHE_FLAG_UPDATE_FREQ); | 966 DLTHE_FLAG_UPDATE_FREQ); |
1076 gcc_assert (ok); | 967 gcc_assert (ok); |
968 } | |
969 | |
970 /* Record the place where switch will be built for preconditioning. */ | |
971 swtch = split_edge (loop_preheader_edge (loop)); | |
972 | |
973 /* Compute frequency/count increments for each switch block and initialize | |
974 innermost switch block. Switch blocks and peeled loop copies are built | |
975 from innermost outward. */ | |
976 iter_freq = new_freq = swtch->frequency / (max_unroll + 1); | |
977 iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1); | |
978 swtch->frequency = new_freq; | |
979 swtch->count = new_count; | |
980 | |
981 for (i = 0; i < n_peel; i++) | |
982 { | |
983 /* Peel the copy. */ | |
984 bitmap_clear (wont_exit); | |
985 if (i != n_peel - 1 || !last_may_exit) | |
986 bitmap_set_bit (wont_exit, 1); | |
987 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
988 1, wont_exit, desc->out_edge, | |
989 &remove_edges, | |
990 DLTHE_FLAG_UPDATE_FREQ); | |
991 gcc_assert (ok); | |
1077 | 992 |
1078 /* Create item for switch. */ | 993 /* Create item for switch. */ |
1079 j = n_peel - i - (extra_zero_check ? 0 : 1); | 994 j = n_peel - i - (extra_zero_check ? 0 : 1); |
1080 p = REG_BR_PROB_BASE / (i + 2); | 995 p = profile_probability::always ().apply_scale (1, i + 2); |
1081 | 996 |
1082 preheader = split_edge (loop_preheader_edge (loop)); | 997 preheader = split_edge (loop_preheader_edge (loop)); |
998 /* Add in frequency/count of edge from switch block. */ | |
999 preheader->frequency += iter_freq; | |
1000 preheader->count += iter_count; | |
1083 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, | 1001 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, |
1084 block_label (preheader), p, | 1002 block_label (preheader), p, |
1085 NULL_RTX); | 1003 NULL); |
1086 | 1004 |
1087 /* We rely on the fact that the compare and jump cannot be optimized out, | 1005 /* We rely on the fact that the compare and jump cannot be optimized out, |
1088 and hence the cfg we create is correct. */ | 1006 and hence the cfg we create is correct. */ |
1089 gcc_assert (branch_code != NULL_RTX); | 1007 gcc_assert (branch_code != NULL_RTX); |
1090 | 1008 |
1091 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); | 1009 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); |
1092 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | 1010 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); |
1093 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; | 1011 single_succ_edge (swtch)->probability = p.invert (); |
1012 new_freq += iter_freq; | |
1013 new_count += iter_count; | |
1014 swtch->frequency = new_freq; | |
1015 swtch->count = new_count; | |
1094 e = make_edge (swtch, preheader, | 1016 e = make_edge (swtch, preheader, |
1095 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | 1017 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); |
1096 e->probability = p; | 1018 e->probability = p; |
1097 } | 1019 } |
1098 | 1020 |
1099 if (extra_zero_check) | 1021 if (extra_zero_check) |
1100 { | 1022 { |
1101 /* Add branch for zero iterations. */ | 1023 /* Add branch for zero iterations. */ |
1102 p = REG_BR_PROB_BASE / (max_unroll + 1); | 1024 p = profile_probability::always ().apply_scale (1, max_unroll + 1); |
1103 swtch = ezc_swtch; | 1025 swtch = ezc_swtch; |
1104 preheader = split_edge (loop_preheader_edge (loop)); | 1026 preheader = split_edge (loop_preheader_edge (loop)); |
1027 /* Recompute frequency/count adjustments since initial peel copy may | |
1028 have exited and reduced those values that were computed above. */ | |
1029 iter_freq = swtch->frequency / (max_unroll + 1); | |
1030 iter_count = swtch->count.apply_scale (1, max_unroll + 1); | |
1031 /* Add in frequency/count of edge from switch block. */ | |
1032 preheader->frequency += iter_freq; | |
1033 preheader->count += iter_count; | |
1105 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, | 1034 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, |
1106 block_label (preheader), p, | 1035 block_label (preheader), p, |
1107 NULL_RTX); | 1036 NULL); |
1108 gcc_assert (branch_code != NULL_RTX); | 1037 gcc_assert (branch_code != NULL_RTX); |
1109 | 1038 |
1110 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code); | 1039 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code); |
1111 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | 1040 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); |
1112 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; | 1041 single_succ_edge (swtch)->probability = p.invert (); |
1113 e = make_edge (swtch, preheader, | 1042 e = make_edge (swtch, preheader, |
1114 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | 1043 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); |
1115 e->probability = p; | 1044 e->probability = p; |
1116 } | 1045 } |
1117 | 1046 |
1118 /* Recount dominators for outer blocks. */ | 1047 /* Recount dominators for outer blocks. */ |
1119 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); | 1048 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); |
1120 | 1049 |
1121 /* And unroll loop. */ | 1050 /* And unroll loop. */ |
1122 | 1051 |
1123 sbitmap_ones (wont_exit); | 1052 bitmap_ones (wont_exit); |
1124 RESET_BIT (wont_exit, may_exit_copy); | 1053 bitmap_clear_bit (wont_exit, may_exit_copy); |
1125 opt_info_start_duplication (opt_info); | 1054 opt_info_start_duplication (opt_info); |
1126 | 1055 |
1127 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), | 1056 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1128 max_unroll, | 1057 max_unroll, |
1129 wont_exit, desc->out_edge, | 1058 wont_exit, desc->out_edge, |
1137 if (opt_info) | 1066 if (opt_info) |
1138 { | 1067 { |
1139 apply_opt_in_copies (opt_info, max_unroll, true, true); | 1068 apply_opt_in_copies (opt_info, max_unroll, true, true); |
1140 free_opt_info (opt_info); | 1069 free_opt_info (opt_info); |
1141 } | 1070 } |
1142 | |
1143 free (wont_exit); | |
1144 | 1071 |
1145 if (exit_at_end) | 1072 if (exit_at_end) |
1146 { | 1073 { |
1147 basic_block exit_block = get_bb_copy (desc->in_edge->src); | 1074 basic_block exit_block = get_bb_copy (desc->in_edge->src); |
1148 /* Find a new in and out edge; they are in the last copy we have | 1075 /* Find a new in and out edge; they are in the last copy we have |
1159 desc->in_edge = EDGE_SUCC (exit_block, 0); | 1086 desc->in_edge = EDGE_SUCC (exit_block, 0); |
1160 } | 1087 } |
1161 } | 1088 } |
1162 | 1089 |
1163 /* Remove the edges. */ | 1090 /* Remove the edges. */ |
1164 FOR_EACH_VEC_ELT (edge, remove_edges, i, e) | 1091 FOR_EACH_VEC_ELT (remove_edges, i, e) |
1165 remove_path (e); | 1092 remove_path (e); |
1166 VEC_free (edge, heap, remove_edges); | |
1167 | 1093 |
1168 /* We must be careful when updating the number of iterations due to | 1094 /* We must be careful when updating the number of iterations due to |
1169 preconditioning and the fact that the value must be valid at entry | 1095 preconditioning and the fact that the value must be valid at entry |
1170 of the loop. After passing through the above code, we see that | 1096 of the loop. After passing through the above code, we see that |
1171 the correct new number of iterations is this: */ | 1097 the correct new number of iterations is this: */ |
1172 gcc_assert (!desc->const_iter); | 1098 gcc_assert (!desc->const_iter); |
1173 desc->niter_expr = | 1099 desc->niter_expr = |
1174 simplify_gen_binary (UDIV, desc->mode, old_niter, | 1100 simplify_gen_binary (UDIV, desc->mode, old_niter, |
1175 GEN_INT (max_unroll + 1)); | 1101 gen_int_mode (max_unroll + 1, desc->mode)); |
1176 desc->niter_max /= max_unroll + 1; | 1102 loop->nb_iterations_upper_bound |
1103 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1); | |
1104 if (loop->any_estimate) | |
1105 loop->nb_iterations_estimate | |
1106 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1); | |
1107 if (loop->any_likely_upper_bound) | |
1108 loop->nb_iterations_likely_upper_bound | |
1109 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1); | |
1177 if (exit_at_end) | 1110 if (exit_at_end) |
1178 { | 1111 { |
1179 desc->niter_expr = | 1112 desc->niter_expr = |
1180 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); | 1113 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); |
1181 desc->noloop_assumptions = NULL_RTX; | 1114 desc->noloop_assumptions = NULL_RTX; |
1182 desc->niter_max--; | 1115 --loop->nb_iterations_upper_bound; |
1116 if (loop->any_estimate | |
1117 && loop->nb_iterations_estimate != 0) | |
1118 --loop->nb_iterations_estimate; | |
1119 else | |
1120 loop->any_estimate = false; | |
1121 if (loop->any_likely_upper_bound | |
1122 && loop->nb_iterations_likely_upper_bound != 0) | |
1123 --loop->nb_iterations_likely_upper_bound; | |
1124 else | |
1125 loop->any_likely_upper_bound = false; | |
1183 } | 1126 } |
1184 | 1127 |
1185 if (dump_file) | 1128 if (dump_file) |
1186 fprintf (dump_file, | 1129 fprintf (dump_file, |
1187 ";; Unrolled loop %d times, counting # of iterations " | 1130 ";; Unrolled loop %d times, counting # of iterations " |
1188 "in runtime, %i insns\n", | 1131 "in runtime, %i insns\n", |
1189 max_unroll, num_loop_insns (loop)); | 1132 max_unroll, num_loop_insns (loop)); |
1190 | |
1191 VEC_free (basic_block, heap, dom_bbs); | |
1192 } | |
1193 | |
1194 /* Decide whether to simply peel LOOP and how much. */ | |
1195 static void | |
1196 decide_peel_simple (struct loop *loop, int flags) | |
1197 { | |
1198 unsigned npeel; | |
1199 struct niter_desc *desc; | |
1200 | |
1201 if (!(flags & UAP_PEEL)) | |
1202 { | |
1203 /* We were not asked to, just return back silently. */ | |
1204 return; | |
1205 } | |
1206 | |
1207 if (dump_file) | |
1208 fprintf (dump_file, "\n;; Considering simply peeling loop\n"); | |
1209 | |
1210 /* npeel = number of iterations to peel. */ | |
1211 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns; | |
1212 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES)) | |
1213 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES); | |
1214 | |
1215 /* Skip big loops. */ | |
1216 if (!npeel) | |
1217 { | |
1218 if (dump_file) | |
1219 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
1220 return; | |
1221 } | |
1222 | |
1223 /* Check for simple loops. */ | |
1224 desc = get_simple_loop_desc (loop); | |
1225 | |
1226 /* Check number of iterations. */ | |
1227 if (desc->simple_p && !desc->assumptions && desc->const_iter) | |
1228 { | |
1229 if (dump_file) | |
1230 fprintf (dump_file, ";; Loop iterates constant times\n"); | |
1231 return; | |
1232 } | |
1233 | |
1234 /* Do not simply peel loops with branches inside -- it increases number | |
1235 of mispredicts. */ | |
1236 if (num_loop_branches (loop) > 1) | |
1237 { | |
1238 if (dump_file) | |
1239 fprintf (dump_file, ";; Not peeling, contains branches\n"); | |
1240 return; | |
1241 } | |
1242 | |
1243 if (loop->header->count) | |
1244 { | |
1245 unsigned niter = expected_loop_iterations (loop); | |
1246 if (niter + 1 > npeel) | |
1247 { | |
1248 if (dump_file) | |
1249 { | |
1250 fprintf (dump_file, ";; Not peeling loop, rolls too much ("); | |
1251 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
1252 (HOST_WIDEST_INT) (niter + 1)); | |
1253 fprintf (dump_file, " iterations > %d [maximum peelings])\n", | |
1254 npeel); | |
1255 } | |
1256 return; | |
1257 } | |
1258 npeel = niter + 1; | |
1259 } | |
1260 else | |
1261 { | |
1262 /* For now we have no good heuristics to decide whether loop peeling | |
1263 will be effective, so disable it. */ | |
1264 if (dump_file) | |
1265 fprintf (dump_file, | |
1266 ";; Not peeling loop, no evidence it will be profitable\n"); | |
1267 return; | |
1268 } | |
1269 | |
1270 /* Success. */ | |
1271 loop->lpt_decision.decision = LPT_PEEL_SIMPLE; | |
1272 loop->lpt_decision.times = npeel; | |
1273 | |
1274 if (dump_file) | |
1275 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n", | |
1276 loop->lpt_decision.times); | |
1277 } | |
1278 | |
1279 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: | |
1280 while (cond) | |
1281 body; | |
1282 | |
1283 ==> | |
1284 | |
1285 if (!cond) goto end; | |
1286 body; | |
1287 if (!cond) goto end; | |
1288 body; | |
1289 while (cond) | |
1290 body; | |
1291 end: ; | |
1292 */ | |
1293 static void | |
1294 peel_loop_simple (struct loop *loop) | |
1295 { | |
1296 sbitmap wont_exit; | |
1297 unsigned npeel = loop->lpt_decision.times; | |
1298 struct niter_desc *desc = get_simple_loop_desc (loop); | |
1299 struct opt_info *opt_info = NULL; | |
1300 bool ok; | |
1301 | |
1302 if (flag_split_ivs_in_unroller && npeel > 1) | |
1303 opt_info = analyze_insns_in_loop (loop); | |
1304 | |
1305 wont_exit = sbitmap_alloc (npeel + 1); | |
1306 sbitmap_zero (wont_exit); | |
1307 | |
1308 opt_info_start_duplication (opt_info); | |
1309 | |
1310 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1311 npeel, wont_exit, NULL, | |
1312 NULL, DLTHE_FLAG_UPDATE_FREQ | |
1313 | (opt_info | |
1314 ? DLTHE_RECORD_COPY_NUMBER | |
1315 : 0)); | |
1316 gcc_assert (ok); | |
1317 | |
1318 free (wont_exit); | |
1319 | |
1320 if (opt_info) | |
1321 { | |
1322 apply_opt_in_copies (opt_info, npeel, false, false); | |
1323 free_opt_info (opt_info); | |
1324 } | |
1325 | |
1326 if (desc->simple_p) | |
1327 { | |
1328 if (desc->const_iter) | |
1329 { | |
1330 desc->niter -= npeel; | |
1331 desc->niter_expr = GEN_INT (desc->niter); | |
1332 desc->noloop_assumptions = NULL_RTX; | |
1333 } | |
1334 else | |
1335 { | |
1336 /* We cannot just update niter_expr, as its value might be clobbered | |
1337 inside loop. We could handle this by counting the number into | |
1338 temporary just like we do in runtime unrolling, but it does not | |
1339 seem worthwhile. */ | |
1340 free_simple_loop_desc (loop); | |
1341 } | |
1342 } | |
1343 if (dump_file) | |
1344 fprintf (dump_file, ";; Peeling loop %d times\n", npeel); | |
1345 } | 1133 } |
1346 | 1134 |
1347 /* Decide whether to unroll LOOP stupidly and how much. */ | 1135 /* Decide whether to unroll LOOP stupidly and how much. */ |
1348 static void | 1136 static void |
1349 decide_unroll_stupid (struct loop *loop, int flags) | 1137 decide_unroll_stupid (struct loop *loop, int flags) |
1350 { | 1138 { |
1351 unsigned nunroll, nunroll_by_av, i; | 1139 unsigned nunroll, nunroll_by_av, i; |
1352 struct niter_desc *desc; | 1140 struct niter_desc *desc; |
1141 widest_int iterations; | |
1353 | 1142 |
1354 if (!(flags & UAP_UNROLL_ALL)) | 1143 if (!(flags & UAP_UNROLL_ALL)) |
1355 { | 1144 { |
1356 /* We were not asked to, just return back silently. */ | 1145 /* We were not asked to, just return back silently. */ |
1357 return; | 1146 return; |
1391 fprintf (dump_file, ";; The loop is simple\n"); | 1180 fprintf (dump_file, ";; The loop is simple\n"); |
1392 return; | 1181 return; |
1393 } | 1182 } |
1394 | 1183 |
1395 /* Do not unroll loops with branches inside -- it increases number | 1184 /* Do not unroll loops with branches inside -- it increases number |
1396 of mispredicts. */ | 1185 of mispredicts. |
1186 TODO: this heuristic needs tunning; call inside the loop body | |
1187 is also relatively good reason to not unroll. */ | |
1397 if (num_loop_branches (loop) > 1) | 1188 if (num_loop_branches (loop) > 1) |
1398 { | 1189 { |
1399 if (dump_file) | 1190 if (dump_file) |
1400 fprintf (dump_file, ";; Not unrolling, contains branches\n"); | 1191 fprintf (dump_file, ";; Not unrolling, contains branches\n"); |
1401 return; | 1192 return; |
1402 } | 1193 } |
1403 | 1194 |
1404 /* If we have profile feedback, check whether the loop rolls. */ | 1195 /* Check whether the loop rolls. */ |
1405 if (loop->header->count | 1196 if ((get_estimated_loop_iterations (loop, &iterations) |
1406 && expected_loop_iterations (loop) < 2 * nunroll) | 1197 || get_likely_max_loop_iterations (loop, &iterations)) |
1198 && wi::ltu_p (iterations, 2 * nunroll)) | |
1407 { | 1199 { |
1408 if (dump_file) | 1200 if (dump_file) |
1409 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | 1201 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); |
1410 return; | 1202 return; |
1411 } | 1203 } |
1416 for (i = 1; 2 * i <= nunroll; i *= 2) | 1208 for (i = 1; 2 * i <= nunroll; i *= 2) |
1417 continue; | 1209 continue; |
1418 | 1210 |
1419 loop->lpt_decision.decision = LPT_UNROLL_STUPID; | 1211 loop->lpt_decision.decision = LPT_UNROLL_STUPID; |
1420 loop->lpt_decision.times = i - 1; | 1212 loop->lpt_decision.times = i - 1; |
1421 | 1213 } |
1422 if (dump_file) | 1214 |
1423 fprintf (dump_file, | 1215 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this: |
1424 ";; Decided to unroll the loop stupidly, %d times.\n", | 1216 |
1425 loop->lpt_decision.times); | |
1426 } | |
1427 | |
1428 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: | |
1429 while (cond) | 1217 while (cond) |
1430 body; | 1218 body; |
1431 | 1219 |
1432 ==> | 1220 ==> (LOOP->LPT_DECISION.TIMES == 3) |
1433 | 1221 |
1434 while (cond) | 1222 while (cond) |
1435 { | 1223 { |
1436 body; | 1224 body; |
1437 if (!cond) break; | 1225 if (!cond) break; |
1443 } | 1231 } |
1444 */ | 1232 */ |
1445 static void | 1233 static void |
1446 unroll_loop_stupid (struct loop *loop) | 1234 unroll_loop_stupid (struct loop *loop) |
1447 { | 1235 { |
1448 sbitmap wont_exit; | |
1449 unsigned nunroll = loop->lpt_decision.times; | 1236 unsigned nunroll = loop->lpt_decision.times; |
1450 struct niter_desc *desc = get_simple_loop_desc (loop); | 1237 struct niter_desc *desc = get_simple_loop_desc (loop); |
1451 struct opt_info *opt_info = NULL; | 1238 struct opt_info *opt_info = NULL; |
1452 bool ok; | 1239 bool ok; |
1453 | 1240 |
1454 if (flag_split_ivs_in_unroller | 1241 if (flag_split_ivs_in_unroller |
1455 || flag_variable_expansion_in_unroller) | 1242 || flag_variable_expansion_in_unroller) |
1456 opt_info = analyze_insns_in_loop (loop); | 1243 opt_info = analyze_insns_in_loop (loop); |
1457 | 1244 |
1458 | 1245 auto_sbitmap wont_exit (nunroll + 1); |
1459 wont_exit = sbitmap_alloc (nunroll + 1); | 1246 bitmap_clear (wont_exit); |
1460 sbitmap_zero (wont_exit); | |
1461 opt_info_start_duplication (opt_info); | 1247 opt_info_start_duplication (opt_info); |
1462 | 1248 |
1463 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), | 1249 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1464 nunroll, wont_exit, | 1250 nunroll, wont_exit, |
1465 NULL, NULL, | 1251 NULL, NULL, |
1473 { | 1259 { |
1474 apply_opt_in_copies (opt_info, nunroll, true, true); | 1260 apply_opt_in_copies (opt_info, nunroll, true, true); |
1475 free_opt_info (opt_info); | 1261 free_opt_info (opt_info); |
1476 } | 1262 } |
1477 | 1263 |
1478 free (wont_exit); | |
1479 | |
1480 if (desc->simple_p) | 1264 if (desc->simple_p) |
1481 { | 1265 { |
1482 /* We indeed may get here provided that there are nontrivial assumptions | 1266 /* We indeed may get here provided that there are nontrivial assumptions |
1483 for a loop to be really simple. We could update the counts, but the | 1267 for a loop to be really simple. We could update the counts, but the |
1484 problem is that we are unable to decide which exit will be taken | 1268 problem is that we are unable to decide which exit will be taken |
1485 (not really true in case the number of iterations is constant, | 1269 (not really true in case the number of iterations is constant, |
1486 but noone will do anything with this information, so we do not | 1270 but no one will do anything with this information, so we do not |
1487 worry about it). */ | 1271 worry about it). */ |
1488 desc->simple_p = false; | 1272 desc->simple_p = false; |
1489 } | 1273 } |
1490 | 1274 |
1491 if (dump_file) | 1275 if (dump_file) |
1492 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", | 1276 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", |
1493 nunroll, num_loop_insns (loop)); | 1277 nunroll, num_loop_insns (loop)); |
1494 } | 1278 } |
1495 | 1279 |
1496 /* A hash function for information about insns to split. */ | |
1497 | |
1498 static hashval_t | |
1499 si_info_hash (const void *ivts) | |
1500 { | |
1501 return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn); | |
1502 } | |
1503 | |
1504 /* An equality functions for information about insns to split. */ | |
1505 | |
1506 static int | |
1507 si_info_eq (const void *ivts1, const void *ivts2) | |
1508 { | |
1509 const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1; | |
1510 const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2; | |
1511 | |
1512 return i1->insn == i2->insn; | |
1513 } | |
1514 | |
1515 /* Return a hash for VES, which is really a "var_to_expand *". */ | |
1516 | |
1517 static hashval_t | |
1518 ve_info_hash (const void *ves) | |
1519 { | |
1520 return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn); | |
1521 } | |
1522 | |
1523 /* Return true if IVTS1 and IVTS2 (which are really both of type | |
1524 "var_to_expand *") refer to the same instruction. */ | |
1525 | |
1526 static int | |
1527 ve_info_eq (const void *ivts1, const void *ivts2) | |
1528 { | |
1529 const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1; | |
1530 const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2; | |
1531 | |
1532 return i1->insn == i2->insn; | |
1533 } | |
1534 | |
1535 /* Returns true if REG is referenced in one nondebug insn in LOOP. | 1280 /* Returns true if REG is referenced in one nondebug insn in LOOP. |
1536 Set *DEBUG_USES to the number of debug insns that reference the | 1281 Set *DEBUG_USES to the number of debug insns that reference the |
1537 variable. */ | 1282 variable. */ |
1538 | 1283 |
1539 bool | 1284 static bool |
1540 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg, | 1285 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg, |
1541 int *debug_uses) | 1286 int *debug_uses) |
1542 { | 1287 { |
1543 basic_block *body, bb; | 1288 basic_block *body, bb; |
1544 unsigned i; | 1289 unsigned i; |
1545 int count_ref = 0; | 1290 int count_ref = 0; |
1546 rtx insn; | 1291 rtx_insn *insn; |
1547 | 1292 |
1548 body = get_loop_body (loop); | 1293 body = get_loop_body (loop); |
1549 for (i = 0; i < loop->num_nodes; i++) | 1294 for (i = 0; i < loop->num_nodes; i++) |
1550 { | 1295 { |
1551 bb = body[i]; | 1296 bb = body[i]; |
1567 static void | 1312 static void |
1568 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses) | 1313 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses) |
1569 { | 1314 { |
1570 basic_block *body, bb; | 1315 basic_block *body, bb; |
1571 unsigned i; | 1316 unsigned i; |
1572 rtx insn; | 1317 rtx_insn *insn; |
1573 | 1318 |
1574 body = get_loop_body (loop); | 1319 body = get_loop_body (loop); |
1575 for (i = 0; debug_uses && i < loop->num_nodes; i++) | 1320 for (i = 0; debug_uses && i < loop->num_nodes; i++) |
1576 { | 1321 { |
1577 bb = body[i]; | 1322 bb = body[i]; |
1612 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant | 1357 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant |
1613 information and return a pointer to it. | 1358 information and return a pointer to it. |
1614 */ | 1359 */ |
1615 | 1360 |
1616 static struct var_to_expand * | 1361 static struct var_to_expand * |
1617 analyze_insn_to_expand_var (struct loop *loop, rtx insn) | 1362 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn) |
1618 { | 1363 { |
1619 rtx set, dest, src; | 1364 rtx set, dest, src; |
1620 struct var_to_expand *ves; | 1365 struct var_to_expand *ves; |
1621 unsigned accum_pos; | 1366 unsigned accum_pos; |
1622 enum rtx_code code; | 1367 enum rtx_code code; |
1704 } | 1449 } |
1705 | 1450 |
1706 if (debug_uses) | 1451 if (debug_uses) |
1707 /* Instead of resetting the debug insns, we could replace each | 1452 /* Instead of resetting the debug insns, we could replace each |
1708 debug use in the loop with the sum or product of all expanded | 1453 debug use in the loop with the sum or product of all expanded |
1709 accummulators. Since we'll only know of all expansions at the | 1454 accumulators. Since we'll only know of all expansions at the |
1710 end, we'd have to keep track of which vars_to_expand a debug | 1455 end, we'd have to keep track of which vars_to_expand a debug |
1711 insn in the loop references, take note of each copy of the | 1456 insn in the loop references, take note of each copy of the |
1712 debug insn during unrolling, and when it's all done, compute | 1457 debug insn during unrolling, and when it's all done, compute |
1713 the sum or product of each variable and adjust the original | 1458 the sum or product of each variable and adjust the original |
1714 debug insn and each copy thereof. What a pain! */ | 1459 debug insn and each copy thereof. What a pain! */ |
1716 | 1461 |
1717 /* Record the accumulator to expand. */ | 1462 /* Record the accumulator to expand. */ |
1718 ves = XNEW (struct var_to_expand); | 1463 ves = XNEW (struct var_to_expand); |
1719 ves->insn = insn; | 1464 ves->insn = insn; |
1720 ves->reg = copy_rtx (dest); | 1465 ves->reg = copy_rtx (dest); |
1721 ves->var_expansions = VEC_alloc (rtx, heap, 1); | 1466 ves->var_expansions.create (1); |
1722 ves->next = NULL; | 1467 ves->next = NULL; |
1723 ves->op = GET_CODE (src); | 1468 ves->op = GET_CODE (src); |
1724 ves->expansion_count = 0; | 1469 ves->expansion_count = 0; |
1725 ves->reuse_expansion = 0; | 1470 ves->reuse_expansion = 0; |
1726 ves->accum_pos = accum_pos; | |
1727 return ves; | 1471 return ves; |
1728 } | 1472 } |
1729 | 1473 |
1730 /* Determine whether there is an induction variable in INSN that | 1474 /* Determine whether there is an induction variable in INSN that |
1731 we would like to split during unrolling. | 1475 we would like to split during unrolling. |
1751 Return NULL if INSN contains no interesting IVs. Otherwise, allocate | 1495 Return NULL if INSN contains no interesting IVs. Otherwise, allocate |
1752 an IV_TO_SPLIT structure, fill it with the relevant information and return a | 1496 an IV_TO_SPLIT structure, fill it with the relevant information and return a |
1753 pointer to it. */ | 1497 pointer to it. */ |
1754 | 1498 |
1755 static struct iv_to_split * | 1499 static struct iv_to_split * |
1756 analyze_iv_to_split_insn (rtx insn) | 1500 analyze_iv_to_split_insn (rtx_insn *insn) |
1757 { | 1501 { |
1758 rtx set, dest; | 1502 rtx set, dest; |
1759 struct rtx_iv iv; | 1503 struct rtx_iv iv; |
1760 struct iv_to_split *ivts; | 1504 struct iv_to_split *ivts; |
1505 scalar_int_mode mode; | |
1761 bool ok; | 1506 bool ok; |
1762 | 1507 |
1763 /* For now we just split the basic induction variables. Later this may be | 1508 /* For now we just split the basic induction variables. Later this may be |
1764 extended for example by selecting also addresses of memory references. */ | 1509 extended for example by selecting also addresses of memory references. */ |
1765 set = single_set (insn); | 1510 set = single_set (insn); |
1766 if (!set) | 1511 if (!set) |
1767 return NULL; | 1512 return NULL; |
1768 | 1513 |
1769 dest = SET_DEST (set); | 1514 dest = SET_DEST (set); |
1770 if (!REG_P (dest)) | 1515 if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode)) |
1771 return NULL; | 1516 return NULL; |
1772 | 1517 |
1773 if (!biv_p (insn, dest)) | 1518 if (!biv_p (insn, mode, dest)) |
1774 return NULL; | 1519 return NULL; |
1775 | 1520 |
1776 ok = iv_analyze_result (insn, dest, &iv); | 1521 ok = iv_analyze_result (insn, dest, &iv); |
1777 | 1522 |
1778 /* This used to be an assert under the assumption that if biv_p returns | 1523 /* This used to be an assert under the assumption that if biv_p returns |
1791 return NULL; | 1536 return NULL; |
1792 | 1537 |
1793 /* Record the insn to split. */ | 1538 /* Record the insn to split. */ |
1794 ivts = XNEW (struct iv_to_split); | 1539 ivts = XNEW (struct iv_to_split); |
1795 ivts->insn = insn; | 1540 ivts->insn = insn; |
1541 ivts->orig_var = dest; | |
1796 ivts->base_var = NULL_RTX; | 1542 ivts->base_var = NULL_RTX; |
1797 ivts->step = iv.step; | 1543 ivts->step = iv.step; |
1798 ivts->next = NULL; | 1544 ivts->next = NULL; |
1799 ivts->n_loc = 1; | |
1800 ivts->loc[0] = 1; | |
1801 | 1545 |
1802 return ivts; | 1546 return ivts; |
1803 } | 1547 } |
1804 | 1548 |
1805 /* Determines which of insns in LOOP can be optimized. | 1549 /* Determines which of insns in LOOP can be optimized. |
1811 analyze_insns_in_loop (struct loop *loop) | 1555 analyze_insns_in_loop (struct loop *loop) |
1812 { | 1556 { |
1813 basic_block *body, bb; | 1557 basic_block *body, bb; |
1814 unsigned i; | 1558 unsigned i; |
1815 struct opt_info *opt_info = XCNEW (struct opt_info); | 1559 struct opt_info *opt_info = XCNEW (struct opt_info); |
1816 rtx insn; | 1560 rtx_insn *insn; |
1817 struct iv_to_split *ivts = NULL; | 1561 struct iv_to_split *ivts = NULL; |
1818 struct var_to_expand *ves = NULL; | 1562 struct var_to_expand *ves = NULL; |
1819 PTR *slot1; | 1563 iv_to_split **slot1; |
1820 PTR *slot2; | 1564 var_to_expand **slot2; |
1821 VEC (edge, heap) *edges = get_loop_exit_edges (loop); | 1565 vec<edge> edges = get_loop_exit_edges (loop); |
1822 edge exit; | 1566 edge exit; |
1823 bool can_apply = false; | 1567 bool can_apply = false; |
1824 | 1568 |
1825 iv_analysis_loop_init (loop); | 1569 iv_analysis_loop_init (loop); |
1826 | 1570 |
1827 body = get_loop_body (loop); | 1571 body = get_loop_body (loop); |
1828 | 1572 |
1829 if (flag_split_ivs_in_unroller) | 1573 if (flag_split_ivs_in_unroller) |
1830 { | 1574 { |
1831 opt_info->insns_to_split = htab_create (5 * loop->num_nodes, | 1575 opt_info->insns_to_split |
1832 si_info_hash, si_info_eq, free); | 1576 = new hash_table<iv_split_hasher> (5 * loop->num_nodes); |
1833 opt_info->iv_to_split_head = NULL; | 1577 opt_info->iv_to_split_head = NULL; |
1834 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head; | 1578 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head; |
1835 } | 1579 } |
1836 | 1580 |
1837 /* Record the loop exit bb and loop preheader before the unrolling. */ | 1581 /* Record the loop exit bb and loop preheader before the unrolling. */ |
1838 opt_info->loop_preheader = loop_preheader_edge (loop)->src; | 1582 opt_info->loop_preheader = loop_preheader_edge (loop)->src; |
1839 | 1583 |
1840 if (VEC_length (edge, edges) == 1) | 1584 if (edges.length () == 1) |
1841 { | 1585 { |
1842 exit = VEC_index (edge, edges, 0); | 1586 exit = edges[0]; |
1843 if (!(exit->flags & EDGE_COMPLEX)) | 1587 if (!(exit->flags & EDGE_COMPLEX)) |
1844 { | 1588 { |
1845 opt_info->loop_exit = split_edge (exit); | 1589 opt_info->loop_exit = split_edge (exit); |
1846 can_apply = true; | 1590 can_apply = true; |
1847 } | 1591 } |
1848 } | 1592 } |
1849 | 1593 |
1850 if (flag_variable_expansion_in_unroller | 1594 if (flag_variable_expansion_in_unroller |
1851 && can_apply) | 1595 && can_apply) |
1852 { | 1596 { |
1853 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes, | 1597 opt_info->insns_with_var_to_expand |
1854 ve_info_hash, | 1598 = new hash_table<var_expand_hasher> (5 * loop->num_nodes); |
1855 ve_info_eq, free); | |
1856 opt_info->var_to_expand_head = NULL; | 1599 opt_info->var_to_expand_head = NULL; |
1857 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head; | 1600 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head; |
1858 } | 1601 } |
1859 | 1602 |
1860 for (i = 0; i < loop->num_nodes; i++) | 1603 for (i = 0; i < loop->num_nodes; i++) |
1871 if (opt_info->insns_to_split) | 1614 if (opt_info->insns_to_split) |
1872 ivts = analyze_iv_to_split_insn (insn); | 1615 ivts = analyze_iv_to_split_insn (insn); |
1873 | 1616 |
1874 if (ivts) | 1617 if (ivts) |
1875 { | 1618 { |
1876 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT); | 1619 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT); |
1877 gcc_assert (*slot1 == NULL); | 1620 gcc_assert (*slot1 == NULL); |
1878 *slot1 = ivts; | 1621 *slot1 = ivts; |
1879 *opt_info->iv_to_split_tail = ivts; | 1622 *opt_info->iv_to_split_tail = ivts; |
1880 opt_info->iv_to_split_tail = &ivts->next; | 1623 opt_info->iv_to_split_tail = &ivts->next; |
1881 continue; | 1624 continue; |
1884 if (opt_info->insns_with_var_to_expand) | 1627 if (opt_info->insns_with_var_to_expand) |
1885 ves = analyze_insn_to_expand_var (loop, insn); | 1628 ves = analyze_insn_to_expand_var (loop, insn); |
1886 | 1629 |
1887 if (ves) | 1630 if (ves) |
1888 { | 1631 { |
1889 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT); | 1632 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT); |
1890 gcc_assert (*slot2 == NULL); | 1633 gcc_assert (*slot2 == NULL); |
1891 *slot2 = ves; | 1634 *slot2 = ves; |
1892 *opt_info->var_to_expand_tail = ves; | 1635 *opt_info->var_to_expand_tail = ves; |
1893 opt_info->var_to_expand_tail = &ves->next; | 1636 opt_info->var_to_expand_tail = &ves->next; |
1894 } | 1637 } |
1895 } | 1638 } |
1896 } | 1639 } |
1897 | 1640 |
1898 VEC_free (edge, heap, edges); | 1641 edges.release (); |
1899 free (body); | 1642 free (body); |
1900 return opt_info; | 1643 return opt_info; |
1901 } | 1644 } |
1902 | 1645 |
1903 /* Called just before loop duplication. Records start of duplicated area | 1646 /* Called just before loop duplication. Records start of duplicated area |
1905 | 1648 |
1906 static void | 1649 static void |
1907 opt_info_start_duplication (struct opt_info *opt_info) | 1650 opt_info_start_duplication (struct opt_info *opt_info) |
1908 { | 1651 { |
1909 if (opt_info) | 1652 if (opt_info) |
1910 opt_info->first_new_block = last_basic_block; | 1653 opt_info->first_new_block = last_basic_block_for_fn (cfun); |
1911 } | 1654 } |
1912 | 1655 |
1913 /* Determine the number of iterations between initialization of the base | 1656 /* Determine the number of iterations between initialization of the base |
1914 variable and the current copy (N_COPY). N_COPIES is the total number | 1657 variable and the current copy (N_COPY). N_COPIES is the total number |
1915 of newly created copies. UNROLLING is true if we are unrolling | 1658 of newly created copies. UNROLLING is true if we are unrolling |
1933 else | 1676 else |
1934 return n_copies; | 1677 return n_copies; |
1935 } | 1678 } |
1936 } | 1679 } |
1937 | 1680 |
1938 /* Locate in EXPR the expression corresponding to the location recorded | |
1939 in IVTS, and return a pointer to the RTX for this location. */ | |
1940 | |
1941 static rtx * | |
1942 get_ivts_expr (rtx expr, struct iv_to_split *ivts) | |
1943 { | |
1944 unsigned i; | |
1945 rtx *ret = &expr; | |
1946 | |
1947 for (i = 0; i < ivts->n_loc; i++) | |
1948 ret = &XEXP (*ret, ivts->loc[i]); | |
1949 | |
1950 return ret; | |
1951 } | |
1952 | |
1953 /* Allocate basic variable for the induction variable chain. */ | 1681 /* Allocate basic variable for the induction variable chain. */ |
1954 | 1682 |
1955 static void | 1683 static void |
1956 allocate_basic_variable (struct iv_to_split *ivts) | 1684 allocate_basic_variable (struct iv_to_split *ivts) |
1957 { | 1685 { |
1958 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts); | 1686 rtx expr = SET_SRC (single_set (ivts->insn)); |
1959 | 1687 |
1960 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); | 1688 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); |
1961 } | 1689 } |
1962 | 1690 |
1963 /* Insert initialization of basic variable of IVTS before INSN, taking | 1691 /* Insert initialization of basic variable of IVTS before INSN, taking |
1964 the initial value from INSN. */ | 1692 the initial value from INSN. */ |
1965 | 1693 |
1966 static void | 1694 static void |
1967 insert_base_initialization (struct iv_to_split *ivts, rtx insn) | 1695 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn) |
1968 { | 1696 { |
1969 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts)); | 1697 rtx expr = copy_rtx (SET_SRC (single_set (insn))); |
1970 rtx seq; | 1698 rtx_insn *seq; |
1971 | 1699 |
1972 start_sequence (); | 1700 start_sequence (); |
1973 expr = force_operand (expr, ivts->base_var); | 1701 expr = force_operand (expr, ivts->base_var); |
1974 if (expr != ivts->base_var) | 1702 if (expr != ivts->base_var) |
1975 emit_move_insn (ivts->base_var, expr); | 1703 emit_move_insn (ivts->base_var, expr); |
1981 | 1709 |
1982 /* Replace the use of induction variable described in IVTS in INSN | 1710 /* Replace the use of induction variable described in IVTS in INSN |
1983 by base variable + DELTA * step. */ | 1711 by base variable + DELTA * step. */ |
1984 | 1712 |
1985 static void | 1713 static void |
1986 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta) | 1714 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta) |
1987 { | 1715 { |
1988 rtx expr, *loc, seq, incr, var; | 1716 rtx expr, *loc, incr, var; |
1989 enum machine_mode mode = GET_MODE (ivts->base_var); | 1717 rtx_insn *seq; |
1718 machine_mode mode = GET_MODE (ivts->base_var); | |
1990 rtx src, dest, set; | 1719 rtx src, dest, set; |
1991 | 1720 |
1992 /* Construct base + DELTA * step. */ | 1721 /* Construct base + DELTA * step. */ |
1993 if (!delta) | 1722 if (!delta) |
1994 expr = ivts->base_var; | 1723 expr = ivts->base_var; |
1995 else | 1724 else |
1996 { | 1725 { |
1997 incr = simplify_gen_binary (MULT, mode, | 1726 incr = simplify_gen_binary (MULT, mode, |
1998 ivts->step, gen_int_mode (delta, mode)); | 1727 copy_rtx (ivts->step), |
1728 gen_int_mode (delta, mode)); | |
1999 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), | 1729 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), |
2000 ivts->base_var, incr); | 1730 ivts->base_var, incr); |
2001 } | 1731 } |
2002 | 1732 |
2003 /* Figure out where to do the replacement. */ | 1733 /* Figure out where to do the replacement. */ |
2004 loc = get_ivts_expr (single_set (insn), ivts); | 1734 loc = &SET_SRC (single_set (insn)); |
2005 | 1735 |
2006 /* If we can make the replacement right away, we're done. */ | 1736 /* If we can make the replacement right away, we're done. */ |
2007 if (validate_change (insn, loc, expr, 0)) | 1737 if (validate_change (insn, loc, expr, 0)) |
2008 return; | 1738 return; |
2009 | 1739 |
2048 rtx reg; | 1778 rtx reg; |
2049 | 1779 |
2050 if (ve->reuse_expansion == 0) | 1780 if (ve->reuse_expansion == 0) |
2051 reg = ve->reg; | 1781 reg = ve->reg; |
2052 else | 1782 else |
2053 reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1); | 1783 reg = ve->var_expansions[ve->reuse_expansion - 1]; |
2054 | 1784 |
2055 if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion) | 1785 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion) |
2056 ve->reuse_expansion = 0; | 1786 ve->reuse_expansion = 0; |
2057 else | 1787 else |
2058 ve->reuse_expansion++; | 1788 ve->reuse_expansion++; |
2059 | 1789 |
2060 return reg; | 1790 return reg; |
2063 | 1793 |
2064 /* Given INSN replace the uses of the accumulator recorded in VE | 1794 /* Given INSN replace the uses of the accumulator recorded in VE |
2065 with a new register. */ | 1795 with a new register. */ |
2066 | 1796 |
2067 static void | 1797 static void |
2068 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn) | 1798 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn) |
2069 { | 1799 { |
2070 rtx new_reg, set; | 1800 rtx new_reg, set; |
2071 bool really_new_expansion = false; | 1801 bool really_new_expansion = false; |
2072 | 1802 |
2073 set = single_set (insn); | 1803 set = single_set (insn); |
2081 new_reg = gen_reg_rtx (GET_MODE (ve->reg)); | 1811 new_reg = gen_reg_rtx (GET_MODE (ve->reg)); |
2082 } | 1812 } |
2083 else | 1813 else |
2084 new_reg = get_expansion (ve); | 1814 new_reg = get_expansion (ve); |
2085 | 1815 |
2086 validate_change (insn, &SET_DEST (set), new_reg, 1); | 1816 validate_replace_rtx_group (SET_DEST (set), new_reg, insn); |
2087 validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1); | |
2088 | |
2089 if (apply_change_group ()) | 1817 if (apply_change_group ()) |
2090 if (really_new_expansion) | 1818 if (really_new_expansion) |
2091 { | 1819 { |
2092 VEC_safe_push (rtx, heap, ve->var_expansions, new_reg); | 1820 ve->var_expansions.safe_push (new_reg); |
2093 ve->expansion_count++; | 1821 ve->expansion_count++; |
2094 } | 1822 } |
2095 } | 1823 } |
2096 | 1824 |
2097 /* Initialize the variable expansions in loop preheader. PLACE is the | 1825 /* Initialize the variable expansions in loop preheader. PLACE is the |
2123 | 1851 |
2124 static void | 1852 static void |
2125 insert_var_expansion_initialization (struct var_to_expand *ve, | 1853 insert_var_expansion_initialization (struct var_to_expand *ve, |
2126 basic_block place) | 1854 basic_block place) |
2127 { | 1855 { |
2128 rtx seq, var, zero_init, insn; | 1856 rtx_insn *seq; |
1857 rtx var, zero_init; | |
2129 unsigned i; | 1858 unsigned i; |
2130 enum machine_mode mode = GET_MODE (ve->reg); | 1859 machine_mode mode = GET_MODE (ve->reg); |
2131 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); | 1860 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); |
2132 | 1861 |
2133 if (VEC_length (rtx, ve->var_expansions) == 0) | 1862 if (ve->var_expansions.length () == 0) |
2134 return; | 1863 return; |
2135 | 1864 |
2136 start_sequence (); | 1865 start_sequence (); |
2137 switch (ve->op) | 1866 switch (ve->op) |
2138 { | 1867 { |
2139 case FMA: | 1868 case FMA: |
2140 /* Note that we only accumulate FMA via the ADD operand. */ | 1869 /* Note that we only accumulate FMA via the ADD operand. */ |
2141 case PLUS: | 1870 case PLUS: |
2142 case MINUS: | 1871 case MINUS: |
2143 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) | 1872 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) |
2144 { | 1873 { |
2145 if (honor_signed_zero_p) | 1874 if (honor_signed_zero_p) |
2146 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); | 1875 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); |
2147 else | 1876 else |
2148 zero_init = CONST0_RTX (mode); | 1877 zero_init = CONST0_RTX (mode); |
2149 emit_move_insn (var, zero_init); | 1878 emit_move_insn (var, zero_init); |
2150 } | 1879 } |
2151 break; | 1880 break; |
2152 | 1881 |
2153 case MULT: | 1882 case MULT: |
2154 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) | 1883 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) |
2155 { | 1884 { |
2156 zero_init = CONST1_RTX (GET_MODE (var)); | 1885 zero_init = CONST1_RTX (GET_MODE (var)); |
2157 emit_move_insn (var, zero_init); | 1886 emit_move_insn (var, zero_init); |
2158 } | 1887 } |
2159 break; | 1888 break; |
2163 } | 1892 } |
2164 | 1893 |
2165 seq = get_insns (); | 1894 seq = get_insns (); |
2166 end_sequence (); | 1895 end_sequence (); |
2167 | 1896 |
2168 insn = BB_HEAD (place); | 1897 emit_insn_after (seq, BB_END (place)); |
2169 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) | |
2170 insn = NEXT_INSN (insn); | |
2171 | |
2172 emit_insn_after (seq, insn); | |
2173 } | 1898 } |
2174 | 1899 |
2175 /* Combine the variable expansions at the loop exit. PLACE is the | 1900 /* Combine the variable expansions at the loop exit. PLACE is the |
2176 loop exit basic block where the summation of the expansions should | 1901 loop exit basic block where the summation of the expansions should |
2177 take place. */ | 1902 take place. */ |
2178 | 1903 |
2179 static void | 1904 static void |
2180 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place) | 1905 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place) |
2181 { | 1906 { |
2182 rtx sum = ve->reg; | 1907 rtx sum = ve->reg; |
2183 rtx expr, seq, var, insn; | 1908 rtx expr, var; |
1909 rtx_insn *seq, *insn; | |
2184 unsigned i; | 1910 unsigned i; |
2185 | 1911 |
2186 if (VEC_length (rtx, ve->var_expansions) == 0) | 1912 if (ve->var_expansions.length () == 0) |
2187 return; | 1913 return; |
2188 | 1914 |
1915 /* ve->reg might be SUBREG or some other non-shareable RTL, and we use | |
1916 it both here and as the destination of the assignment. */ | |
1917 sum = copy_rtx (sum); | |
2189 start_sequence (); | 1918 start_sequence (); |
2190 switch (ve->op) | 1919 switch (ve->op) |
2191 { | 1920 { |
2192 case FMA: | 1921 case FMA: |
2193 /* Note that we only accumulate FMA via the ADD operand. */ | 1922 /* Note that we only accumulate FMA via the ADD operand. */ |
2194 case PLUS: | 1923 case PLUS: |
2195 case MINUS: | 1924 case MINUS: |
2196 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) | 1925 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) |
2197 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum); | 1926 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum); |
2198 break; | 1927 break; |
2199 | 1928 |
2200 case MULT: | 1929 case MULT: |
2201 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) | 1930 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) |
2202 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum); | 1931 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum); |
2203 break; | 1932 break; |
2204 | 1933 |
2205 default: | 1934 default: |
2206 gcc_unreachable (); | 1935 gcc_unreachable (); |
2217 insn = NEXT_INSN (insn); | 1946 insn = NEXT_INSN (insn); |
2218 | 1947 |
2219 emit_insn_after (seq, insn); | 1948 emit_insn_after (seq, insn); |
2220 } | 1949 } |
2221 | 1950 |
1951 /* Strip away REG_EQUAL notes for IVs we're splitting. | |
1952 | |
1953 Updating REG_EQUAL notes for IVs we split is tricky: We | |
1954 cannot tell until after unrolling, DF-rescanning, and liveness | |
1955 updating, whether an EQ_USE is reached by the split IV while | |
1956 the IV reg is still live. See PR55006. | |
1957 | |
1958 ??? We cannot use remove_reg_equal_equiv_notes_for_regno, | |
1959 because RTL loop-iv requires us to defer rescanning insns and | |
1960 any notes attached to them. So resort to old techniques... */ | |
1961 | |
1962 static void | |
1963 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn) | |
1964 { | |
1965 struct iv_to_split *ivts; | |
1966 rtx note = find_reg_equal_equiv_note (insn); | |
1967 if (! note) | |
1968 return; | |
1969 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) | |
1970 if (reg_mentioned_p (ivts->orig_var, note)) | |
1971 { | |
1972 remove_note (insn, note); | |
1973 return; | |
1974 } | |
1975 } | |
1976 | |
2222 /* Apply loop optimizations in loop copies using the | 1977 /* Apply loop optimizations in loop copies using the |
2223 data which gathered during the unrolling. Structure | 1978 data which gathered during the unrolling. Structure |
2224 OPT_INFO record that data. | 1979 OPT_INFO record that data. |
2225 | 1980 |
2226 UNROLLING is true if we unrolled (not peeled) the loop. | 1981 UNROLLING is true if we unrolled (not peeled) the loop. |
2233 unsigned n_copies, bool unrolling, | 1988 unsigned n_copies, bool unrolling, |
2234 bool rewrite_original_loop) | 1989 bool rewrite_original_loop) |
2235 { | 1990 { |
2236 unsigned i, delta; | 1991 unsigned i, delta; |
2237 basic_block bb, orig_bb; | 1992 basic_block bb, orig_bb; |
2238 rtx insn, orig_insn, next; | 1993 rtx_insn *insn, *orig_insn, *next; |
2239 struct iv_to_split ivts_templ, *ivts; | 1994 struct iv_to_split ivts_templ, *ivts; |
2240 struct var_to_expand ve_templ, *ves; | 1995 struct var_to_expand ve_templ, *ves; |
2241 | 1996 |
2242 /* Sanity check -- we need to put initialization in the original loop | 1997 /* Sanity check -- we need to put initialization in the original loop |
2243 body. */ | 1998 body. */ |
2246 /* Allocate the basic variables (i0). */ | 2001 /* Allocate the basic variables (i0). */ |
2247 if (opt_info->insns_to_split) | 2002 if (opt_info->insns_to_split) |
2248 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) | 2003 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) |
2249 allocate_basic_variable (ivts); | 2004 allocate_basic_variable (ivts); |
2250 | 2005 |
2251 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) | 2006 for (i = opt_info->first_new_block; |
2252 { | 2007 i < (unsigned) last_basic_block_for_fn (cfun); |
2253 bb = BASIC_BLOCK (i); | 2008 i++) |
2009 { | |
2010 bb = BASIC_BLOCK_FOR_FN (cfun, i); | |
2254 orig_bb = get_bb_original (bb); | 2011 orig_bb = get_bb_original (bb); |
2255 | 2012 |
2256 /* bb->aux holds position in copy sequence initialized by | 2013 /* bb->aux holds position in copy sequence initialized by |
2257 duplicate_loop_to_header_edge. */ | 2014 duplicate_loop_to_header_edge. */ |
2258 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, | 2015 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, |
2259 unrolling); | 2016 unrolling); |
2260 bb->aux = 0; | 2017 bb->aux = 0; |
2261 orig_insn = BB_HEAD (orig_bb); | 2018 orig_insn = BB_HEAD (orig_bb); |
2262 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next) | 2019 FOR_BB_INSNS_SAFE (bb, insn, next) |
2263 { | 2020 { |
2264 next = NEXT_INSN (insn); | 2021 if (!INSN_P (insn) |
2265 if (!INSN_P (insn)) | 2022 || (DEBUG_INSN_P (insn) |
2023 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL)) | |
2266 continue; | 2024 continue; |
2267 | 2025 |
2268 while (!INSN_P (orig_insn)) | 2026 while (!INSN_P (orig_insn) |
2027 || (DEBUG_INSN_P (orig_insn) | |
2028 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn)) | |
2029 == LABEL_DECL))) | |
2269 orig_insn = NEXT_INSN (orig_insn); | 2030 orig_insn = NEXT_INSN (orig_insn); |
2270 | 2031 |
2271 ivts_templ.insn = orig_insn; | 2032 ivts_templ.insn = orig_insn; |
2272 ve_templ.insn = orig_insn; | 2033 ve_templ.insn = orig_insn; |
2273 | 2034 |
2274 /* Apply splitting iv optimization. */ | 2035 /* Apply splitting iv optimization. */ |
2275 if (opt_info->insns_to_split) | 2036 if (opt_info->insns_to_split) |
2276 { | 2037 { |
2277 ivts = (struct iv_to_split *) | 2038 maybe_strip_eq_note_for_split_iv (opt_info, insn); |
2278 htab_find (opt_info->insns_to_split, &ivts_templ); | 2039 |
2040 ivts = opt_info->insns_to_split->find (&ivts_templ); | |
2279 | 2041 |
2280 if (ivts) | 2042 if (ivts) |
2281 { | 2043 { |
2282 gcc_assert (GET_CODE (PATTERN (insn)) | 2044 gcc_assert (GET_CODE (PATTERN (insn)) |
2283 == GET_CODE (PATTERN (orig_insn))); | 2045 == GET_CODE (PATTERN (orig_insn))); |
2289 } | 2051 } |
2290 /* Apply variable expansion optimization. */ | 2052 /* Apply variable expansion optimization. */ |
2291 if (unrolling && opt_info->insns_with_var_to_expand) | 2053 if (unrolling && opt_info->insns_with_var_to_expand) |
2292 { | 2054 { |
2293 ves = (struct var_to_expand *) | 2055 ves = (struct var_to_expand *) |
2294 htab_find (opt_info->insns_with_var_to_expand, &ve_templ); | 2056 opt_info->insns_with_var_to_expand->find (&ve_templ); |
2295 if (ves) | 2057 if (ves) |
2296 { | 2058 { |
2297 gcc_assert (GET_CODE (PATTERN (insn)) | 2059 gcc_assert (GET_CODE (PATTERN (insn)) |
2298 == GET_CODE (PATTERN (orig_insn))); | 2060 == GET_CODE (PATTERN (orig_insn))); |
2299 expand_var_during_unrolling (ves, insn); | 2061 expand_var_during_unrolling (ves, insn); |
2317 } | 2079 } |
2318 | 2080 |
2319 /* Rewrite also the original loop body. Find them as originals of the blocks | 2081 /* Rewrite also the original loop body. Find them as originals of the blocks |
2320 in the last copied iteration, i.e. those that have | 2082 in the last copied iteration, i.e. those that have |
2321 get_bb_copy (get_bb_original (bb)) == bb. */ | 2083 get_bb_copy (get_bb_original (bb)) == bb. */ |
2322 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) | 2084 for (i = opt_info->first_new_block; |
2323 { | 2085 i < (unsigned) last_basic_block_for_fn (cfun); |
2324 bb = BASIC_BLOCK (i); | 2086 i++) |
2087 { | |
2088 bb = BASIC_BLOCK_FOR_FN (cfun, i); | |
2325 orig_bb = get_bb_original (bb); | 2089 orig_bb = get_bb_original (bb); |
2326 if (get_bb_copy (orig_bb) != bb) | 2090 if (get_bb_copy (orig_bb) != bb) |
2327 continue; | 2091 continue; |
2328 | 2092 |
2329 delta = determine_split_iv_delta (0, n_copies, unrolling); | 2093 delta = determine_split_iv_delta (0, n_copies, unrolling); |
2337 continue; | 2101 continue; |
2338 | 2102 |
2339 ivts_templ.insn = orig_insn; | 2103 ivts_templ.insn = orig_insn; |
2340 if (opt_info->insns_to_split) | 2104 if (opt_info->insns_to_split) |
2341 { | 2105 { |
2106 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn); | |
2107 | |
2342 ivts = (struct iv_to_split *) | 2108 ivts = (struct iv_to_split *) |
2343 htab_find (opt_info->insns_to_split, &ivts_templ); | 2109 opt_info->insns_to_split->find (&ivts_templ); |
2344 if (ivts) | 2110 if (ivts) |
2345 { | 2111 { |
2346 if (!delta) | 2112 if (!delta) |
2347 insert_base_initialization (ivts, orig_insn); | 2113 insert_base_initialization (ivts, orig_insn); |
2348 split_iv (ivts, orig_insn, delta); | 2114 split_iv (ivts, orig_insn, delta); |
2357 /* Release OPT_INFO. */ | 2123 /* Release OPT_INFO. */ |
2358 | 2124 |
2359 static void | 2125 static void |
2360 free_opt_info (struct opt_info *opt_info) | 2126 free_opt_info (struct opt_info *opt_info) |
2361 { | 2127 { |
2362 if (opt_info->insns_to_split) | 2128 delete opt_info->insns_to_split; |
2363 htab_delete (opt_info->insns_to_split); | 2129 opt_info->insns_to_split = NULL; |
2364 if (opt_info->insns_with_var_to_expand) | 2130 if (opt_info->insns_with_var_to_expand) |
2365 { | 2131 { |
2366 struct var_to_expand *ves; | 2132 struct var_to_expand *ves; |
2367 | 2133 |
2368 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) | 2134 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
2369 VEC_free (rtx, heap, ves->var_expansions); | 2135 ves->var_expansions.release (); |
2370 htab_delete (opt_info->insns_with_var_to_expand); | 2136 delete opt_info->insns_with_var_to_expand; |
2137 opt_info->insns_with_var_to_expand = NULL; | |
2371 } | 2138 } |
2372 free (opt_info); | 2139 free (opt_info); |
2373 } | 2140 } |