comparison gcc/loop-unroll.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
comparison
equal deleted inserted replaced
68:561a7518be6b 111:04ced10e8804
1 /* Loop unrolling and peeling. 1 /* Loop unrolling.
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010 2 Copyright (C) 2002-2017 Free Software Foundation, Inc.
3 Free Software Foundation, Inc.
4 3
5 This file is part of GCC. 4 This file is part of GCC.
6 5
7 GCC is free software; you can redistribute it and/or modify it under 6 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free 7 the terms of the GNU General Public License as published by the Free
19 <http://www.gnu.org/licenses/>. */ 18 <http://www.gnu.org/licenses/>. */
20 19
21 #include "config.h" 20 #include "config.h"
22 #include "system.h" 21 #include "system.h"
23 #include "coretypes.h" 22 #include "coretypes.h"
24 #include "tm.h" 23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h" 25 #include "rtl.h"
26 #include "hard-reg-set.h" 26 #include "tree.h"
27 #include "obstack.h" 27 #include "cfghooks.h"
28 #include "basic-block.h" 28 #include "memmodel.h"
29 #include "optabs.h"
30 #include "emit-rtl.h"
31 #include "recog.h"
32 #include "profile.h"
33 #include "cfgrtl.h"
29 #include "cfgloop.h" 34 #include "cfgloop.h"
30 #include "cfglayout.h"
31 #include "params.h" 35 #include "params.h"
32 #include "output.h" 36 #include "dojump.h"
33 #include "expr.h" 37 #include "expr.h"
34 #include "hashtab.h" 38 #include "dumpfile.h"
35 #include "recog.h" 39
36 #include "target.h" 40 /* This pass performs loop unrolling. We only perform this
37 41 optimization on innermost loops (with single exception) because
38 /* This pass performs loop unrolling and peeling. We only perform these
39 optimizations on innermost loops (with single exception) because
40 the impact on performance is greatest here, and we want to avoid 42 the impact on performance is greatest here, and we want to avoid
41 unnecessary code size growth. The gain is caused by greater sequentiality 43 unnecessary code size growth. The gain is caused by greater sequentiality
42 of code, better code to optimize for further passes and in some cases 44 of code, better code to optimize for further passes and in some cases
43 by fewer testings of exit conditions. The main problem is code growth, 45 by fewer testings of exit conditions. The main problem is code growth,
44 that impacts performance negatively due to effect of caches. 46 that impacts performance negatively due to effect of caches.
45 47
46 What we do: 48 What we do:
47 49
48 -- complete peeling of once-rolling loops; this is the above mentioned
49 exception, as this causes loop to be cancelled completely and
50 does not cause code growth
51 -- complete peeling of loops that roll (small) constant times.
52 -- simple peeling of first iterations of loops that do not roll much
53 (according to profile feedback)
54 -- unrolling of loops that roll constant times; this is almost always 50 -- unrolling of loops that roll constant times; this is almost always
55 win, as we get rid of exit condition tests. 51 win, as we get rid of exit condition tests.
56 -- unrolling of loops that roll number of times that we can compute 52 -- unrolling of loops that roll number of times that we can compute
57 in runtime; we also get rid of exit condition tests here, but there 53 in runtime; we also get rid of exit condition tests here, but there
58 is the extra expense for calculating the number of iterations 54 is the extra expense for calculating the number of iterations
61 it may even slow down the code 57 it may even slow down the code
62 For more detailed descriptions of each of those, see comments at 58 For more detailed descriptions of each of those, see comments at
63 appropriate function below. 59 appropriate function below.
64 60
65 There is a lot of parameters (defined and described in params.def) that 61 There is a lot of parameters (defined and described in params.def) that
66 control how much we unroll/peel. 62 control how much we unroll.
67 63
68 ??? A great problem is that we don't have a good way how to determine 64 ??? A great problem is that we don't have a good way how to determine
69 how many times we should unroll the loop; the experiments I have made 65 how many times we should unroll the loop; the experiments I have made
70 showed that this choice may affect performance in order of several %. 66 showed that this choice may affect performance in order of several %.
71 */ 67 */
72 68
73 /* Information about induction variables to split. */ 69 /* Information about induction variables to split. */
74 70
75 struct iv_to_split 71 struct iv_to_split
76 { 72 {
77 rtx insn; /* The insn in that the induction variable occurs. */ 73 rtx_insn *insn; /* The insn in that the induction variable occurs. */
74 rtx orig_var; /* The variable (register) for the IV before split. */
78 rtx base_var; /* The variable on that the values in the further 75 rtx base_var; /* The variable on that the values in the further
79 iterations are based. */ 76 iterations are based. */
80 rtx step; /* Step of the induction variable. */ 77 rtx step; /* Step of the induction variable. */
81 struct iv_to_split *next; /* Next entry in walking order. */ 78 struct iv_to_split *next; /* Next entry in walking order. */
82 unsigned n_loc;
83 unsigned loc[3]; /* Location where the definition of the induction
84 variable occurs in the insn. For example if
85 N_LOC is 2, the expression is located at
86 XEXP (XEXP (single_set, loc[0]), loc[1]). */
87 }; 79 };
88 80
89 /* Information about accumulators to expand. */ 81 /* Information about accumulators to expand. */
90 82
91 struct var_to_expand 83 struct var_to_expand
92 { 84 {
93 rtx insn; /* The insn in that the variable expansion occurs. */ 85 rtx_insn *insn; /* The insn in that the variable expansion occurs. */
94 rtx reg; /* The accumulator which is expanded. */ 86 rtx reg; /* The accumulator which is expanded. */
95 VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */ 87 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
96 struct var_to_expand *next; /* Next entry in walking order. */ 88 struct var_to_expand *next; /* Next entry in walking order. */
97 enum rtx_code op; /* The type of the accumulation - addition, subtraction 89 enum rtx_code op; /* The type of the accumulation - addition, subtraction
98 or multiplication. */ 90 or multiplication. */
99 int expansion_count; /* Count the number of expansions generated so far. */ 91 int expansion_count; /* Count the number of expansions generated so far. */
100 int reuse_expansion; /* The expansion we intend to reuse to expand 92 int reuse_expansion; /* The expansion we intend to reuse to expand
101 the accumulator. If REUSE_EXPANSION is 0 reuse 93 the accumulator. If REUSE_EXPANSION is 0 reuse
102 the original accumulator. Else use 94 the original accumulator. Else use
103 var_expansions[REUSE_EXPANSION - 1]. */ 95 var_expansions[REUSE_EXPANSION - 1]. */
104 unsigned accum_pos; /* The position in which the accumulator is placed in
105 the insn src. For example in x = x + something
106 accum_pos is 0 while in x = something + x accum_pos
107 is 1. */
108 }; 96 };
97
98 /* Hashtable helper for iv_to_split. */
99
100 struct iv_split_hasher : free_ptr_hash <iv_to_split>
101 {
102 static inline hashval_t hash (const iv_to_split *);
103 static inline bool equal (const iv_to_split *, const iv_to_split *);
104 };
105
106
107 /* A hash function for information about insns to split. */
108
109 inline hashval_t
110 iv_split_hasher::hash (const iv_to_split *ivts)
111 {
112 return (hashval_t) INSN_UID (ivts->insn);
113 }
114
115 /* An equality functions for information about insns to split. */
116
117 inline bool
118 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
119 {
120 return i1->insn == i2->insn;
121 }
122
123 /* Hashtable helper for iv_to_split. */
124
125 struct var_expand_hasher : free_ptr_hash <var_to_expand>
126 {
127 static inline hashval_t hash (const var_to_expand *);
128 static inline bool equal (const var_to_expand *, const var_to_expand *);
129 };
130
131 /* Return a hash for VES. */
132
133 inline hashval_t
134 var_expand_hasher::hash (const var_to_expand *ves)
135 {
136 return (hashval_t) INSN_UID (ves->insn);
137 }
138
139 /* Return true if I1 and I2 refer to the same instruction. */
140
141 inline bool
142 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
143 {
144 return i1->insn == i2->insn;
145 }
109 146
110 /* Information about optimization applied in 147 /* Information about optimization applied in
111 the unrolled loop. */ 148 the unrolled loop. */
112 149
113 struct opt_info 150 struct opt_info
114 { 151 {
115 htab_t insns_to_split; /* A hashtable of insns to split. */ 152 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
153 split. */
116 struct iv_to_split *iv_to_split_head; /* The first iv to split. */ 154 struct iv_to_split *iv_to_split_head; /* The first iv to split. */
117 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */ 155 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
118 htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators 156 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
119 to expand. */ 157 insns with accumulators to expand. */
120 struct var_to_expand *var_to_expand_head; /* The first var to expand. */ 158 struct var_to_expand *var_to_expand_head; /* The first var to expand. */
121 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */ 159 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
122 unsigned first_new_block; /* The first basic block that was 160 unsigned first_new_block; /* The first basic block that was
123 duplicated. */ 161 duplicated. */
124 basic_block loop_exit; /* The loop exit basic block. */ 162 basic_block loop_exit; /* The loop exit basic block. */
125 basic_block loop_preheader; /* The loop preheader basic block. */ 163 basic_block loop_preheader; /* The loop preheader basic block. */
126 }; 164 };
127 165
128 static void decide_unrolling_and_peeling (int);
129 static void peel_loops_completely (int);
130 static void decide_peel_simple (struct loop *, int);
131 static void decide_peel_once_rolling (struct loop *, int);
132 static void decide_peel_completely (struct loop *, int);
133 static void decide_unroll_stupid (struct loop *, int); 166 static void decide_unroll_stupid (struct loop *, int);
134 static void decide_unroll_constant_iterations (struct loop *, int); 167 static void decide_unroll_constant_iterations (struct loop *, int);
135 static void decide_unroll_runtime_iterations (struct loop *, int); 168 static void decide_unroll_runtime_iterations (struct loop *, int);
136 static void peel_loop_simple (struct loop *);
137 static void peel_loop_completely (struct loop *);
138 static void unroll_loop_stupid (struct loop *); 169 static void unroll_loop_stupid (struct loop *);
170 static void decide_unrolling (int);
139 static void unroll_loop_constant_iterations (struct loop *); 171 static void unroll_loop_constant_iterations (struct loop *);
140 static void unroll_loop_runtime_iterations (struct loop *); 172 static void unroll_loop_runtime_iterations (struct loop *);
141 static struct opt_info *analyze_insns_in_loop (struct loop *); 173 static struct opt_info *analyze_insns_in_loop (struct loop *);
142 static void opt_info_start_duplication (struct opt_info *); 174 static void opt_info_start_duplication (struct opt_info *);
143 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); 175 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
144 static void free_opt_info (struct opt_info *); 176 static void free_opt_info (struct opt_info *);
145 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx); 177 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
146 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *); 178 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
147 static struct iv_to_split *analyze_iv_to_split_insn (rtx); 179 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
148 static void expand_var_during_unrolling (struct var_to_expand *, rtx); 180 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
149 static void insert_var_expansion_initialization (struct var_to_expand *, 181 static void insert_var_expansion_initialization (struct var_to_expand *,
150 basic_block); 182 basic_block);
151 static void combine_var_copies_in_loop_exit (struct var_to_expand *, 183 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
152 basic_block); 184 basic_block);
153 static rtx get_expansion (struct var_to_expand *); 185 static rtx get_expansion (struct var_to_expand *);
154 186
155 /* Unroll and/or peel (depending on FLAGS) LOOPS. */ 187 /* Emit a message summarizing the unroll that will be
156 void 188 performed for LOOP, along with the loop's location LOCUS, if
157 unroll_and_peel_loops (int flags) 189 appropriate given the dump or -fopt-info settings. */
190
191 static void
192 report_unroll (struct loop *loop, location_t locus)
193 {
194 dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
195
196 if (loop->lpt_decision.decision == LPT_NONE)
197 return;
198
199 if (!dump_enabled_p ())
200 return;
201
202 dump_printf_loc (report_flags, locus,
203 "loop unrolled %d times",
204 loop->lpt_decision.times);
205 if (profile_info && loop->header->count.initialized_p ())
206 dump_printf (report_flags,
207 " (header execution count %d)",
208 (int)loop->header->count.to_gcov_type ());
209
210 dump_printf (report_flags, "\n");
211 }
212
213 /* Decide whether unroll loops and how much. */
214 static void
215 decide_unrolling (int flags)
158 { 216 {
159 struct loop *loop; 217 struct loop *loop;
160 bool check;
161 loop_iterator li;
162
163 /* First perform complete loop peeling (it is almost surely a win,
164 and affects parameters for further decision a lot). */
165 peel_loops_completely (flags);
166
167 /* Now decide rest of unrolling and peeling. */
168 decide_unrolling_and_peeling (flags);
169 218
170 /* Scan the loops, inner ones first. */ 219 /* Scan the loops, inner ones first. */
171 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) 220 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
172 {
173 check = true;
174 /* And perform the appropriate transformations. */
175 switch (loop->lpt_decision.decision)
176 {
177 case LPT_PEEL_COMPLETELY:
178 /* Already done. */
179 gcc_unreachable ();
180 case LPT_PEEL_SIMPLE:
181 peel_loop_simple (loop);
182 break;
183 case LPT_UNROLL_CONSTANT:
184 unroll_loop_constant_iterations (loop);
185 break;
186 case LPT_UNROLL_RUNTIME:
187 unroll_loop_runtime_iterations (loop);
188 break;
189 case LPT_UNROLL_STUPID:
190 unroll_loop_stupid (loop);
191 break;
192 case LPT_NONE:
193 check = false;
194 break;
195 default:
196 gcc_unreachable ();
197 }
198 if (check)
199 {
200 #ifdef ENABLE_CHECKING
201 verify_dominators (CDI_DOMINATORS);
202 verify_loop_structure ();
203 #endif
204 }
205 }
206
207 iv_analysis_done ();
208 }
209
210 /* Check whether exit of the LOOP is at the end of loop body. */
211
212 static bool
213 loop_exit_at_end_p (struct loop *loop)
214 {
215 struct niter_desc *desc = get_simple_loop_desc (loop);
216 rtx insn;
217
218 if (desc->in_edge->dest != loop->latch)
219 return false;
220
221 /* Check that the latch is empty. */
222 FOR_BB_INSNS (loop->latch, insn)
223 {
224 if (INSN_P (insn))
225 return false;
226 }
227
228 return true;
229 }
230
231 /* Depending on FLAGS, check whether to peel loops completely and do so. */
232 static void
233 peel_loops_completely (int flags)
234 {
235 struct loop *loop;
236 loop_iterator li;
237
238 /* Scan the loops, the inner ones first. */
239 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
240 { 221 {
241 loop->lpt_decision.decision = LPT_NONE; 222 loop->lpt_decision.decision = LPT_NONE;
242 223 location_t locus = get_loop_location (loop);
243 if (dump_file) 224
244 fprintf (dump_file, 225 if (dump_enabled_p ())
245 "\n;; *** Considering loop %d for complete peeling ***\n", 226 dump_printf_loc (MSG_NOTE, locus,
246 loop->num); 227 ";; *** Considering loop %d at BB %d for "
247 228 "unrolling ***\n",
248 loop->ninsns = num_loop_insns (loop); 229 loop->num, loop->header->index);
249
250 decide_peel_once_rolling (loop, flags);
251 if (loop->lpt_decision.decision == LPT_NONE)
252 decide_peel_completely (loop, flags);
253
254 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
255 {
256 peel_loop_completely (loop);
257 #ifdef ENABLE_CHECKING
258 verify_dominators (CDI_DOMINATORS);
259 verify_loop_structure ();
260 #endif
261 }
262 }
263 }
264
265 /* Decide whether unroll or peel loops (depending on FLAGS) and how much. */
266 static void
267 decide_unrolling_and_peeling (int flags)
268 {
269 struct loop *loop;
270 loop_iterator li;
271
272 /* Scan the loops, inner ones first. */
273 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
274 {
275 loop->lpt_decision.decision = LPT_NONE;
276
277 if (dump_file)
278 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
279 230
280 /* Do not peel cold areas. */ 231 /* Do not peel cold areas. */
281 if (optimize_loop_for_size_p (loop)) 232 if (optimize_loop_for_size_p (loop))
282 { 233 {
283 if (dump_file) 234 if (dump_file)
311 decide_unroll_constant_iterations (loop, flags); 262 decide_unroll_constant_iterations (loop, flags);
312 if (loop->lpt_decision.decision == LPT_NONE) 263 if (loop->lpt_decision.decision == LPT_NONE)
313 decide_unroll_runtime_iterations (loop, flags); 264 decide_unroll_runtime_iterations (loop, flags);
314 if (loop->lpt_decision.decision == LPT_NONE) 265 if (loop->lpt_decision.decision == LPT_NONE)
315 decide_unroll_stupid (loop, flags); 266 decide_unroll_stupid (loop, flags);
316 if (loop->lpt_decision.decision == LPT_NONE) 267
317 decide_peel_simple (loop, flags); 268 report_unroll (loop, locus);
318 } 269 }
319 } 270 }
320 271
321 /* Decide whether the LOOP is once rolling and suitable for complete 272 /* Unroll LOOPS. */
322 peeling. */ 273 void
323 static void 274 unroll_loops (int flags)
324 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) 275 {
325 { 276 struct loop *loop;
326 struct niter_desc *desc; 277 bool changed = false;
327 278
328 if (dump_file) 279 /* Now decide rest of unrolling. */
329 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n"); 280 decide_unrolling (flags);
330 281
331 /* Is the loop small enough? */ 282 /* Scan the loops, inner ones first. */
332 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) 283 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
333 { 284 {
334 if (dump_file) 285 /* And perform the appropriate transformations. */
335 fprintf (dump_file, ";; Not considering loop, is too big\n"); 286 switch (loop->lpt_decision.decision)
336 return;
337 }
338
339 /* Check for simple loops. */
340 desc = get_simple_loop_desc (loop);
341
342 /* Check number of iterations. */
343 if (!desc->simple_p
344 || desc->assumptions
345 || desc->infinite
346 || !desc->const_iter
347 || desc->niter != 0)
348 {
349 if (dump_file)
350 fprintf (dump_file,
351 ";; Unable to prove that the loop rolls exactly once\n");
352 return;
353 }
354
355 /* Success. */
356 if (dump_file)
357 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
358 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
359 }
360
361 /* Decide whether the LOOP is suitable for complete peeling. */
362 static void
363 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
364 {
365 unsigned npeel;
366 struct niter_desc *desc;
367
368 if (dump_file)
369 fprintf (dump_file, "\n;; Considering peeling completely\n");
370
371 /* Skip non-innermost loops. */
372 if (loop->inner)
373 {
374 if (dump_file)
375 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
376 return;
377 }
378
379 /* Do not peel cold areas. */
380 if (optimize_loop_for_size_p (loop))
381 {
382 if (dump_file)
383 fprintf (dump_file, ";; Not considering loop, cold area\n");
384 return;
385 }
386
387 /* Can the loop be manipulated? */
388 if (!can_duplicate_loop_p (loop))
389 {
390 if (dump_file)
391 fprintf (dump_file,
392 ";; Not considering loop, cannot duplicate\n");
393 return;
394 }
395
396 /* npeel = number of iterations to peel. */
397 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
398 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
399 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
400
401 /* Is the loop small enough? */
402 if (!npeel)
403 {
404 if (dump_file)
405 fprintf (dump_file, ";; Not considering loop, is too big\n");
406 return;
407 }
408
409 /* Check for simple loops. */
410 desc = get_simple_loop_desc (loop);
411
412 /* Check number of iterations. */
413 if (!desc->simple_p
414 || desc->assumptions
415 || !desc->const_iter
416 || desc->infinite)
417 {
418 if (dump_file)
419 fprintf (dump_file,
420 ";; Unable to prove that the loop iterates constant times\n");
421 return;
422 }
423
424 if (desc->niter > npeel - 1)
425 {
426 if (dump_file)
427 { 287 {
428 fprintf (dump_file, 288 case LPT_UNROLL_CONSTANT:
429 ";; Not peeling loop completely, rolls too much ("); 289 unroll_loop_constant_iterations (loop);
430 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); 290 changed = true;
431 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel); 291 break;
292 case LPT_UNROLL_RUNTIME:
293 unroll_loop_runtime_iterations (loop);
294 changed = true;
295 break;
296 case LPT_UNROLL_STUPID:
297 unroll_loop_stupid (loop);
298 changed = true;
299 break;
300 case LPT_NONE:
301 break;
302 default:
303 gcc_unreachable ();
432 } 304 }
433 return; 305 }
434 } 306
435 307 if (changed)
436 /* Success. */ 308 {
437 if (dump_file) 309 calculate_dominance_info (CDI_DOMINATORS);
438 fprintf (dump_file, ";; Decided to peel loop completely\n"); 310 fix_loop_structure (NULL);
439 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; 311 }
440 } 312
441 313 iv_analysis_done ();
442 /* Peel all iterations of LOOP, remove exit edges and cancel the loop 314 }
443 completely. The transformation done: 315
444 316 /* Check whether exit of the LOOP is at the end of loop body. */
445 for (i = 0; i < 4; i++) 317
446 body; 318 static bool
447 319 loop_exit_at_end_p (struct loop *loop)
448 ==> 320 {
449
450 i = 0;
451 body; i++;
452 body; i++;
453 body; i++;
454 body; i++;
455 */
456 static void
457 peel_loop_completely (struct loop *loop)
458 {
459 sbitmap wont_exit;
460 unsigned HOST_WIDE_INT npeel;
461 unsigned i;
462 VEC (edge, heap) *remove_edges;
463 edge ein;
464 struct niter_desc *desc = get_simple_loop_desc (loop); 321 struct niter_desc *desc = get_simple_loop_desc (loop);
465 struct opt_info *opt_info = NULL; 322 rtx_insn *insn;
466 323
467 npeel = desc->niter; 324 /* We should never have conditional in latch block. */
468 325 gcc_assert (desc->in_edge->dest != loop->header);
469 if (npeel) 326
470 { 327 if (desc->in_edge->dest != loop->latch)
471 bool ok; 328 return false;
472 329
473 wont_exit = sbitmap_alloc (npeel + 1); 330 /* Check that the latch is empty. */
474 sbitmap_ones (wont_exit); 331 FOR_BB_INSNS (loop->latch, insn)
475 RESET_BIT (wont_exit, 0); 332 {
476 if (desc->noloop_assumptions) 333 if (INSN_P (insn) && active_insn_p (insn))
477 RESET_BIT (wont_exit, 1); 334 return false;
478 335 }
479 remove_edges = NULL; 336
480 337 return true;
481 if (flag_split_ivs_in_unroller)
482 opt_info = analyze_insns_in_loop (loop);
483
484 opt_info_start_duplication (opt_info);
485 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
486 npeel,
487 wont_exit, desc->out_edge,
488 &remove_edges,
489 DLTHE_FLAG_UPDATE_FREQ
490 | DLTHE_FLAG_COMPLETTE_PEEL
491 | (opt_info
492 ? DLTHE_RECORD_COPY_NUMBER : 0));
493 gcc_assert (ok);
494
495 free (wont_exit);
496
497 if (opt_info)
498 {
499 apply_opt_in_copies (opt_info, npeel, false, true);
500 free_opt_info (opt_info);
501 }
502
503 /* Remove the exit edges. */
504 FOR_EACH_VEC_ELT (edge, remove_edges, i, ein)
505 remove_path (ein);
506 VEC_free (edge, heap, remove_edges);
507 }
508
509 ein = desc->in_edge;
510 free_simple_loop_desc (loop);
511
512 /* Now remove the unreachable part of the last iteration and cancel
513 the loop. */
514 remove_path (ein);
515
516 if (dump_file)
517 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
518 } 338 }
519 339
520 /* Decide whether to unroll LOOP iterating constant number of times 340 /* Decide whether to unroll LOOP iterating constant number of times
521 and how much. */ 341 and how much. */
522 342
523 static void 343 static void
524 decide_unroll_constant_iterations (struct loop *loop, int flags) 344 decide_unroll_constant_iterations (struct loop *loop, int flags)
525 { 345 {
526 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; 346 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
527 struct niter_desc *desc; 347 struct niter_desc *desc;
348 widest_int iterations;
528 349
529 if (!(flags & UAP_UNROLL)) 350 if (!(flags & UAP_UNROLL))
530 { 351 {
531 /* We were not asked to, just return back silently. */ 352 /* We were not asked to, just return back silently. */
532 return; 353 return;
545 if (nunroll > nunroll_by_av) 366 if (nunroll > nunroll_by_av)
546 nunroll = nunroll_by_av; 367 nunroll = nunroll_by_av;
547 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 368 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
548 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 369 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
549 370
371 if (targetm.loop_unroll_adjust)
372 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
373
550 /* Skip big loops. */ 374 /* Skip big loops. */
551 if (nunroll <= 1) 375 if (nunroll <= 1)
552 { 376 {
553 if (dump_file) 377 if (dump_file)
554 fprintf (dump_file, ";; Not considering loop, is too big\n"); 378 fprintf (dump_file, ";; Not considering loop, is too big\n");
565 fprintf (dump_file, 389 fprintf (dump_file,
566 ";; Unable to prove that the loop iterates constant times\n"); 390 ";; Unable to prove that the loop iterates constant times\n");
567 return; 391 return;
568 } 392 }
569 393
570 /* Check whether the loop rolls enough to consider. */ 394 /* Check whether the loop rolls enough to consider.
571 if (desc->niter < 2 * nunroll) 395 Consult also loop bounds and profile; in the case the loop has more
396 than one exit it may well loop less than determined maximal number
397 of iterations. */
398 if (desc->niter < 2 * nunroll
399 || ((get_estimated_loop_iterations (loop, &iterations)
400 || get_likely_max_loop_iterations (loop, &iterations))
401 && wi::ltu_p (iterations, 2 * nunroll)))
572 { 402 {
573 if (dump_file) 403 if (dump_file)
574 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 404 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
575 return; 405 return;
576 } 406 }
602 best_copies = n_copies; 432 best_copies = n_copies;
603 best_unroll = i; 433 best_unroll = i;
604 } 434 }
605 } 435 }
606 436
607 if (dump_file)
608 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
609 best_unroll + 1, best_copies, nunroll);
610
611 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; 437 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
612 loop->lpt_decision.times = best_unroll; 438 loop->lpt_decision.times = best_unroll;
613 439 }
614 if (dump_file) 440
615 fprintf (dump_file, 441 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
616 ";; Decided to unroll the constant times rolling loop, %d times.\n", 442 The transformation does this:
617 loop->lpt_decision.times);
618 }
619
620 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
621 times. The transformation does this:
622 443
623 for (i = 0; i < 102; i++) 444 for (i = 0; i < 102; i++)
624 body; 445 body;
625 446
626 ==> 447 ==> (LOOP->LPT_DECISION.TIMES == 3)
627 448
628 i = 0; 449 i = 0;
629 body; i++; 450 body; i++;
630 body; i++; 451 body; i++;
631 while (i < 102) 452 while (i < 102)
639 static void 460 static void
640 unroll_loop_constant_iterations (struct loop *loop) 461 unroll_loop_constant_iterations (struct loop *loop)
641 { 462 {
642 unsigned HOST_WIDE_INT niter; 463 unsigned HOST_WIDE_INT niter;
643 unsigned exit_mod; 464 unsigned exit_mod;
644 sbitmap wont_exit;
645 unsigned i; 465 unsigned i;
646 VEC (edge, heap) *remove_edges;
647 edge e; 466 edge e;
648 unsigned max_unroll = loop->lpt_decision.times; 467 unsigned max_unroll = loop->lpt_decision.times;
649 struct niter_desc *desc = get_simple_loop_desc (loop); 468 struct niter_desc *desc = get_simple_loop_desc (loop);
650 bool exit_at_end = loop_exit_at_end_p (loop); 469 bool exit_at_end = loop_exit_at_end_p (loop);
651 struct opt_info *opt_info = NULL; 470 struct opt_info *opt_info = NULL;
656 /* Should not get here (such loop should be peeled instead). */ 475 /* Should not get here (such loop should be peeled instead). */
657 gcc_assert (niter > max_unroll + 1); 476 gcc_assert (niter > max_unroll + 1);
658 477
659 exit_mod = niter % (max_unroll + 1); 478 exit_mod = niter % (max_unroll + 1);
660 479
661 wont_exit = sbitmap_alloc (max_unroll + 1); 480 auto_sbitmap wont_exit (max_unroll + 1);
662 sbitmap_ones (wont_exit); 481 bitmap_ones (wont_exit);
663 482
664 remove_edges = NULL; 483 auto_vec<edge> remove_edges;
665 if (flag_split_ivs_in_unroller 484 if (flag_split_ivs_in_unroller
666 || flag_variable_expansion_in_unroller) 485 || flag_variable_expansion_in_unroller)
667 opt_info = analyze_insns_in_loop (loop); 486 opt_info = analyze_insns_in_loop (loop);
668 487
669 if (!exit_at_end) 488 if (!exit_at_end)
671 /* The exit is not at the end of the loop; leave exit test 490 /* The exit is not at the end of the loop; leave exit test
672 in the first copy, so that the loops that start with test 491 in the first copy, so that the loops that start with test
673 of exit condition have continuous body after unrolling. */ 492 of exit condition have continuous body after unrolling. */
674 493
675 if (dump_file) 494 if (dump_file)
676 fprintf (dump_file, ";; Condition on beginning of loop.\n"); 495 fprintf (dump_file, ";; Condition at beginning of loop.\n");
677 496
678 /* Peel exit_mod iterations. */ 497 /* Peel exit_mod iterations. */
679 RESET_BIT (wont_exit, 0); 498 bitmap_clear_bit (wont_exit, 0);
680 if (desc->noloop_assumptions) 499 if (desc->noloop_assumptions)
681 RESET_BIT (wont_exit, 1); 500 bitmap_clear_bit (wont_exit, 1);
682 501
683 if (exit_mod) 502 if (exit_mod)
684 { 503 {
685 opt_info_start_duplication (opt_info); 504 opt_info_start_duplication (opt_info);
686 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 505 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
696 if (opt_info && exit_mod > 1) 515 if (opt_info && exit_mod > 1)
697 apply_opt_in_copies (opt_info, exit_mod, false, false); 516 apply_opt_in_copies (opt_info, exit_mod, false, false);
698 517
699 desc->noloop_assumptions = NULL_RTX; 518 desc->noloop_assumptions = NULL_RTX;
700 desc->niter -= exit_mod; 519 desc->niter -= exit_mod;
701 desc->niter_max -= exit_mod; 520 loop->nb_iterations_upper_bound -= exit_mod;
521 if (loop->any_estimate
522 && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
523 loop->nb_iterations_estimate -= exit_mod;
524 else
525 loop->any_estimate = false;
526 if (loop->any_likely_upper_bound
527 && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
528 loop->nb_iterations_likely_upper_bound -= exit_mod;
529 else
530 loop->any_likely_upper_bound = false;
702 } 531 }
703 532
704 SET_BIT (wont_exit, 1); 533 bitmap_set_bit (wont_exit, 1);
705 } 534 }
706 else 535 else
707 { 536 {
708 /* Leave exit test in last copy, for the same reason as above if 537 /* Leave exit test in last copy, for the same reason as above if
709 the loop tests the condition at the end of loop body. */ 538 the loop tests the condition at the end of loop body. */
710 539
711 if (dump_file) 540 if (dump_file)
712 fprintf (dump_file, ";; Condition on end of loop.\n"); 541 fprintf (dump_file, ";; Condition at end of loop.\n");
713 542
714 /* We know that niter >= max_unroll + 2; so we do not need to care of 543 /* We know that niter >= max_unroll + 2; so we do not need to care of
715 case when we would exit before reaching the loop. So just peel 544 case when we would exit before reaching the loop. So just peel
716 exit_mod + 1 iterations. */ 545 exit_mod + 1 iterations. */
717 if (exit_mod != max_unroll 546 if (exit_mod != max_unroll
718 || desc->noloop_assumptions) 547 || desc->noloop_assumptions)
719 { 548 {
720 RESET_BIT (wont_exit, 0); 549 bitmap_clear_bit (wont_exit, 0);
721 if (desc->noloop_assumptions) 550 if (desc->noloop_assumptions)
722 RESET_BIT (wont_exit, 1); 551 bitmap_clear_bit (wont_exit, 1);
723 552
724 opt_info_start_duplication (opt_info); 553 opt_info_start_duplication (opt_info);
725 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 554 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
726 exit_mod + 1, 555 exit_mod + 1,
727 wont_exit, desc->out_edge, 556 wont_exit, desc->out_edge,
734 563
735 if (opt_info && exit_mod > 0) 564 if (opt_info && exit_mod > 0)
736 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); 565 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
737 566
738 desc->niter -= exit_mod + 1; 567 desc->niter -= exit_mod + 1;
739 desc->niter_max -= exit_mod + 1; 568 loop->nb_iterations_upper_bound -= exit_mod + 1;
569 if (loop->any_estimate
570 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
571 loop->nb_iterations_estimate -= exit_mod + 1;
572 else
573 loop->any_estimate = false;
574 if (loop->any_likely_upper_bound
575 && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
576 loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
577 else
578 loop->any_likely_upper_bound = false;
740 desc->noloop_assumptions = NULL_RTX; 579 desc->noloop_assumptions = NULL_RTX;
741 580
742 SET_BIT (wont_exit, 0); 581 bitmap_set_bit (wont_exit, 0);
743 SET_BIT (wont_exit, 1); 582 bitmap_set_bit (wont_exit, 1);
744 } 583 }
745 584
746 RESET_BIT (wont_exit, max_unroll); 585 bitmap_clear_bit (wont_exit, max_unroll);
747 } 586 }
748 587
749 /* Now unroll the loop. */ 588 /* Now unroll the loop. */
750 589
751 opt_info_start_duplication (opt_info); 590 opt_info_start_duplication (opt_info);
763 { 602 {
764 apply_opt_in_copies (opt_info, max_unroll, true, true); 603 apply_opt_in_copies (opt_info, max_unroll, true, true);
765 free_opt_info (opt_info); 604 free_opt_info (opt_info);
766 } 605 }
767 606
768 free (wont_exit);
769
770 if (exit_at_end) 607 if (exit_at_end)
771 { 608 {
772 basic_block exit_block = get_bb_copy (desc->in_edge->src); 609 basic_block exit_block = get_bb_copy (desc->in_edge->src);
773 /* Find a new in and out edge; they are in the last copy we have made. */ 610 /* Find a new in and out edge; they are in the last copy we have made. */
774 611
783 desc->in_edge = EDGE_SUCC (exit_block, 0); 620 desc->in_edge = EDGE_SUCC (exit_block, 0);
784 } 621 }
785 } 622 }
786 623
787 desc->niter /= max_unroll + 1; 624 desc->niter /= max_unroll + 1;
788 desc->niter_max /= max_unroll + 1; 625 loop->nb_iterations_upper_bound
626 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
627 if (loop->any_estimate)
628 loop->nb_iterations_estimate
629 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
630 if (loop->any_likely_upper_bound)
631 loop->nb_iterations_likely_upper_bound
632 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
789 desc->niter_expr = GEN_INT (desc->niter); 633 desc->niter_expr = GEN_INT (desc->niter);
790 634
791 /* Remove the edges. */ 635 /* Remove the edges. */
792 FOR_EACH_VEC_ELT (edge, remove_edges, i, e) 636 FOR_EACH_VEC_ELT (remove_edges, i, e)
793 remove_path (e); 637 remove_path (e);
794 VEC_free (edge, heap, remove_edges);
795 638
796 if (dump_file) 639 if (dump_file)
797 fprintf (dump_file, 640 fprintf (dump_file,
798 ";; Unrolled loop %d times, constant # of iterations %i insns\n", 641 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
799 max_unroll, num_loop_insns (loop)); 642 max_unroll, num_loop_insns (loop));
804 static void 647 static void
805 decide_unroll_runtime_iterations (struct loop *loop, int flags) 648 decide_unroll_runtime_iterations (struct loop *loop, int flags)
806 { 649 {
807 unsigned nunroll, nunroll_by_av, i; 650 unsigned nunroll, nunroll_by_av, i;
808 struct niter_desc *desc; 651 struct niter_desc *desc;
652 widest_int iterations;
809 653
810 if (!(flags & UAP_UNROLL)) 654 if (!(flags & UAP_UNROLL))
811 { 655 {
812 /* We were not asked to, just return back silently. */ 656 /* We were not asked to, just return back silently. */
813 return; 657 return;
856 if (dump_file) 700 if (dump_file)
857 fprintf (dump_file, ";; Loop iterates constant times\n"); 701 fprintf (dump_file, ";; Loop iterates constant times\n");
858 return; 702 return;
859 } 703 }
860 704
861 /* If we have profile feedback, check whether the loop rolls. */ 705 /* Check whether the loop rolls. */
862 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) 706 if ((get_estimated_loop_iterations (loop, &iterations)
707 || get_likely_max_loop_iterations (loop, &iterations))
708 && wi::ltu_p (iterations, 2 * nunroll))
863 { 709 {
864 if (dump_file) 710 if (dump_file)
865 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 711 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
866 return; 712 return;
867 } 713 }
871 for (i = 1; 2 * i <= nunroll; i *= 2) 717 for (i = 1; 2 * i <= nunroll; i *= 2)
872 continue; 718 continue;
873 719
874 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; 720 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
875 loop->lpt_decision.times = i - 1; 721 loop->lpt_decision.times = i - 1;
876
877 if (dump_file)
878 fprintf (dump_file,
879 ";; Decided to unroll the runtime computable "
880 "times rolling loop, %d times.\n",
881 loop->lpt_decision.times);
882 } 722 }
883 723
884 /* Splits edge E and inserts the sequence of instructions INSNS on it, and 724 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
885 returns the newly created block. If INSNS is NULL_RTX, nothing is changed 725 returns the newly created block. If INSNS is NULL_RTX, nothing is changed
886 and NULL is returned instead. */ 726 and NULL is returned instead. */
887 727
888 basic_block 728 basic_block
889 split_edge_and_insert (edge e, rtx insns) 729 split_edge_and_insert (edge e, rtx_insn *insns)
890 { 730 {
891 basic_block bb; 731 basic_block bb;
892 732
893 if (!insns) 733 if (!insns)
894 return NULL; 734 return NULL;
898 /* ??? We used to assume that INSNS can contain control flow insns, and 738 /* ??? We used to assume that INSNS can contain control flow insns, and
899 that we had to try to find sub basic blocks in BB to maintain a valid 739 that we had to try to find sub basic blocks in BB to maintain a valid
900 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB 740 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
901 and call break_superblocks when going out of cfglayout mode. But it 741 and call break_superblocks when going out of cfglayout mode. But it
902 turns out that this never happens; and that if it does ever happen, 742 turns out that this never happens; and that if it does ever happen,
903 the TODO_verify_flow at the end of the RTL loop passes would fail. 743 the verify_flow_info at the end of the RTL loop passes would fail.
904 744
905 There are two reasons why we expected we could have control flow insns 745 There are two reasons why we expected we could have control flow insns
906 in INSNS. The first is when a comparison has to be done in parts, and 746 in INSNS. The first is when a comparison has to be done in parts, and
907 the second is when the number of iterations is computed for loops with 747 the second is when the number of iterations is computed for loops with
908 the number of iterations known at runtime. In both cases, test cases 748 the number of iterations known at runtime. In both cases, test cases
926 fix it. */ 766 fix it. */
927 767
928 return bb; 768 return bb;
929 } 769 }
930 770
931 /* Unroll LOOP for that we are able to count number of iterations in runtime 771 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
932 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some 772 true, with probability PROB. If CINSN is not NULL, it is the insn to copy
773 in order to create a jump. */
774
775 static rtx_insn *
776 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
777 rtx_code_label *label, profile_probability prob,
778 rtx_insn *cinsn)
779 {
780 rtx_insn *seq;
781 rtx_jump_insn *jump;
782 rtx cond;
783 machine_mode mode;
784
785 mode = GET_MODE (op0);
786 if (mode == VOIDmode)
787 mode = GET_MODE (op1);
788
789 start_sequence ();
790 if (GET_MODE_CLASS (mode) == MODE_CC)
791 {
792 /* A hack -- there seems to be no easy generic way how to make a
793 conditional jump from a ccmode comparison. */
794 gcc_assert (cinsn);
795 cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
796 gcc_assert (GET_CODE (cond) == comp);
797 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
798 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
799 emit_jump_insn (copy_insn (PATTERN (cinsn)));
800 jump = as_a <rtx_jump_insn *> (get_last_insn ());
801 JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
802 LABEL_NUSES (JUMP_LABEL (jump))++;
803 redirect_jump (jump, label, 0);
804 }
805 else
806 {
807 gcc_assert (!cinsn);
808
809 op0 = force_operand (op0, NULL_RTX);
810 op1 = force_operand (op1, NULL_RTX);
811 do_compare_rtx_and_jump (op0, op1, comp, 0,
812 mode, NULL_RTX, NULL, label,
813 profile_probability::uninitialized ());
814 jump = as_a <rtx_jump_insn *> (get_last_insn ());
815 jump->set_jump_target (label);
816 LABEL_NUSES (label)++;
817 }
818 if (prob.initialized_p ())
819 add_reg_br_prob_note (jump, prob);
820
821 seq = get_insns ();
822 end_sequence ();
823
824 return seq;
825 }
826
827 /* Unroll LOOP for which we are able to count number of iterations in runtime
828 LOOP->LPT_DECISION.TIMES times. The transformation does this (with some
933 extra care for case n < 0): 829 extra care for case n < 0):
934 830
935 for (i = 0; i < n; i++) 831 for (i = 0; i < n; i++)
936 body; 832 body;
937 833
938 ==> 834 ==> (LOOP->LPT_DECISION.TIMES == 3)
939 835
940 i = 0; 836 i = 0;
941 mod = n % 4; 837 mod = n % 4;
942 838
943 switch (mod) 839 switch (mod)
960 } 856 }
961 */ 857 */
962 static void 858 static void
963 unroll_loop_runtime_iterations (struct loop *loop) 859 unroll_loop_runtime_iterations (struct loop *loop)
964 { 860 {
965 rtx old_niter, niter, init_code, branch_code, tmp; 861 rtx old_niter, niter, tmp;
966 unsigned i, j, p; 862 rtx_insn *init_code, *branch_code;
967 basic_block preheader, *body, swtch, ezc_swtch; 863 unsigned i, j;
968 VEC (basic_block, heap) *dom_bbs; 864 profile_probability p;
969 sbitmap wont_exit; 865 basic_block preheader, *body, swtch, ezc_swtch = NULL;
970 int may_exit_copy; 866 int may_exit_copy, iter_freq, new_freq;
867 profile_count iter_count, new_count;
971 unsigned n_peel; 868 unsigned n_peel;
972 VEC (edge, heap) *remove_edges;
973 edge e; 869 edge e;
974 bool extra_zero_check, last_may_exit; 870 bool extra_zero_check, last_may_exit;
975 unsigned max_unroll = loop->lpt_decision.times; 871 unsigned max_unroll = loop->lpt_decision.times;
976 struct niter_desc *desc = get_simple_loop_desc (loop); 872 struct niter_desc *desc = get_simple_loop_desc (loop);
977 bool exit_at_end = loop_exit_at_end_p (loop); 873 bool exit_at_end = loop_exit_at_end_p (loop);
981 if (flag_split_ivs_in_unroller 877 if (flag_split_ivs_in_unroller
982 || flag_variable_expansion_in_unroller) 878 || flag_variable_expansion_in_unroller)
983 opt_info = analyze_insns_in_loop (loop); 879 opt_info = analyze_insns_in_loop (loop);
984 880
985 /* Remember blocks whose dominators will have to be updated. */ 881 /* Remember blocks whose dominators will have to be updated. */
986 dom_bbs = NULL; 882 auto_vec<basic_block> dom_bbs;
987 883
988 body = get_loop_body (loop); 884 body = get_loop_body (loop);
989 for (i = 0; i < loop->num_nodes; i++) 885 for (i = 0; i < loop->num_nodes; i++)
990 { 886 {
991 VEC (basic_block, heap) *ldom; 887 vec<basic_block> ldom;
992 basic_block bb; 888 basic_block bb;
993 889
994 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); 890 ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
995 FOR_EACH_VEC_ELT (basic_block, ldom, j, bb) 891 FOR_EACH_VEC_ELT (ldom, j, bb)
996 if (!flow_bb_inside_loop_p (loop, bb)) 892 if (!flow_bb_inside_loop_p (loop, bb))
997 VEC_safe_push (basic_block, heap, dom_bbs, bb); 893 dom_bbs.safe_push (bb);
998 894
999 VEC_free (basic_block, heap, ldom); 895 ldom.release ();
1000 } 896 }
1001 free (body); 897 free (body);
1002 898
1003 if (!exit_at_end) 899 if (!exit_at_end)
1004 { 900 {
1024 old_niter = niter = gen_reg_rtx (desc->mode); 920 old_niter = niter = gen_reg_rtx (desc->mode);
1025 tmp = force_operand (copy_rtx (desc->niter_expr), niter); 921 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1026 if (tmp != niter) 922 if (tmp != niter)
1027 emit_move_insn (niter, tmp); 923 emit_move_insn (niter, tmp);
1028 924
925 /* For loops that exit at end and whose number of iterations is reliable,
926 add one to niter to account for first pass through loop body before
927 reaching exit test. */
928 if (exit_at_end && !desc->noloop_assumptions)
929 {
930 niter = expand_simple_binop (desc->mode, PLUS,
931 niter, const1_rtx,
932 NULL_RTX, 0, OPTAB_LIB_WIDEN);
933 old_niter = niter;
934 }
935
1029 /* Count modulo by ANDing it with max_unroll; we use the fact that 936 /* Count modulo by ANDing it with max_unroll; we use the fact that
1030 the number of unrollings is a power of two, and thus this is correct 937 the number of unrollings is a power of two, and thus this is correct
1031 even if there is overflow in the computation. */ 938 even if there is overflow in the computation. */
1032 niter = expand_simple_binop (desc->mode, AND, 939 niter = expand_simple_binop (desc->mode, AND,
1033 niter, 940 niter, gen_int_mode (max_unroll, desc->mode),
1034 GEN_INT (max_unroll),
1035 NULL_RTX, 0, OPTAB_LIB_WIDEN); 941 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1036 942
1037 init_code = get_insns (); 943 init_code = get_insns ();
1038 end_sequence (); 944 end_sequence ();
1039 unshare_all_rtl_in_chain (init_code); 945 unshare_all_rtl_in_chain (init_code);
1040 946
1041 /* Precondition the loop. */ 947 /* Precondition the loop. */
1042 split_edge_and_insert (loop_preheader_edge (loop), init_code); 948 split_edge_and_insert (loop_preheader_edge (loop), init_code);
1043 949
1044 remove_edges = NULL; 950 auto_vec<edge> remove_edges;
1045 951
1046 wont_exit = sbitmap_alloc (max_unroll + 2); 952 auto_sbitmap wont_exit (max_unroll + 2);
1047 953
1048 /* Peel the first copy of loop body (almost always we must leave exit test 954 if (extra_zero_check || desc->noloop_assumptions)
1049 here; the only exception is when we have extra zero check and the number 955 {
1050 of iterations is reliable. Also record the place of (possible) extra 956 /* Peel the first copy of loop body. Leave the exit test if the number
1051 zero check. */ 957 of iterations is not reliable. Also record the place of the extra zero
1052 sbitmap_zero (wont_exit); 958 check. */
1053 if (extra_zero_check 959 bitmap_clear (wont_exit);
1054 && !desc->noloop_assumptions) 960 if (!desc->noloop_assumptions)
1055 SET_BIT (wont_exit, 1); 961 bitmap_set_bit (wont_exit, 1);
1056 ezc_swtch = loop_preheader_edge (loop)->src; 962 ezc_swtch = loop_preheader_edge (loop)->src;
1057 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1058 1, wont_exit, desc->out_edge,
1059 &remove_edges,
1060 DLTHE_FLAG_UPDATE_FREQ);
1061 gcc_assert (ok);
1062
1063 /* Record the place where switch will be built for preconditioning. */
1064 swtch = split_edge (loop_preheader_edge (loop));
1065
1066 for (i = 0; i < n_peel; i++)
1067 {
1068 /* Peel the copy. */
1069 sbitmap_zero (wont_exit);
1070 if (i != n_peel - 1 || !last_may_exit)
1071 SET_BIT (wont_exit, 1);
1072 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 963 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1073 1, wont_exit, desc->out_edge, 964 1, wont_exit, desc->out_edge,
1074 &remove_edges, 965 &remove_edges,
1075 DLTHE_FLAG_UPDATE_FREQ); 966 DLTHE_FLAG_UPDATE_FREQ);
1076 gcc_assert (ok); 967 gcc_assert (ok);
968 }
969
970 /* Record the place where switch will be built for preconditioning. */
971 swtch = split_edge (loop_preheader_edge (loop));
972
973 /* Compute frequency/count increments for each switch block and initialize
974 innermost switch block. Switch blocks and peeled loop copies are built
975 from innermost outward. */
976 iter_freq = new_freq = swtch->frequency / (max_unroll + 1);
977 iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
978 swtch->frequency = new_freq;
979 swtch->count = new_count;
980
981 for (i = 0; i < n_peel; i++)
982 {
983 /* Peel the copy. */
984 bitmap_clear (wont_exit);
985 if (i != n_peel - 1 || !last_may_exit)
986 bitmap_set_bit (wont_exit, 1);
987 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
988 1, wont_exit, desc->out_edge,
989 &remove_edges,
990 DLTHE_FLAG_UPDATE_FREQ);
991 gcc_assert (ok);
1077 992
1078 /* Create item for switch. */ 993 /* Create item for switch. */
1079 j = n_peel - i - (extra_zero_check ? 0 : 1); 994 j = n_peel - i - (extra_zero_check ? 0 : 1);
1080 p = REG_BR_PROB_BASE / (i + 2); 995 p = profile_probability::always ().apply_scale (1, i + 2);
1081 996
1082 preheader = split_edge (loop_preheader_edge (loop)); 997 preheader = split_edge (loop_preheader_edge (loop));
998 /* Add in frequency/count of edge from switch block. */
999 preheader->frequency += iter_freq;
1000 preheader->count += iter_count;
1083 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, 1001 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1084 block_label (preheader), p, 1002 block_label (preheader), p,
1085 NULL_RTX); 1003 NULL);
1086 1004
1087 /* We rely on the fact that the compare and jump cannot be optimized out, 1005 /* We rely on the fact that the compare and jump cannot be optimized out,
1088 and hence the cfg we create is correct. */ 1006 and hence the cfg we create is correct. */
1089 gcc_assert (branch_code != NULL_RTX); 1007 gcc_assert (branch_code != NULL_RTX);
1090 1008
1091 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); 1009 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1092 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); 1010 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1093 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; 1011 single_succ_edge (swtch)->probability = p.invert ();
1012 new_freq += iter_freq;
1013 new_count += iter_count;
1014 swtch->frequency = new_freq;
1015 swtch->count = new_count;
1094 e = make_edge (swtch, preheader, 1016 e = make_edge (swtch, preheader,
1095 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); 1017 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1096 e->probability = p; 1018 e->probability = p;
1097 } 1019 }
1098 1020
1099 if (extra_zero_check) 1021 if (extra_zero_check)
1100 { 1022 {
1101 /* Add branch for zero iterations. */ 1023 /* Add branch for zero iterations. */
1102 p = REG_BR_PROB_BASE / (max_unroll + 1); 1024 p = profile_probability::always ().apply_scale (1, max_unroll + 1);
1103 swtch = ezc_swtch; 1025 swtch = ezc_swtch;
1104 preheader = split_edge (loop_preheader_edge (loop)); 1026 preheader = split_edge (loop_preheader_edge (loop));
1027 /* Recompute frequency/count adjustments since initial peel copy may
1028 have exited and reduced those values that were computed above. */
1029 iter_freq = swtch->frequency / (max_unroll + 1);
1030 iter_count = swtch->count.apply_scale (1, max_unroll + 1);
1031 /* Add in frequency/count of edge from switch block. */
1032 preheader->frequency += iter_freq;
1033 preheader->count += iter_count;
1105 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, 1034 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1106 block_label (preheader), p, 1035 block_label (preheader), p,
1107 NULL_RTX); 1036 NULL);
1108 gcc_assert (branch_code != NULL_RTX); 1037 gcc_assert (branch_code != NULL_RTX);
1109 1038
1110 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code); 1039 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1111 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); 1040 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1112 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; 1041 single_succ_edge (swtch)->probability = p.invert ();
1113 e = make_edge (swtch, preheader, 1042 e = make_edge (swtch, preheader,
1114 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); 1043 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1115 e->probability = p; 1044 e->probability = p;
1116 } 1045 }
1117 1046
1118 /* Recount dominators for outer blocks. */ 1047 /* Recount dominators for outer blocks. */
1119 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); 1048 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1120 1049
1121 /* And unroll loop. */ 1050 /* And unroll loop. */
1122 1051
1123 sbitmap_ones (wont_exit); 1052 bitmap_ones (wont_exit);
1124 RESET_BIT (wont_exit, may_exit_copy); 1053 bitmap_clear_bit (wont_exit, may_exit_copy);
1125 opt_info_start_duplication (opt_info); 1054 opt_info_start_duplication (opt_info);
1126 1055
1127 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1056 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1128 max_unroll, 1057 max_unroll,
1129 wont_exit, desc->out_edge, 1058 wont_exit, desc->out_edge,
1137 if (opt_info) 1066 if (opt_info)
1138 { 1067 {
1139 apply_opt_in_copies (opt_info, max_unroll, true, true); 1068 apply_opt_in_copies (opt_info, max_unroll, true, true);
1140 free_opt_info (opt_info); 1069 free_opt_info (opt_info);
1141 } 1070 }
1142
1143 free (wont_exit);
1144 1071
1145 if (exit_at_end) 1072 if (exit_at_end)
1146 { 1073 {
1147 basic_block exit_block = get_bb_copy (desc->in_edge->src); 1074 basic_block exit_block = get_bb_copy (desc->in_edge->src);
1148 /* Find a new in and out edge; they are in the last copy we have 1075 /* Find a new in and out edge; they are in the last copy we have
1159 desc->in_edge = EDGE_SUCC (exit_block, 0); 1086 desc->in_edge = EDGE_SUCC (exit_block, 0);
1160 } 1087 }
1161 } 1088 }
1162 1089
1163 /* Remove the edges. */ 1090 /* Remove the edges. */
1164 FOR_EACH_VEC_ELT (edge, remove_edges, i, e) 1091 FOR_EACH_VEC_ELT (remove_edges, i, e)
1165 remove_path (e); 1092 remove_path (e);
1166 VEC_free (edge, heap, remove_edges);
1167 1093
1168 /* We must be careful when updating the number of iterations due to 1094 /* We must be careful when updating the number of iterations due to
1169 preconditioning and the fact that the value must be valid at entry 1095 preconditioning and the fact that the value must be valid at entry
1170 of the loop. After passing through the above code, we see that 1096 of the loop. After passing through the above code, we see that
1171 the correct new number of iterations is this: */ 1097 the correct new number of iterations is this: */
1172 gcc_assert (!desc->const_iter); 1098 gcc_assert (!desc->const_iter);
1173 desc->niter_expr = 1099 desc->niter_expr =
1174 simplify_gen_binary (UDIV, desc->mode, old_niter, 1100 simplify_gen_binary (UDIV, desc->mode, old_niter,
1175 GEN_INT (max_unroll + 1)); 1101 gen_int_mode (max_unroll + 1, desc->mode));
1176 desc->niter_max /= max_unroll + 1; 1102 loop->nb_iterations_upper_bound
1103 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1104 if (loop->any_estimate)
1105 loop->nb_iterations_estimate
1106 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1107 if (loop->any_likely_upper_bound)
1108 loop->nb_iterations_likely_upper_bound
1109 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
1177 if (exit_at_end) 1110 if (exit_at_end)
1178 { 1111 {
1179 desc->niter_expr = 1112 desc->niter_expr =
1180 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); 1113 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1181 desc->noloop_assumptions = NULL_RTX; 1114 desc->noloop_assumptions = NULL_RTX;
1182 desc->niter_max--; 1115 --loop->nb_iterations_upper_bound;
1116 if (loop->any_estimate
1117 && loop->nb_iterations_estimate != 0)
1118 --loop->nb_iterations_estimate;
1119 else
1120 loop->any_estimate = false;
1121 if (loop->any_likely_upper_bound
1122 && loop->nb_iterations_likely_upper_bound != 0)
1123 --loop->nb_iterations_likely_upper_bound;
1124 else
1125 loop->any_likely_upper_bound = false;
1183 } 1126 }
1184 1127
1185 if (dump_file) 1128 if (dump_file)
1186 fprintf (dump_file, 1129 fprintf (dump_file,
1187 ";; Unrolled loop %d times, counting # of iterations " 1130 ";; Unrolled loop %d times, counting # of iterations "
1188 "in runtime, %i insns\n", 1131 "in runtime, %i insns\n",
1189 max_unroll, num_loop_insns (loop)); 1132 max_unroll, num_loop_insns (loop));
1190
1191 VEC_free (basic_block, heap, dom_bbs);
1192 }
1193
1194 /* Decide whether to simply peel LOOP and how much. */
1195 static void
1196 decide_peel_simple (struct loop *loop, int flags)
1197 {
1198 unsigned npeel;
1199 struct niter_desc *desc;
1200
1201 if (!(flags & UAP_PEEL))
1202 {
1203 /* We were not asked to, just return back silently. */
1204 return;
1205 }
1206
1207 if (dump_file)
1208 fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1209
1210 /* npeel = number of iterations to peel. */
1211 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1212 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1213 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1214
1215 /* Skip big loops. */
1216 if (!npeel)
1217 {
1218 if (dump_file)
1219 fprintf (dump_file, ";; Not considering loop, is too big\n");
1220 return;
1221 }
1222
1223 /* Check for simple loops. */
1224 desc = get_simple_loop_desc (loop);
1225
1226 /* Check number of iterations. */
1227 if (desc->simple_p && !desc->assumptions && desc->const_iter)
1228 {
1229 if (dump_file)
1230 fprintf (dump_file, ";; Loop iterates constant times\n");
1231 return;
1232 }
1233
1234 /* Do not simply peel loops with branches inside -- it increases number
1235 of mispredicts. */
1236 if (num_loop_branches (loop) > 1)
1237 {
1238 if (dump_file)
1239 fprintf (dump_file, ";; Not peeling, contains branches\n");
1240 return;
1241 }
1242
1243 if (loop->header->count)
1244 {
1245 unsigned niter = expected_loop_iterations (loop);
1246 if (niter + 1 > npeel)
1247 {
1248 if (dump_file)
1249 {
1250 fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1251 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1252 (HOST_WIDEST_INT) (niter + 1));
1253 fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1254 npeel);
1255 }
1256 return;
1257 }
1258 npeel = niter + 1;
1259 }
1260 else
1261 {
1262 /* For now we have no good heuristics to decide whether loop peeling
1263 will be effective, so disable it. */
1264 if (dump_file)
1265 fprintf (dump_file,
1266 ";; Not peeling loop, no evidence it will be profitable\n");
1267 return;
1268 }
1269
1270 /* Success. */
1271 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1272 loop->lpt_decision.times = npeel;
1273
1274 if (dump_file)
1275 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1276 loop->lpt_decision.times);
1277 }
1278
1279 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1280 while (cond)
1281 body;
1282
1283 ==>
1284
1285 if (!cond) goto end;
1286 body;
1287 if (!cond) goto end;
1288 body;
1289 while (cond)
1290 body;
1291 end: ;
1292 */
1293 static void
1294 peel_loop_simple (struct loop *loop)
1295 {
1296 sbitmap wont_exit;
1297 unsigned npeel = loop->lpt_decision.times;
1298 struct niter_desc *desc = get_simple_loop_desc (loop);
1299 struct opt_info *opt_info = NULL;
1300 bool ok;
1301
1302 if (flag_split_ivs_in_unroller && npeel > 1)
1303 opt_info = analyze_insns_in_loop (loop);
1304
1305 wont_exit = sbitmap_alloc (npeel + 1);
1306 sbitmap_zero (wont_exit);
1307
1308 opt_info_start_duplication (opt_info);
1309
1310 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1311 npeel, wont_exit, NULL,
1312 NULL, DLTHE_FLAG_UPDATE_FREQ
1313 | (opt_info
1314 ? DLTHE_RECORD_COPY_NUMBER
1315 : 0));
1316 gcc_assert (ok);
1317
1318 free (wont_exit);
1319
1320 if (opt_info)
1321 {
1322 apply_opt_in_copies (opt_info, npeel, false, false);
1323 free_opt_info (opt_info);
1324 }
1325
1326 if (desc->simple_p)
1327 {
1328 if (desc->const_iter)
1329 {
1330 desc->niter -= npeel;
1331 desc->niter_expr = GEN_INT (desc->niter);
1332 desc->noloop_assumptions = NULL_RTX;
1333 }
1334 else
1335 {
1336 /* We cannot just update niter_expr, as its value might be clobbered
1337 inside loop. We could handle this by counting the number into
1338 temporary just like we do in runtime unrolling, but it does not
1339 seem worthwhile. */
1340 free_simple_loop_desc (loop);
1341 }
1342 }
1343 if (dump_file)
1344 fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1345 } 1133 }
1346 1134
1347 /* Decide whether to unroll LOOP stupidly and how much. */ 1135 /* Decide whether to unroll LOOP stupidly and how much. */
1348 static void 1136 static void
1349 decide_unroll_stupid (struct loop *loop, int flags) 1137 decide_unroll_stupid (struct loop *loop, int flags)
1350 { 1138 {
1351 unsigned nunroll, nunroll_by_av, i; 1139 unsigned nunroll, nunroll_by_av, i;
1352 struct niter_desc *desc; 1140 struct niter_desc *desc;
1141 widest_int iterations;
1353 1142
1354 if (!(flags & UAP_UNROLL_ALL)) 1143 if (!(flags & UAP_UNROLL_ALL))
1355 { 1144 {
1356 /* We were not asked to, just return back silently. */ 1145 /* We were not asked to, just return back silently. */
1357 return; 1146 return;
1391 fprintf (dump_file, ";; The loop is simple\n"); 1180 fprintf (dump_file, ";; The loop is simple\n");
1392 return; 1181 return;
1393 } 1182 }
1394 1183
1395 /* Do not unroll loops with branches inside -- it increases number 1184 /* Do not unroll loops with branches inside -- it increases number
1396 of mispredicts. */ 1185 of mispredicts.
1186 TODO: this heuristic needs tunning; call inside the loop body
1187 is also relatively good reason to not unroll. */
1397 if (num_loop_branches (loop) > 1) 1188 if (num_loop_branches (loop) > 1)
1398 { 1189 {
1399 if (dump_file) 1190 if (dump_file)
1400 fprintf (dump_file, ";; Not unrolling, contains branches\n"); 1191 fprintf (dump_file, ";; Not unrolling, contains branches\n");
1401 return; 1192 return;
1402 } 1193 }
1403 1194
1404 /* If we have profile feedback, check whether the loop rolls. */ 1195 /* Check whether the loop rolls. */
1405 if (loop->header->count 1196 if ((get_estimated_loop_iterations (loop, &iterations)
1406 && expected_loop_iterations (loop) < 2 * nunroll) 1197 || get_likely_max_loop_iterations (loop, &iterations))
1198 && wi::ltu_p (iterations, 2 * nunroll))
1407 { 1199 {
1408 if (dump_file) 1200 if (dump_file)
1409 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 1201 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1410 return; 1202 return;
1411 } 1203 }
1416 for (i = 1; 2 * i <= nunroll; i *= 2) 1208 for (i = 1; 2 * i <= nunroll; i *= 2)
1417 continue; 1209 continue;
1418 1210
1419 loop->lpt_decision.decision = LPT_UNROLL_STUPID; 1211 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1420 loop->lpt_decision.times = i - 1; 1212 loop->lpt_decision.times = i - 1;
1421 1213 }
1422 if (dump_file) 1214
1423 fprintf (dump_file, 1215 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
1424 ";; Decided to unroll the loop stupidly, %d times.\n", 1216
1425 loop->lpt_decision.times);
1426 }
1427
1428 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1429 while (cond) 1217 while (cond)
1430 body; 1218 body;
1431 1219
1432 ==> 1220 ==> (LOOP->LPT_DECISION.TIMES == 3)
1433 1221
1434 while (cond) 1222 while (cond)
1435 { 1223 {
1436 body; 1224 body;
1437 if (!cond) break; 1225 if (!cond) break;
1443 } 1231 }
1444 */ 1232 */
1445 static void 1233 static void
1446 unroll_loop_stupid (struct loop *loop) 1234 unroll_loop_stupid (struct loop *loop)
1447 { 1235 {
1448 sbitmap wont_exit;
1449 unsigned nunroll = loop->lpt_decision.times; 1236 unsigned nunroll = loop->lpt_decision.times;
1450 struct niter_desc *desc = get_simple_loop_desc (loop); 1237 struct niter_desc *desc = get_simple_loop_desc (loop);
1451 struct opt_info *opt_info = NULL; 1238 struct opt_info *opt_info = NULL;
1452 bool ok; 1239 bool ok;
1453 1240
1454 if (flag_split_ivs_in_unroller 1241 if (flag_split_ivs_in_unroller
1455 || flag_variable_expansion_in_unroller) 1242 || flag_variable_expansion_in_unroller)
1456 opt_info = analyze_insns_in_loop (loop); 1243 opt_info = analyze_insns_in_loop (loop);
1457 1244
1458 1245 auto_sbitmap wont_exit (nunroll + 1);
1459 wont_exit = sbitmap_alloc (nunroll + 1); 1246 bitmap_clear (wont_exit);
1460 sbitmap_zero (wont_exit);
1461 opt_info_start_duplication (opt_info); 1247 opt_info_start_duplication (opt_info);
1462 1248
1463 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1249 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1464 nunroll, wont_exit, 1250 nunroll, wont_exit,
1465 NULL, NULL, 1251 NULL, NULL,
1473 { 1259 {
1474 apply_opt_in_copies (opt_info, nunroll, true, true); 1260 apply_opt_in_copies (opt_info, nunroll, true, true);
1475 free_opt_info (opt_info); 1261 free_opt_info (opt_info);
1476 } 1262 }
1477 1263
1478 free (wont_exit);
1479
1480 if (desc->simple_p) 1264 if (desc->simple_p)
1481 { 1265 {
1482 /* We indeed may get here provided that there are nontrivial assumptions 1266 /* We indeed may get here provided that there are nontrivial assumptions
1483 for a loop to be really simple. We could update the counts, but the 1267 for a loop to be really simple. We could update the counts, but the
1484 problem is that we are unable to decide which exit will be taken 1268 problem is that we are unable to decide which exit will be taken
1485 (not really true in case the number of iterations is constant, 1269 (not really true in case the number of iterations is constant,
1486 but noone will do anything with this information, so we do not 1270 but no one will do anything with this information, so we do not
1487 worry about it). */ 1271 worry about it). */
1488 desc->simple_p = false; 1272 desc->simple_p = false;
1489 } 1273 }
1490 1274
1491 if (dump_file) 1275 if (dump_file)
1492 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", 1276 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1493 nunroll, num_loop_insns (loop)); 1277 nunroll, num_loop_insns (loop));
1494 } 1278 }
1495 1279
1496 /* A hash function for information about insns to split. */
1497
1498 static hashval_t
1499 si_info_hash (const void *ivts)
1500 {
1501 return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
1502 }
1503
1504 /* An equality functions for information about insns to split. */
1505
1506 static int
1507 si_info_eq (const void *ivts1, const void *ivts2)
1508 {
1509 const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
1510 const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;
1511
1512 return i1->insn == i2->insn;
1513 }
1514
1515 /* Return a hash for VES, which is really a "var_to_expand *". */
1516
1517 static hashval_t
1518 ve_info_hash (const void *ves)
1519 {
1520 return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
1521 }
1522
1523 /* Return true if IVTS1 and IVTS2 (which are really both of type
1524 "var_to_expand *") refer to the same instruction. */
1525
1526 static int
1527 ve_info_eq (const void *ivts1, const void *ivts2)
1528 {
1529 const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
1530 const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;
1531
1532 return i1->insn == i2->insn;
1533 }
1534
1535 /* Returns true if REG is referenced in one nondebug insn in LOOP. 1280 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1536 Set *DEBUG_USES to the number of debug insns that reference the 1281 Set *DEBUG_USES to the number of debug insns that reference the
1537 variable. */ 1282 variable. */
1538 1283
1539 bool 1284 static bool
1540 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg, 1285 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1541 int *debug_uses) 1286 int *debug_uses)
1542 { 1287 {
1543 basic_block *body, bb; 1288 basic_block *body, bb;
1544 unsigned i; 1289 unsigned i;
1545 int count_ref = 0; 1290 int count_ref = 0;
1546 rtx insn; 1291 rtx_insn *insn;
1547 1292
1548 body = get_loop_body (loop); 1293 body = get_loop_body (loop);
1549 for (i = 0; i < loop->num_nodes; i++) 1294 for (i = 0; i < loop->num_nodes; i++)
1550 { 1295 {
1551 bb = body[i]; 1296 bb = body[i];
1567 static void 1312 static void
1568 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses) 1313 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1569 { 1314 {
1570 basic_block *body, bb; 1315 basic_block *body, bb;
1571 unsigned i; 1316 unsigned i;
1572 rtx insn; 1317 rtx_insn *insn;
1573 1318
1574 body = get_loop_body (loop); 1319 body = get_loop_body (loop);
1575 for (i = 0; debug_uses && i < loop->num_nodes; i++) 1320 for (i = 0; debug_uses && i < loop->num_nodes; i++)
1576 { 1321 {
1577 bb = body[i]; 1322 bb = body[i];
1612 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant 1357 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1613 information and return a pointer to it. 1358 information and return a pointer to it.
1614 */ 1359 */
1615 1360
1616 static struct var_to_expand * 1361 static struct var_to_expand *
1617 analyze_insn_to_expand_var (struct loop *loop, rtx insn) 1362 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1618 { 1363 {
1619 rtx set, dest, src; 1364 rtx set, dest, src;
1620 struct var_to_expand *ves; 1365 struct var_to_expand *ves;
1621 unsigned accum_pos; 1366 unsigned accum_pos;
1622 enum rtx_code code; 1367 enum rtx_code code;
1704 } 1449 }
1705 1450
1706 if (debug_uses) 1451 if (debug_uses)
1707 /* Instead of resetting the debug insns, we could replace each 1452 /* Instead of resetting the debug insns, we could replace each
1708 debug use in the loop with the sum or product of all expanded 1453 debug use in the loop with the sum or product of all expanded
1709 accummulators. Since we'll only know of all expansions at the 1454 accumulators. Since we'll only know of all expansions at the
1710 end, we'd have to keep track of which vars_to_expand a debug 1455 end, we'd have to keep track of which vars_to_expand a debug
1711 insn in the loop references, take note of each copy of the 1456 insn in the loop references, take note of each copy of the
1712 debug insn during unrolling, and when it's all done, compute 1457 debug insn during unrolling, and when it's all done, compute
1713 the sum or product of each variable and adjust the original 1458 the sum or product of each variable and adjust the original
1714 debug insn and each copy thereof. What a pain! */ 1459 debug insn and each copy thereof. What a pain! */
1716 1461
1717 /* Record the accumulator to expand. */ 1462 /* Record the accumulator to expand. */
1718 ves = XNEW (struct var_to_expand); 1463 ves = XNEW (struct var_to_expand);
1719 ves->insn = insn; 1464 ves->insn = insn;
1720 ves->reg = copy_rtx (dest); 1465 ves->reg = copy_rtx (dest);
1721 ves->var_expansions = VEC_alloc (rtx, heap, 1); 1466 ves->var_expansions.create (1);
1722 ves->next = NULL; 1467 ves->next = NULL;
1723 ves->op = GET_CODE (src); 1468 ves->op = GET_CODE (src);
1724 ves->expansion_count = 0; 1469 ves->expansion_count = 0;
1725 ves->reuse_expansion = 0; 1470 ves->reuse_expansion = 0;
1726 ves->accum_pos = accum_pos;
1727 return ves; 1471 return ves;
1728 } 1472 }
1729 1473
1730 /* Determine whether there is an induction variable in INSN that 1474 /* Determine whether there is an induction variable in INSN that
1731 we would like to split during unrolling. 1475 we would like to split during unrolling.
1751 Return NULL if INSN contains no interesting IVs. Otherwise, allocate 1495 Return NULL if INSN contains no interesting IVs. Otherwise, allocate
1752 an IV_TO_SPLIT structure, fill it with the relevant information and return a 1496 an IV_TO_SPLIT structure, fill it with the relevant information and return a
1753 pointer to it. */ 1497 pointer to it. */
1754 1498
1755 static struct iv_to_split * 1499 static struct iv_to_split *
1756 analyze_iv_to_split_insn (rtx insn) 1500 analyze_iv_to_split_insn (rtx_insn *insn)
1757 { 1501 {
1758 rtx set, dest; 1502 rtx set, dest;
1759 struct rtx_iv iv; 1503 struct rtx_iv iv;
1760 struct iv_to_split *ivts; 1504 struct iv_to_split *ivts;
1505 scalar_int_mode mode;
1761 bool ok; 1506 bool ok;
1762 1507
1763 /* For now we just split the basic induction variables. Later this may be 1508 /* For now we just split the basic induction variables. Later this may be
1764 extended for example by selecting also addresses of memory references. */ 1509 extended for example by selecting also addresses of memory references. */
1765 set = single_set (insn); 1510 set = single_set (insn);
1766 if (!set) 1511 if (!set)
1767 return NULL; 1512 return NULL;
1768 1513
1769 dest = SET_DEST (set); 1514 dest = SET_DEST (set);
1770 if (!REG_P (dest)) 1515 if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
1771 return NULL; 1516 return NULL;
1772 1517
1773 if (!biv_p (insn, dest)) 1518 if (!biv_p (insn, mode, dest))
1774 return NULL; 1519 return NULL;
1775 1520
1776 ok = iv_analyze_result (insn, dest, &iv); 1521 ok = iv_analyze_result (insn, dest, &iv);
1777 1522
1778 /* This used to be an assert under the assumption that if biv_p returns 1523 /* This used to be an assert under the assumption that if biv_p returns
1791 return NULL; 1536 return NULL;
1792 1537
1793 /* Record the insn to split. */ 1538 /* Record the insn to split. */
1794 ivts = XNEW (struct iv_to_split); 1539 ivts = XNEW (struct iv_to_split);
1795 ivts->insn = insn; 1540 ivts->insn = insn;
1541 ivts->orig_var = dest;
1796 ivts->base_var = NULL_RTX; 1542 ivts->base_var = NULL_RTX;
1797 ivts->step = iv.step; 1543 ivts->step = iv.step;
1798 ivts->next = NULL; 1544 ivts->next = NULL;
1799 ivts->n_loc = 1;
1800 ivts->loc[0] = 1;
1801 1545
1802 return ivts; 1546 return ivts;
1803 } 1547 }
1804 1548
1805 /* Determines which of insns in LOOP can be optimized. 1549 /* Determines which of insns in LOOP can be optimized.
1811 analyze_insns_in_loop (struct loop *loop) 1555 analyze_insns_in_loop (struct loop *loop)
1812 { 1556 {
1813 basic_block *body, bb; 1557 basic_block *body, bb;
1814 unsigned i; 1558 unsigned i;
1815 struct opt_info *opt_info = XCNEW (struct opt_info); 1559 struct opt_info *opt_info = XCNEW (struct opt_info);
1816 rtx insn; 1560 rtx_insn *insn;
1817 struct iv_to_split *ivts = NULL; 1561 struct iv_to_split *ivts = NULL;
1818 struct var_to_expand *ves = NULL; 1562 struct var_to_expand *ves = NULL;
1819 PTR *slot1; 1563 iv_to_split **slot1;
1820 PTR *slot2; 1564 var_to_expand **slot2;
1821 VEC (edge, heap) *edges = get_loop_exit_edges (loop); 1565 vec<edge> edges = get_loop_exit_edges (loop);
1822 edge exit; 1566 edge exit;
1823 bool can_apply = false; 1567 bool can_apply = false;
1824 1568
1825 iv_analysis_loop_init (loop); 1569 iv_analysis_loop_init (loop);
1826 1570
1827 body = get_loop_body (loop); 1571 body = get_loop_body (loop);
1828 1572
1829 if (flag_split_ivs_in_unroller) 1573 if (flag_split_ivs_in_unroller)
1830 { 1574 {
1831 opt_info->insns_to_split = htab_create (5 * loop->num_nodes, 1575 opt_info->insns_to_split
1832 si_info_hash, si_info_eq, free); 1576 = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1833 opt_info->iv_to_split_head = NULL; 1577 opt_info->iv_to_split_head = NULL;
1834 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head; 1578 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1835 } 1579 }
1836 1580
1837 /* Record the loop exit bb and loop preheader before the unrolling. */ 1581 /* Record the loop exit bb and loop preheader before the unrolling. */
1838 opt_info->loop_preheader = loop_preheader_edge (loop)->src; 1582 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1839 1583
1840 if (VEC_length (edge, edges) == 1) 1584 if (edges.length () == 1)
1841 { 1585 {
1842 exit = VEC_index (edge, edges, 0); 1586 exit = edges[0];
1843 if (!(exit->flags & EDGE_COMPLEX)) 1587 if (!(exit->flags & EDGE_COMPLEX))
1844 { 1588 {
1845 opt_info->loop_exit = split_edge (exit); 1589 opt_info->loop_exit = split_edge (exit);
1846 can_apply = true; 1590 can_apply = true;
1847 } 1591 }
1848 } 1592 }
1849 1593
1850 if (flag_variable_expansion_in_unroller 1594 if (flag_variable_expansion_in_unroller
1851 && can_apply) 1595 && can_apply)
1852 { 1596 {
1853 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes, 1597 opt_info->insns_with_var_to_expand
1854 ve_info_hash, 1598 = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1855 ve_info_eq, free);
1856 opt_info->var_to_expand_head = NULL; 1599 opt_info->var_to_expand_head = NULL;
1857 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head; 1600 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1858 } 1601 }
1859 1602
1860 for (i = 0; i < loop->num_nodes; i++) 1603 for (i = 0; i < loop->num_nodes; i++)
1871 if (opt_info->insns_to_split) 1614 if (opt_info->insns_to_split)
1872 ivts = analyze_iv_to_split_insn (insn); 1615 ivts = analyze_iv_to_split_insn (insn);
1873 1616
1874 if (ivts) 1617 if (ivts)
1875 { 1618 {
1876 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT); 1619 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1877 gcc_assert (*slot1 == NULL); 1620 gcc_assert (*slot1 == NULL);
1878 *slot1 = ivts; 1621 *slot1 = ivts;
1879 *opt_info->iv_to_split_tail = ivts; 1622 *opt_info->iv_to_split_tail = ivts;
1880 opt_info->iv_to_split_tail = &ivts->next; 1623 opt_info->iv_to_split_tail = &ivts->next;
1881 continue; 1624 continue;
1884 if (opt_info->insns_with_var_to_expand) 1627 if (opt_info->insns_with_var_to_expand)
1885 ves = analyze_insn_to_expand_var (loop, insn); 1628 ves = analyze_insn_to_expand_var (loop, insn);
1886 1629
1887 if (ves) 1630 if (ves)
1888 { 1631 {
1889 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT); 1632 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1890 gcc_assert (*slot2 == NULL); 1633 gcc_assert (*slot2 == NULL);
1891 *slot2 = ves; 1634 *slot2 = ves;
1892 *opt_info->var_to_expand_tail = ves; 1635 *opt_info->var_to_expand_tail = ves;
1893 opt_info->var_to_expand_tail = &ves->next; 1636 opt_info->var_to_expand_tail = &ves->next;
1894 } 1637 }
1895 } 1638 }
1896 } 1639 }
1897 1640
1898 VEC_free (edge, heap, edges); 1641 edges.release ();
1899 free (body); 1642 free (body);
1900 return opt_info; 1643 return opt_info;
1901 } 1644 }
1902 1645
1903 /* Called just before loop duplication. Records start of duplicated area 1646 /* Called just before loop duplication. Records start of duplicated area
1905 1648
1906 static void 1649 static void
1907 opt_info_start_duplication (struct opt_info *opt_info) 1650 opt_info_start_duplication (struct opt_info *opt_info)
1908 { 1651 {
1909 if (opt_info) 1652 if (opt_info)
1910 opt_info->first_new_block = last_basic_block; 1653 opt_info->first_new_block = last_basic_block_for_fn (cfun);
1911 } 1654 }
1912 1655
1913 /* Determine the number of iterations between initialization of the base 1656 /* Determine the number of iterations between initialization of the base
1914 variable and the current copy (N_COPY). N_COPIES is the total number 1657 variable and the current copy (N_COPY). N_COPIES is the total number
1915 of newly created copies. UNROLLING is true if we are unrolling 1658 of newly created copies. UNROLLING is true if we are unrolling
1933 else 1676 else
1934 return n_copies; 1677 return n_copies;
1935 } 1678 }
1936 } 1679 }
1937 1680
1938 /* Locate in EXPR the expression corresponding to the location recorded
1939 in IVTS, and return a pointer to the RTX for this location. */
1940
1941 static rtx *
1942 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1943 {
1944 unsigned i;
1945 rtx *ret = &expr;
1946
1947 for (i = 0; i < ivts->n_loc; i++)
1948 ret = &XEXP (*ret, ivts->loc[i]);
1949
1950 return ret;
1951 }
1952
1953 /* Allocate basic variable for the induction variable chain. */ 1681 /* Allocate basic variable for the induction variable chain. */
1954 1682
1955 static void 1683 static void
1956 allocate_basic_variable (struct iv_to_split *ivts) 1684 allocate_basic_variable (struct iv_to_split *ivts)
1957 { 1685 {
1958 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts); 1686 rtx expr = SET_SRC (single_set (ivts->insn));
1959 1687
1960 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); 1688 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1961 } 1689 }
1962 1690
1963 /* Insert initialization of basic variable of IVTS before INSN, taking 1691 /* Insert initialization of basic variable of IVTS before INSN, taking
1964 the initial value from INSN. */ 1692 the initial value from INSN. */
1965 1693
1966 static void 1694 static void
1967 insert_base_initialization (struct iv_to_split *ivts, rtx insn) 1695 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1968 { 1696 {
1969 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts)); 1697 rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1970 rtx seq; 1698 rtx_insn *seq;
1971 1699
1972 start_sequence (); 1700 start_sequence ();
1973 expr = force_operand (expr, ivts->base_var); 1701 expr = force_operand (expr, ivts->base_var);
1974 if (expr != ivts->base_var) 1702 if (expr != ivts->base_var)
1975 emit_move_insn (ivts->base_var, expr); 1703 emit_move_insn (ivts->base_var, expr);
1981 1709
1982 /* Replace the use of induction variable described in IVTS in INSN 1710 /* Replace the use of induction variable described in IVTS in INSN
1983 by base variable + DELTA * step. */ 1711 by base variable + DELTA * step. */
1984 1712
1985 static void 1713 static void
1986 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta) 1714 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1987 { 1715 {
1988 rtx expr, *loc, seq, incr, var; 1716 rtx expr, *loc, incr, var;
1989 enum machine_mode mode = GET_MODE (ivts->base_var); 1717 rtx_insn *seq;
1718 machine_mode mode = GET_MODE (ivts->base_var);
1990 rtx src, dest, set; 1719 rtx src, dest, set;
1991 1720
1992 /* Construct base + DELTA * step. */ 1721 /* Construct base + DELTA * step. */
1993 if (!delta) 1722 if (!delta)
1994 expr = ivts->base_var; 1723 expr = ivts->base_var;
1995 else 1724 else
1996 { 1725 {
1997 incr = simplify_gen_binary (MULT, mode, 1726 incr = simplify_gen_binary (MULT, mode,
1998 ivts->step, gen_int_mode (delta, mode)); 1727 copy_rtx (ivts->step),
1728 gen_int_mode (delta, mode));
1999 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), 1729 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
2000 ivts->base_var, incr); 1730 ivts->base_var, incr);
2001 } 1731 }
2002 1732
2003 /* Figure out where to do the replacement. */ 1733 /* Figure out where to do the replacement. */
2004 loc = get_ivts_expr (single_set (insn), ivts); 1734 loc = &SET_SRC (single_set (insn));
2005 1735
2006 /* If we can make the replacement right away, we're done. */ 1736 /* If we can make the replacement right away, we're done. */
2007 if (validate_change (insn, loc, expr, 0)) 1737 if (validate_change (insn, loc, expr, 0))
2008 return; 1738 return;
2009 1739
2048 rtx reg; 1778 rtx reg;
2049 1779
2050 if (ve->reuse_expansion == 0) 1780 if (ve->reuse_expansion == 0)
2051 reg = ve->reg; 1781 reg = ve->reg;
2052 else 1782 else
2053 reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1); 1783 reg = ve->var_expansions[ve->reuse_expansion - 1];
2054 1784
2055 if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion) 1785 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
2056 ve->reuse_expansion = 0; 1786 ve->reuse_expansion = 0;
2057 else 1787 else
2058 ve->reuse_expansion++; 1788 ve->reuse_expansion++;
2059 1789
2060 return reg; 1790 return reg;
2063 1793
2064 /* Given INSN replace the uses of the accumulator recorded in VE 1794 /* Given INSN replace the uses of the accumulator recorded in VE
2065 with a new register. */ 1795 with a new register. */
2066 1796
2067 static void 1797 static void
2068 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn) 1798 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
2069 { 1799 {
2070 rtx new_reg, set; 1800 rtx new_reg, set;
2071 bool really_new_expansion = false; 1801 bool really_new_expansion = false;
2072 1802
2073 set = single_set (insn); 1803 set = single_set (insn);
2081 new_reg = gen_reg_rtx (GET_MODE (ve->reg)); 1811 new_reg = gen_reg_rtx (GET_MODE (ve->reg));
2082 } 1812 }
2083 else 1813 else
2084 new_reg = get_expansion (ve); 1814 new_reg = get_expansion (ve);
2085 1815
2086 validate_change (insn, &SET_DEST (set), new_reg, 1); 1816 validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
2087 validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
2088
2089 if (apply_change_group ()) 1817 if (apply_change_group ())
2090 if (really_new_expansion) 1818 if (really_new_expansion)
2091 { 1819 {
2092 VEC_safe_push (rtx, heap, ve->var_expansions, new_reg); 1820 ve->var_expansions.safe_push (new_reg);
2093 ve->expansion_count++; 1821 ve->expansion_count++;
2094 } 1822 }
2095 } 1823 }
2096 1824
2097 /* Initialize the variable expansions in loop preheader. PLACE is the 1825 /* Initialize the variable expansions in loop preheader. PLACE is the
2123 1851
2124 static void 1852 static void
2125 insert_var_expansion_initialization (struct var_to_expand *ve, 1853 insert_var_expansion_initialization (struct var_to_expand *ve,
2126 basic_block place) 1854 basic_block place)
2127 { 1855 {
2128 rtx seq, var, zero_init, insn; 1856 rtx_insn *seq;
1857 rtx var, zero_init;
2129 unsigned i; 1858 unsigned i;
2130 enum machine_mode mode = GET_MODE (ve->reg); 1859 machine_mode mode = GET_MODE (ve->reg);
2131 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); 1860 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
2132 1861
2133 if (VEC_length (rtx, ve->var_expansions) == 0) 1862 if (ve->var_expansions.length () == 0)
2134 return; 1863 return;
2135 1864
2136 start_sequence (); 1865 start_sequence ();
2137 switch (ve->op) 1866 switch (ve->op)
2138 { 1867 {
2139 case FMA: 1868 case FMA:
2140 /* Note that we only accumulate FMA via the ADD operand. */ 1869 /* Note that we only accumulate FMA via the ADD operand. */
2141 case PLUS: 1870 case PLUS:
2142 case MINUS: 1871 case MINUS:
2143 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) 1872 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
2144 { 1873 {
2145 if (honor_signed_zero_p) 1874 if (honor_signed_zero_p)
2146 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); 1875 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
2147 else 1876 else
2148 zero_init = CONST0_RTX (mode); 1877 zero_init = CONST0_RTX (mode);
2149 emit_move_insn (var, zero_init); 1878 emit_move_insn (var, zero_init);
2150 } 1879 }
2151 break; 1880 break;
2152 1881
2153 case MULT: 1882 case MULT:
2154 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) 1883 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
2155 { 1884 {
2156 zero_init = CONST1_RTX (GET_MODE (var)); 1885 zero_init = CONST1_RTX (GET_MODE (var));
2157 emit_move_insn (var, zero_init); 1886 emit_move_insn (var, zero_init);
2158 } 1887 }
2159 break; 1888 break;
2163 } 1892 }
2164 1893
2165 seq = get_insns (); 1894 seq = get_insns ();
2166 end_sequence (); 1895 end_sequence ();
2167 1896
2168 insn = BB_HEAD (place); 1897 emit_insn_after (seq, BB_END (place));
2169 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2170 insn = NEXT_INSN (insn);
2171
2172 emit_insn_after (seq, insn);
2173 } 1898 }
2174 1899
2175 /* Combine the variable expansions at the loop exit. PLACE is the 1900 /* Combine the variable expansions at the loop exit. PLACE is the
2176 loop exit basic block where the summation of the expansions should 1901 loop exit basic block where the summation of the expansions should
2177 take place. */ 1902 take place. */
2178 1903
2179 static void 1904 static void
2180 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place) 1905 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
2181 { 1906 {
2182 rtx sum = ve->reg; 1907 rtx sum = ve->reg;
2183 rtx expr, seq, var, insn; 1908 rtx expr, var;
1909 rtx_insn *seq, *insn;
2184 unsigned i; 1910 unsigned i;
2185 1911
2186 if (VEC_length (rtx, ve->var_expansions) == 0) 1912 if (ve->var_expansions.length () == 0)
2187 return; 1913 return;
2188 1914
1915 /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
1916 it both here and as the destination of the assignment. */
1917 sum = copy_rtx (sum);
2189 start_sequence (); 1918 start_sequence ();
2190 switch (ve->op) 1919 switch (ve->op)
2191 { 1920 {
2192 case FMA: 1921 case FMA:
2193 /* Note that we only accumulate FMA via the ADD operand. */ 1922 /* Note that we only accumulate FMA via the ADD operand. */
2194 case PLUS: 1923 case PLUS:
2195 case MINUS: 1924 case MINUS:
2196 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) 1925 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
2197 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum); 1926 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
2198 break; 1927 break;
2199 1928
2200 case MULT: 1929 case MULT:
2201 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) 1930 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
2202 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum); 1931 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
2203 break; 1932 break;
2204 1933
2205 default: 1934 default:
2206 gcc_unreachable (); 1935 gcc_unreachable ();
2217 insn = NEXT_INSN (insn); 1946 insn = NEXT_INSN (insn);
2218 1947
2219 emit_insn_after (seq, insn); 1948 emit_insn_after (seq, insn);
2220 } 1949 }
2221 1950
1951 /* Strip away REG_EQUAL notes for IVs we're splitting.
1952
1953 Updating REG_EQUAL notes for IVs we split is tricky: We
1954 cannot tell until after unrolling, DF-rescanning, and liveness
1955 updating, whether an EQ_USE is reached by the split IV while
1956 the IV reg is still live. See PR55006.
1957
1958 ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1959 because RTL loop-iv requires us to defer rescanning insns and
1960 any notes attached to them. So resort to old techniques... */
1961
1962 static void
1963 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1964 {
1965 struct iv_to_split *ivts;
1966 rtx note = find_reg_equal_equiv_note (insn);
1967 if (! note)
1968 return;
1969 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1970 if (reg_mentioned_p (ivts->orig_var, note))
1971 {
1972 remove_note (insn, note);
1973 return;
1974 }
1975 }
1976
2222 /* Apply loop optimizations in loop copies using the 1977 /* Apply loop optimizations in loop copies using the
2223 data which gathered during the unrolling. Structure 1978 data which gathered during the unrolling. Structure
2224 OPT_INFO record that data. 1979 OPT_INFO record that data.
2225 1980
2226 UNROLLING is true if we unrolled (not peeled) the loop. 1981 UNROLLING is true if we unrolled (not peeled) the loop.
2233 unsigned n_copies, bool unrolling, 1988 unsigned n_copies, bool unrolling,
2234 bool rewrite_original_loop) 1989 bool rewrite_original_loop)
2235 { 1990 {
2236 unsigned i, delta; 1991 unsigned i, delta;
2237 basic_block bb, orig_bb; 1992 basic_block bb, orig_bb;
2238 rtx insn, orig_insn, next; 1993 rtx_insn *insn, *orig_insn, *next;
2239 struct iv_to_split ivts_templ, *ivts; 1994 struct iv_to_split ivts_templ, *ivts;
2240 struct var_to_expand ve_templ, *ves; 1995 struct var_to_expand ve_templ, *ves;
2241 1996
2242 /* Sanity check -- we need to put initialization in the original loop 1997 /* Sanity check -- we need to put initialization in the original loop
2243 body. */ 1998 body. */
2246 /* Allocate the basic variables (i0). */ 2001 /* Allocate the basic variables (i0). */
2247 if (opt_info->insns_to_split) 2002 if (opt_info->insns_to_split)
2248 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) 2003 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2249 allocate_basic_variable (ivts); 2004 allocate_basic_variable (ivts);
2250 2005
2251 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) 2006 for (i = opt_info->first_new_block;
2252 { 2007 i < (unsigned) last_basic_block_for_fn (cfun);
2253 bb = BASIC_BLOCK (i); 2008 i++)
2009 {
2010 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2254 orig_bb = get_bb_original (bb); 2011 orig_bb = get_bb_original (bb);
2255 2012
2256 /* bb->aux holds position in copy sequence initialized by 2013 /* bb->aux holds position in copy sequence initialized by
2257 duplicate_loop_to_header_edge. */ 2014 duplicate_loop_to_header_edge. */
2258 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, 2015 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2259 unrolling); 2016 unrolling);
2260 bb->aux = 0; 2017 bb->aux = 0;
2261 orig_insn = BB_HEAD (orig_bb); 2018 orig_insn = BB_HEAD (orig_bb);
2262 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next) 2019 FOR_BB_INSNS_SAFE (bb, insn, next)
2263 { 2020 {
2264 next = NEXT_INSN (insn); 2021 if (!INSN_P (insn)
2265 if (!INSN_P (insn)) 2022 || (DEBUG_INSN_P (insn)
2023 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2266 continue; 2024 continue;
2267 2025
2268 while (!INSN_P (orig_insn)) 2026 while (!INSN_P (orig_insn)
2027 || (DEBUG_INSN_P (orig_insn)
2028 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2029 == LABEL_DECL)))
2269 orig_insn = NEXT_INSN (orig_insn); 2030 orig_insn = NEXT_INSN (orig_insn);
2270 2031
2271 ivts_templ.insn = orig_insn; 2032 ivts_templ.insn = orig_insn;
2272 ve_templ.insn = orig_insn; 2033 ve_templ.insn = orig_insn;
2273 2034
2274 /* Apply splitting iv optimization. */ 2035 /* Apply splitting iv optimization. */
2275 if (opt_info->insns_to_split) 2036 if (opt_info->insns_to_split)
2276 { 2037 {
2277 ivts = (struct iv_to_split *) 2038 maybe_strip_eq_note_for_split_iv (opt_info, insn);
2278 htab_find (opt_info->insns_to_split, &ivts_templ); 2039
2040 ivts = opt_info->insns_to_split->find (&ivts_templ);
2279 2041
2280 if (ivts) 2042 if (ivts)
2281 { 2043 {
2282 gcc_assert (GET_CODE (PATTERN (insn)) 2044 gcc_assert (GET_CODE (PATTERN (insn))
2283 == GET_CODE (PATTERN (orig_insn))); 2045 == GET_CODE (PATTERN (orig_insn)));
2289 } 2051 }
2290 /* Apply variable expansion optimization. */ 2052 /* Apply variable expansion optimization. */
2291 if (unrolling && opt_info->insns_with_var_to_expand) 2053 if (unrolling && opt_info->insns_with_var_to_expand)
2292 { 2054 {
2293 ves = (struct var_to_expand *) 2055 ves = (struct var_to_expand *)
2294 htab_find (opt_info->insns_with_var_to_expand, &ve_templ); 2056 opt_info->insns_with_var_to_expand->find (&ve_templ);
2295 if (ves) 2057 if (ves)
2296 { 2058 {
2297 gcc_assert (GET_CODE (PATTERN (insn)) 2059 gcc_assert (GET_CODE (PATTERN (insn))
2298 == GET_CODE (PATTERN (orig_insn))); 2060 == GET_CODE (PATTERN (orig_insn)));
2299 expand_var_during_unrolling (ves, insn); 2061 expand_var_during_unrolling (ves, insn);
2317 } 2079 }
2318 2080
2319 /* Rewrite also the original loop body. Find them as originals of the blocks 2081 /* Rewrite also the original loop body. Find them as originals of the blocks
2320 in the last copied iteration, i.e. those that have 2082 in the last copied iteration, i.e. those that have
2321 get_bb_copy (get_bb_original (bb)) == bb. */ 2083 get_bb_copy (get_bb_original (bb)) == bb. */
2322 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) 2084 for (i = opt_info->first_new_block;
2323 { 2085 i < (unsigned) last_basic_block_for_fn (cfun);
2324 bb = BASIC_BLOCK (i); 2086 i++)
2087 {
2088 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2325 orig_bb = get_bb_original (bb); 2089 orig_bb = get_bb_original (bb);
2326 if (get_bb_copy (orig_bb) != bb) 2090 if (get_bb_copy (orig_bb) != bb)
2327 continue; 2091 continue;
2328 2092
2329 delta = determine_split_iv_delta (0, n_copies, unrolling); 2093 delta = determine_split_iv_delta (0, n_copies, unrolling);
2337 continue; 2101 continue;
2338 2102
2339 ivts_templ.insn = orig_insn; 2103 ivts_templ.insn = orig_insn;
2340 if (opt_info->insns_to_split) 2104 if (opt_info->insns_to_split)
2341 { 2105 {
2106 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2107
2342 ivts = (struct iv_to_split *) 2108 ivts = (struct iv_to_split *)
2343 htab_find (opt_info->insns_to_split, &ivts_templ); 2109 opt_info->insns_to_split->find (&ivts_templ);
2344 if (ivts) 2110 if (ivts)
2345 { 2111 {
2346 if (!delta) 2112 if (!delta)
2347 insert_base_initialization (ivts, orig_insn); 2113 insert_base_initialization (ivts, orig_insn);
2348 split_iv (ivts, orig_insn, delta); 2114 split_iv (ivts, orig_insn, delta);
2357 /* Release OPT_INFO. */ 2123 /* Release OPT_INFO. */
2358 2124
2359 static void 2125 static void
2360 free_opt_info (struct opt_info *opt_info) 2126 free_opt_info (struct opt_info *opt_info)
2361 { 2127 {
2362 if (opt_info->insns_to_split) 2128 delete opt_info->insns_to_split;
2363 htab_delete (opt_info->insns_to_split); 2129 opt_info->insns_to_split = NULL;
2364 if (opt_info->insns_with_var_to_expand) 2130 if (opt_info->insns_with_var_to_expand)
2365 { 2131 {
2366 struct var_to_expand *ves; 2132 struct var_to_expand *ves;
2367 2133
2368 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) 2134 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2369 VEC_free (rtx, heap, ves->var_expansions); 2135 ves->var_expansions.release ();
2370 htab_delete (opt_info->insns_with_var_to_expand); 2136 delete opt_info->insns_with_var_to_expand;
2137 opt_info->insns_with_var_to_expand = NULL;
2371 } 2138 }
2372 free (opt_info); 2139 free (opt_info);
2373 } 2140 }