Mercurial > hg > CbC > CbC_gcc
annotate gcc/loop-doloop.c @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | b7f97abdc517 |
children | 04ced10e8804 |
rev | line source |
---|---|
0 | 1 /* Perform doloop optimizations |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
3 Free Software Foundation, Inc. |
0 | 4 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) |
5 | |
6 This file is part of GCC. | |
7 | |
8 GCC is free software; you can redistribute it and/or modify it under | |
9 the terms of the GNU General Public License as published by the Free | |
10 Software Foundation; either version 3, or (at your option) any later | |
11 version. | |
12 | |
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with GCC; see the file COPYING3. If not see | |
20 <http://www.gnu.org/licenses/>. */ | |
21 | |
22 #include "config.h" | |
23 #include "system.h" | |
24 #include "coretypes.h" | |
25 #include "tm.h" | |
26 #include "rtl.h" | |
27 #include "flags.h" | |
28 #include "expr.h" | |
29 #include "hard-reg-set.h" | |
30 #include "basic-block.h" | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
31 #include "diagnostic-core.h" |
0 | 32 #include "tm_p.h" |
33 #include "cfgloop.h" | |
34 #include "output.h" | |
35 #include "params.h" | |
36 #include "target.h" | |
37 | |
38 /* This module is used to modify loops with a determinable number of | |
39 iterations to use special low-overhead looping instructions. | |
40 | |
41 It first validates whether the loop is well behaved and has a | |
42 determinable number of iterations (either at compile or run-time). | |
43 It then modifies the loop to use a low-overhead looping pattern as | |
44 follows: | |
45 | |
46 1. A pseudo register is allocated as the loop iteration counter. | |
47 | |
48 2. The number of loop iterations is calculated and is stored | |
49 in the loop counter. | |
50 | |
51 3. At the end of the loop, the jump insn is replaced by the | |
52 doloop_end pattern. The compare must remain because it might be | |
53 used elsewhere. If the loop-variable or condition register are | |
54 used elsewhere, they will be eliminated by flow. | |
55 | |
56 4. An optional doloop_begin pattern is inserted at the top of the | |
57 loop. | |
58 | |
59 TODO The optimization should only be performed when either the biv used for exit | |
60 condition is unused at all except for the exit test, or if we do not have to | |
61 change its value, since otherwise we have to add a new induction variable, | |
62 which usually will not pay off (unless the cost of the doloop pattern is | |
63 somehow extremely lower than the cost of compare & jump, or unless the bct | |
64 register cannot be used for anything else but doloop -- ??? detect these | |
65 cases). */ | |
66 | |
67 #ifdef HAVE_doloop_end | |
68 | |
69 /* Return the loop termination condition for PATTERN or zero | |
70 if it is not a decrement and branch jump insn. */ | |
71 | |
72 rtx | |
73 doloop_condition_get (rtx doloop_pat) | |
74 { | |
75 rtx cmp; | |
76 rtx inc; | |
77 rtx reg; | |
78 rtx inc_src; | |
79 rtx condition; | |
80 rtx pattern; | |
81 | |
82 /* The canonical doloop pattern we expect has one of the following | |
83 forms: | |
84 | |
85 1) (parallel [(set (pc) (if_then_else (condition) | |
86 (label_ref (label)) | |
87 (pc))) | |
88 (set (reg) (plus (reg) (const_int -1))) | |
89 (additional clobbers and uses)]) | |
90 | |
91 The branch must be the first entry of the parallel (also required | |
92 by jump.c), and the second entry of the parallel must be a set of | |
93 the loop counter register. Some targets (IA-64) wrap the set of | |
94 the loop counter in an if_then_else too. | |
95 | |
96 2) (set (reg) (plus (reg) (const_int -1)) | |
97 (set (pc) (if_then_else (reg != 0) | |
98 (label_ref (label)) | |
99 (pc))). */ | |
100 | |
101 pattern = PATTERN (doloop_pat); | |
102 | |
103 if (GET_CODE (pattern) != PARALLEL) | |
104 { | |
105 rtx cond; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
106 rtx prev_insn = prev_nondebug_insn (doloop_pat); |
0 | 107 |
108 /* We expect the decrement to immediately precede the branch. */ | |
109 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
110 if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) |
0 | 111 return 0; |
112 | |
113 cmp = pattern; | |
114 inc = PATTERN (PREV_INSN (doloop_pat)); | |
115 /* We expect the condition to be of the form (reg != 0) */ | |
116 cond = XEXP (SET_SRC (cmp), 0); | |
117 if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) | |
118 return 0; | |
119 | |
120 } | |
121 else | |
122 { | |
123 cmp = XVECEXP (pattern, 0, 0); | |
124 inc = XVECEXP (pattern, 0, 1); | |
125 } | |
126 | |
127 /* Check for (set (reg) (something)). */ | |
128 if (GET_CODE (inc) != SET) | |
129 return 0; | |
130 reg = SET_DEST (inc); | |
131 if (! REG_P (reg)) | |
132 return 0; | |
133 | |
134 /* Check if something = (plus (reg) (const_int -1)). | |
135 On IA-64, this decrement is wrapped in an if_then_else. */ | |
136 inc_src = SET_SRC (inc); | |
137 if (GET_CODE (inc_src) == IF_THEN_ELSE) | |
138 inc_src = XEXP (inc_src, 1); | |
139 if (GET_CODE (inc_src) != PLUS | |
140 || XEXP (inc_src, 0) != reg | |
141 || XEXP (inc_src, 1) != constm1_rtx) | |
142 return 0; | |
143 | |
144 /* Check for (set (pc) (if_then_else (condition) | |
145 (label_ref (label)) | |
146 (pc))). */ | |
147 if (GET_CODE (cmp) != SET | |
148 || SET_DEST (cmp) != pc_rtx | |
149 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
150 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
151 || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
152 return 0; | |
153 | |
154 /* Extract loop termination condition. */ | |
155 condition = XEXP (SET_SRC (cmp), 0); | |
156 | |
157 /* We expect a GE or NE comparison with 0 or 1. */ | |
158 if ((GET_CODE (condition) != GE | |
159 && GET_CODE (condition) != NE) | |
160 || (XEXP (condition, 1) != const0_rtx | |
161 && XEXP (condition, 1) != const1_rtx)) | |
162 return 0; | |
163 | |
164 if ((XEXP (condition, 0) == reg) | |
165 || (GET_CODE (XEXP (condition, 0)) == PLUS | |
166 && XEXP (XEXP (condition, 0), 0) == reg)) | |
167 { | |
168 if (GET_CODE (pattern) != PARALLEL) | |
169 /* The second form we expect: | |
170 | |
171 (set (reg) (plus (reg) (const_int -1)) | |
172 (set (pc) (if_then_else (reg != 0) | |
173 (label_ref (label)) | |
174 (pc))). | |
175 | |
176 is equivalent to the following: | |
177 | |
178 (parallel [(set (pc) (if_then_else (reg != 1) | |
179 (label_ref (label)) | |
180 (pc))) | |
181 (set (reg) (plus (reg) (const_int -1))) | |
182 (additional clobbers and uses)]) | |
183 | |
184 So we return that form instead. | |
185 */ | |
186 condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); | |
187 | |
188 return condition; | |
189 } | |
190 | |
191 /* ??? If a machine uses a funny comparison, we could return a | |
192 canonicalized form here. */ | |
193 | |
194 return 0; | |
195 } | |
196 | |
197 /* Return nonzero if the loop specified by LOOP is suitable for | |
198 the use of special low-overhead looping instructions. DESC | |
199 describes the number of iterations of the loop. */ | |
200 | |
201 static bool | |
202 doloop_valid_p (struct loop *loop, struct niter_desc *desc) | |
203 { | |
204 basic_block *body = get_loop_body (loop), bb; | |
205 rtx insn; | |
206 unsigned i; | |
207 bool result = true; | |
208 | |
209 /* Check for loops that may not terminate under special conditions. */ | |
210 if (!desc->simple_p | |
211 || desc->assumptions | |
212 || desc->infinite) | |
213 { | |
214 /* There are some cases that would require a special attention. | |
215 For example if the comparison is LEU and the comparison value | |
216 is UINT_MAX then the loop will not terminate. Similarly, if the | |
217 comparison code is GEU and the comparison value is 0, the | |
218 loop will not terminate. | |
219 | |
220 If the absolute increment is not 1, the loop can be infinite | |
221 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) | |
222 | |
223 ??? We could compute these conditions at run-time and have a | |
224 additional jump around the loop to ensure an infinite loop. | |
225 However, it is very unlikely that this is the intended | |
226 behavior of the loop and checking for these rare boundary | |
227 conditions would pessimize all other code. | |
228 | |
229 If the loop is executed only a few times an extra check to | |
230 restart the loop could use up most of the benefits of using a | |
231 count register loop. Note however, that normally, this | |
232 restart branch would never execute, so it could be predicted | |
233 well by the CPU. We should generate the pessimistic code by | |
234 default, and have an option, e.g. -funsafe-loops that would | |
235 enable count-register loops in this case. */ | |
236 if (dump_file) | |
237 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); | |
238 result = false; | |
239 goto cleanup; | |
240 } | |
241 | |
242 for (i = 0; i < loop->num_nodes; i++) | |
243 { | |
244 bb = body[i]; | |
245 | |
246 for (insn = BB_HEAD (bb); | |
247 insn != NEXT_INSN (BB_END (bb)); | |
248 insn = NEXT_INSN (insn)) | |
249 { | |
250 /* Different targets have different necessities for low-overhead | |
251 looping. Call the back end for each instruction within the loop | |
252 to let it decide whether the insn prohibits a low-overhead loop. | |
253 It will then return the cause for it to emit to the dump file. */ | |
254 const char * invalid = targetm.invalid_within_doloop (insn); | |
255 if (invalid) | |
256 { | |
257 if (dump_file) | |
258 fprintf (dump_file, "Doloop: %s\n", invalid); | |
259 result = false; | |
260 goto cleanup; | |
261 } | |
262 } | |
263 } | |
264 result = true; | |
265 | |
266 cleanup: | |
267 free (body); | |
268 | |
269 return result; | |
270 } | |
271 | |
272 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru | |
273 edge. If the condition is always false, do not do anything. If it is always | |
274 true, redirect E to DEST and return false. In all other cases, true is | |
275 returned. */ | |
276 | |
277 static bool | |
278 add_test (rtx cond, edge *e, basic_block dest) | |
279 { | |
280 rtx seq, jump, label; | |
281 enum machine_mode mode; | |
282 rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); | |
283 enum rtx_code code = GET_CODE (cond); | |
284 basic_block bb; | |
285 | |
286 mode = GET_MODE (XEXP (cond, 0)); | |
287 if (mode == VOIDmode) | |
288 mode = GET_MODE (XEXP (cond, 1)); | |
289 | |
290 start_sequence (); | |
291 op0 = force_operand (op0, NULL_RTX); | |
292 op1 = force_operand (op1, NULL_RTX); | |
293 label = block_label (dest); | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
294 do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
295 NULL_RTX, label, -1); |
0 | 296 |
297 jump = get_last_insn (); | |
298 if (!jump || !JUMP_P (jump)) | |
299 { | |
300 /* The condition is always false and the jump was optimized out. */ | |
301 end_sequence (); | |
302 return true; | |
303 } | |
304 | |
305 seq = get_insns (); | |
306 end_sequence (); | |
307 | |
308 /* There always is at least the jump insn in the sequence. */ | |
309 gcc_assert (seq != NULL_RTX); | |
310 | |
311 bb = split_edge_and_insert (*e, seq); | |
312 *e = single_succ_edge (bb); | |
313 | |
314 if (any_uncondjump_p (jump)) | |
315 { | |
316 /* The condition is always true. */ | |
317 delete_insn (jump); | |
318 redirect_edge_and_branch_force (*e, dest); | |
319 return false; | |
320 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
321 |
0 | 322 JUMP_LABEL (jump) = label; |
323 | |
324 /* The jump is supposed to handle an unlikely special case. */ | |
325 add_reg_note (jump, REG_BR_PROB, const0_rtx); | |
326 | |
327 LABEL_NUSES (label)++; | |
328 | |
329 make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); | |
330 return true; | |
331 } | |
332 | |
333 /* Modify the loop to use the low-overhead looping insn where LOOP | |
334 describes the loop, DESC describes the number of iterations of the | |
335 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the | |
336 end of the loop. CONDITION is the condition separated from the | |
337 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. | |
338 ZERO_EXTEND_P says to zero extend COUNT after the increment of it to | |
339 word_mode from FROM_MODE. */ | |
340 | |
341 static void | |
342 doloop_modify (struct loop *loop, struct niter_desc *desc, | |
343 rtx doloop_seq, rtx condition, rtx count, | |
344 bool zero_extend_p, enum machine_mode from_mode) | |
345 { | |
346 rtx counter_reg; | |
347 rtx tmp, noloop = NULL_RTX; | |
348 rtx sequence; | |
349 rtx jump_insn; | |
350 rtx jump_label; | |
351 int nonneg = 0; | |
352 bool increment_count; | |
353 basic_block loop_end = desc->out_edge->src; | |
354 enum machine_mode mode; | |
355 rtx true_prob_val; | |
356 | |
357 jump_insn = BB_END (loop_end); | |
358 | |
359 if (dump_file) | |
360 { | |
361 fprintf (dump_file, "Doloop: Inserting doloop pattern ("); | |
362 if (desc->const_iter) | |
363 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); | |
364 else | |
365 fputs ("runtime", dump_file); | |
366 fputs (" iterations).\n", dump_file); | |
367 } | |
368 | |
369 /* Get the probability of the original branch. If it exists we would | |
370 need to update REG_BR_PROB of the new jump_insn. */ | |
371 true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX); | |
372 | |
373 /* Discard original jump to continue loop. The original compare | |
374 result may still be live, so it cannot be discarded explicitly. */ | |
375 delete_insn (jump_insn); | |
376 | |
377 counter_reg = XEXP (condition, 0); | |
378 if (GET_CODE (counter_reg) == PLUS) | |
379 counter_reg = XEXP (counter_reg, 0); | |
380 mode = GET_MODE (counter_reg); | |
381 | |
382 increment_count = false; | |
383 switch (GET_CODE (condition)) | |
384 { | |
385 case NE: | |
386 /* Currently only NE tests against zero and one are supported. */ | |
387 noloop = XEXP (condition, 1); | |
388 if (noloop != const0_rtx) | |
389 { | |
390 gcc_assert (noloop == const1_rtx); | |
391 increment_count = true; | |
392 } | |
393 break; | |
394 | |
395 case GE: | |
396 /* Currently only GE tests against zero are supported. */ | |
397 gcc_assert (XEXP (condition, 1) == const0_rtx); | |
398 | |
399 noloop = constm1_rtx; | |
400 | |
401 /* The iteration count does not need incrementing for a GE test. */ | |
402 increment_count = false; | |
403 | |
404 /* Determine if the iteration counter will be non-negative. | |
405 Note that the maximum value loaded is iterations_max - 1. */ | |
406 if (desc->niter_max | |
407 <= ((unsigned HOST_WIDEST_INT) 1 | |
408 << (GET_MODE_BITSIZE (mode) - 1))) | |
409 nonneg = 1; | |
410 break; | |
411 | |
412 /* Abort if an invalid doloop pattern has been generated. */ | |
413 default: | |
414 gcc_unreachable (); | |
415 } | |
416 | |
417 if (increment_count) | |
418 count = simplify_gen_binary (PLUS, from_mode, count, const1_rtx); | |
419 | |
420 if (zero_extend_p) | |
421 count = simplify_gen_unary (ZERO_EXTEND, word_mode, | |
422 count, from_mode); | |
423 | |
424 /* Insert initialization of the count register into the loop header. */ | |
425 start_sequence (); | |
426 tmp = force_operand (count, counter_reg); | |
427 convert_move (counter_reg, tmp, 1); | |
428 sequence = get_insns (); | |
429 end_sequence (); | |
430 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
431 | |
432 if (desc->noloop_assumptions) | |
433 { | |
434 rtx ass = copy_rtx (desc->noloop_assumptions); | |
435 basic_block preheader = loop_preheader_edge (loop)->src; | |
436 basic_block set_zero | |
437 = split_edge (loop_preheader_edge (loop)); | |
438 basic_block new_preheader | |
439 = split_edge (loop_preheader_edge (loop)); | |
440 edge te; | |
441 | |
442 /* Expand the condition testing the assumptions and if it does not pass, | |
443 reset the count register to 0. */ | |
444 redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); | |
445 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); | |
446 | |
447 set_zero->count = 0; | |
448 set_zero->frequency = 0; | |
449 | |
450 te = single_succ_edge (preheader); | |
451 for (; ass; ass = XEXP (ass, 1)) | |
452 if (!add_test (XEXP (ass, 0), &te, set_zero)) | |
453 break; | |
454 | |
455 if (ass) | |
456 { | |
457 /* We reached a condition that is always true. This is very hard to | |
458 reproduce (such a loop does not roll, and thus it would most | |
459 likely get optimized out by some of the preceding optimizations). | |
460 In fact, I do not have any testcase for it. However, it would | |
461 also be very hard to show that it is impossible, so we must | |
462 handle this case. */ | |
463 set_zero->count = preheader->count; | |
464 set_zero->frequency = preheader->frequency; | |
465 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
466 |
0 | 467 if (EDGE_COUNT (set_zero->preds) == 0) |
468 { | |
469 /* All the conditions were simplified to false, remove the | |
470 unreachable set_zero block. */ | |
471 delete_basic_block (set_zero); | |
472 } | |
473 else | |
474 { | |
475 /* Reset the counter to zero in the set_zero block. */ | |
476 start_sequence (); | |
477 convert_move (counter_reg, noloop, 0); | |
478 sequence = get_insns (); | |
479 end_sequence (); | |
480 emit_insn_after (sequence, BB_END (set_zero)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
481 |
0 | 482 set_immediate_dominator (CDI_DOMINATORS, set_zero, |
483 recompute_dominator (CDI_DOMINATORS, | |
484 set_zero)); | |
485 } | |
486 | |
487 set_immediate_dominator (CDI_DOMINATORS, new_preheader, | |
488 recompute_dominator (CDI_DOMINATORS, | |
489 new_preheader)); | |
490 } | |
491 | |
492 /* Some targets (eg, C4x) need to initialize special looping | |
493 registers. */ | |
494 #ifdef HAVE_doloop_begin | |
495 { | |
496 rtx init; | |
497 unsigned level = get_loop_level (loop) + 1; | |
498 init = gen_doloop_begin (counter_reg, | |
499 desc->const_iter ? desc->niter_expr : const0_rtx, | |
500 GEN_INT (desc->niter_max), | |
501 GEN_INT (level)); | |
502 if (init) | |
503 { | |
504 start_sequence (); | |
505 emit_insn (init); | |
506 sequence = get_insns (); | |
507 end_sequence (); | |
508 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
509 } | |
510 } | |
511 #endif | |
512 | |
513 /* Insert the new low-overhead looping insn. */ | |
514 emit_jump_insn_after (doloop_seq, BB_END (loop_end)); | |
515 jump_insn = BB_END (loop_end); | |
516 jump_label = block_label (desc->in_edge->dest); | |
517 JUMP_LABEL (jump_insn) = jump_label; | |
518 LABEL_NUSES (jump_label)++; | |
519 | |
520 /* Ensure the right fallthru edge is marked, for case we have reversed | |
521 the condition. */ | |
522 desc->in_edge->flags &= ~EDGE_FALLTHRU; | |
523 desc->out_edge->flags |= EDGE_FALLTHRU; | |
524 | |
525 /* Add a REG_NONNEG note if the actual or estimated maximum number | |
526 of iterations is non-negative. */ | |
527 if (nonneg) | |
528 add_reg_note (jump_insn, REG_NONNEG, NULL_RTX); | |
529 | |
530 /* Update the REG_BR_PROB note. */ | |
531 if (true_prob_val) | |
532 { | |
533 /* Seems safer to use the branch probability. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
534 add_reg_note (jump_insn, REG_BR_PROB, |
0 | 535 GEN_INT (desc->in_edge->probability)); |
536 } | |
537 } | |
538 | |
539 /* Process loop described by LOOP validating that the loop is suitable for | |
540 conversion to use a low overhead looping instruction, replacing the jump | |
541 insn where suitable. Returns true if the loop was successfully | |
542 modified. */ | |
543 | |
544 static bool | |
545 doloop_optimize (struct loop *loop) | |
546 { | |
547 enum machine_mode mode; | |
548 rtx doloop_seq, doloop_pat, doloop_reg; | |
549 rtx iterations, count; | |
550 rtx iterations_max; | |
551 rtx start_label; | |
552 rtx condition; | |
553 unsigned level, est_niter; | |
554 int max_cost; | |
555 struct niter_desc *desc; | |
556 unsigned word_mode_size; | |
557 unsigned HOST_WIDE_INT word_mode_max; | |
558 bool zero_extend_p = false; | |
559 | |
560 if (dump_file) | |
561 fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); | |
562 | |
563 iv_analysis_loop_init (loop); | |
564 | |
565 /* Find the simple exit of a LOOP. */ | |
566 desc = get_simple_loop_desc (loop); | |
567 | |
568 /* Check that loop is a candidate for a low-overhead looping insn. */ | |
569 if (!doloop_valid_p (loop, desc)) | |
570 { | |
571 if (dump_file) | |
572 fprintf (dump_file, | |
573 "Doloop: The loop is not suitable.\n"); | |
574 return false; | |
575 } | |
576 mode = desc->mode; | |
577 | |
578 est_niter = 3; | |
579 if (desc->const_iter) | |
580 est_niter = desc->niter; | |
581 /* If the estimate on number of iterations is reliable (comes from profile | |
582 feedback), use it. Do not use it normally, since the expected number | |
583 of iterations of an unrolled loop is 2. */ | |
584 if (loop->header->count) | |
585 est_niter = expected_loop_iterations (loop); | |
586 | |
587 if (est_niter < 3) | |
588 { | |
589 if (dump_file) | |
590 fprintf (dump_file, | |
591 "Doloop: Too few iterations (%u) to be profitable.\n", | |
592 est_niter); | |
593 return false; | |
594 } | |
595 | |
596 max_cost | |
597 = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); | |
598 if (rtx_cost (desc->niter_expr, SET, optimize_loop_for_speed_p (loop)) | |
599 > max_cost) | |
600 { | |
601 if (dump_file) | |
602 fprintf (dump_file, | |
603 "Doloop: number of iterations too costly to compute.\n"); | |
604 return false; | |
605 } | |
606 | |
607 count = copy_rtx (desc->niter_expr); | |
608 iterations = desc->const_iter ? desc->niter_expr : const0_rtx; | |
609 iterations_max = GEN_INT (desc->niter_max); | |
610 level = get_loop_level (loop) + 1; | |
611 | |
612 /* Generate looping insn. If the pattern FAILs then give up trying | |
613 to modify the loop since there is some aspect the back-end does | |
614 not like. */ | |
615 start_label = block_label (desc->in_edge->dest); | |
616 doloop_reg = gen_reg_rtx (mode); | |
617 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
618 GEN_INT (level), start_label); | |
619 | |
620 word_mode_size = GET_MODE_BITSIZE (word_mode); | |
621 word_mode_max | |
622 = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1; | |
623 if (! doloop_seq | |
624 && mode != word_mode | |
625 /* Before trying mode different from the one in that # of iterations is | |
626 computed, we must be sure that the number of iterations fits into | |
627 the new mode. */ | |
628 && (word_mode_size >= GET_MODE_BITSIZE (mode) | |
629 || desc->niter_max <= word_mode_max)) | |
630 { | |
631 if (word_mode_size > GET_MODE_BITSIZE (mode)) | |
632 { | |
633 zero_extend_p = true; | |
634 iterations = simplify_gen_unary (ZERO_EXTEND, word_mode, | |
635 iterations, mode); | |
636 iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode, | |
637 iterations_max, mode); | |
638 } | |
639 else | |
640 { | |
641 count = lowpart_subreg (word_mode, count, mode); | |
642 iterations = lowpart_subreg (word_mode, iterations, mode); | |
643 iterations_max = lowpart_subreg (word_mode, iterations_max, mode); | |
644 } | |
645 PUT_MODE (doloop_reg, word_mode); | |
646 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
647 GEN_INT (level), start_label); | |
648 } | |
649 if (! doloop_seq) | |
650 { | |
651 if (dump_file) | |
652 fprintf (dump_file, | |
653 "Doloop: Target unwilling to use doloop pattern!\n"); | |
654 return false; | |
655 } | |
656 | |
657 /* If multiple instructions were created, the last must be the | |
658 jump instruction. Also, a raw define_insn may yield a plain | |
659 pattern. */ | |
660 doloop_pat = doloop_seq; | |
661 if (INSN_P (doloop_pat)) | |
662 { | |
663 while (NEXT_INSN (doloop_pat) != NULL_RTX) | |
664 doloop_pat = NEXT_INSN (doloop_pat); | |
665 if (!JUMP_P (doloop_pat)) | |
666 doloop_pat = NULL_RTX; | |
667 } | |
668 | |
669 if (! doloop_pat | |
670 || ! (condition = doloop_condition_get (doloop_pat))) | |
671 { | |
672 if (dump_file) | |
673 fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); | |
674 return false; | |
675 } | |
676 | |
677 doloop_modify (loop, desc, doloop_seq, condition, count, | |
678 zero_extend_p, mode); | |
679 return true; | |
680 } | |
681 | |
682 /* This is the main entry point. Process all loops using doloop_optimize. */ | |
683 | |
684 void | |
685 doloop_optimize_loops (void) | |
686 { | |
687 loop_iterator li; | |
688 struct loop *loop; | |
689 | |
690 FOR_EACH_LOOP (li, loop, 0) | |
691 { | |
692 doloop_optimize (loop); | |
693 } | |
694 | |
695 iv_analysis_done (); | |
696 | |
697 #ifdef ENABLE_CHECKING | |
698 verify_dominators (CDI_DOMINATORS); | |
699 verify_loop_structure (); | |
700 #endif | |
701 } | |
702 #endif /* HAVE_doloop_end */ | |
703 |