Mercurial > hg > CbC > CbC_gcc
annotate gcc/loop-doloop.c @ 138:fc828634a951
merge
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 08 Nov 2018 14:17:14 +0900 |
parents | 84e7813d76e9 |
children | 1830386684a0 |
rev | line source |
---|---|
0 | 1 /* Perform doloop optimizations |
131 | 2 Copyright (C) 2004-2018 Free Software Foundation, Inc. |
0 | 3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
111 | 24 #include "backend.h" |
25 #include "target.h" | |
0 | 26 #include "rtl.h" |
111 | 27 #include "tree.h" |
28 #include "cfghooks.h" | |
29 #include "memmodel.h" | |
30 #include "emit-rtl.h" | |
31 #include "dojump.h" | |
0 | 32 #include "expr.h" |
33 #include "cfgloop.h" | |
111 | 34 #include "cfgrtl.h" |
0 | 35 #include "params.h" |
111 | 36 #include "dumpfile.h" |
37 #include "loop-unroll.h" | |
38 #include "regs.h" | |
39 #include "df.h" | |
0 | 40 |
41 /* This module is used to modify loops with a determinable number of | |
42 iterations to use special low-overhead looping instructions. | |
43 | |
44 It first validates whether the loop is well behaved and has a | |
45 determinable number of iterations (either at compile or run-time). | |
46 It then modifies the loop to use a low-overhead looping pattern as | |
47 follows: | |
48 | |
49 1. A pseudo register is allocated as the loop iteration counter. | |
50 | |
51 2. The number of loop iterations is calculated and is stored | |
52 in the loop counter. | |
53 | |
54 3. At the end of the loop, the jump insn is replaced by the | |
55 doloop_end pattern. The compare must remain because it might be | |
56 used elsewhere. If the loop-variable or condition register are | |
57 used elsewhere, they will be eliminated by flow. | |
58 | |
59 4. An optional doloop_begin pattern is inserted at the top of the | |
60 loop. | |
61 | |
62 TODO The optimization should only performed when either the biv used for exit | |
63 condition is unused at all except for the exit test, or if we do not have to | |
64 change its value, since otherwise we have to add a new induction variable, | |
65 which usually will not pay up (unless the cost of the doloop pattern is | |
66 somehow extremely lower than the cost of compare & jump, or unless the bct | |
67 register cannot be used for anything else but doloop -- ??? detect these | |
68 cases). */ | |
69 | |
70 /* Return the loop termination condition for PATTERN or zero | |
71 if it is not a decrement and branch jump insn. */ | |
72 | |
73 rtx | |
111 | 74 doloop_condition_get (rtx_insn *doloop_pat) |
0 | 75 { |
76 rtx cmp; | |
77 rtx inc; | |
78 rtx reg; | |
79 rtx inc_src; | |
80 rtx condition; | |
81 rtx pattern; | |
111 | 82 rtx cc_reg = NULL_RTX; |
83 rtx reg_orig = NULL_RTX; | |
0 | 84 |
85 /* The canonical doloop pattern we expect has one of the following | |
86 forms: | |
87 | |
88 1) (parallel [(set (pc) (if_then_else (condition) | |
89 (label_ref (label)) | |
90 (pc))) | |
91 (set (reg) (plus (reg) (const_int -1))) | |
92 (additional clobbers and uses)]) | |
93 | |
94 The branch must be the first entry of the parallel (also required | |
95 by jump.c), and the second entry of the parallel must be a set of | |
96 the loop counter register. Some targets (IA-64) wrap the set of | |
97 the loop counter in an if_then_else too. | |
98 | |
99 2) (set (reg) (plus (reg) (const_int -1)) | |
100 (set (pc) (if_then_else (reg != 0) | |
101 (label_ref (label)) | |
111 | 102 (pc))). |
103 | |
104 Some targets (ARM) do the comparison before the branch, as in the | |
105 following form: | |
106 | |
107 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) | |
108 (set (reg) (plus (reg) (const_int -1)))]) | |
109 (set (pc) (if_then_else (cc == NE) | |
110 (label_ref (label)) | |
111 (pc))) */ | |
0 | 112 |
113 pattern = PATTERN (doloop_pat); | |
114 | |
115 if (GET_CODE (pattern) != PARALLEL) | |
116 { | |
117 rtx cond; | |
111 | 118 rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat); |
119 rtx cmp_arg1, cmp_arg2; | |
120 rtx cmp_orig; | |
0 | 121 |
111 | 122 /* In case the pattern is not PARALLEL we expect two forms |
123 of doloop which are cases 2) and 3) above: in case 2) the | |
124 decrement immediately precedes the branch, while in case 3) | |
125 the compare and decrement instructions immediately precede | |
126 the branch. */ | |
0 | 127 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
128 if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) |
0 | 129 return 0; |
130 | |
131 cmp = pattern; | |
111 | 132 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) |
133 { | |
134 /* The third case: the compare and decrement instructions | |
135 immediately precede the branch. */ | |
136 cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); | |
137 if (GET_CODE (cmp_orig) != SET) | |
138 return 0; | |
139 if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) | |
140 return 0; | |
141 cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); | |
142 cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); | |
143 if (cmp_arg2 != const0_rtx | |
144 || GET_CODE (cmp_arg1) != PLUS) | |
145 return 0; | |
146 reg_orig = XEXP (cmp_arg1, 0); | |
147 if (XEXP (cmp_arg1, 1) != GEN_INT (-1) | |
148 || !REG_P (reg_orig)) | |
149 return 0; | |
150 cc_reg = SET_DEST (cmp_orig); | |
151 | |
152 inc = XVECEXP (PATTERN (prev_insn), 0, 1); | |
153 } | |
154 else | |
155 inc = PATTERN (prev_insn); | |
156 if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE) | |
157 { | |
158 /* We expect the condition to be of the form (reg != 0) */ | |
159 cond = XEXP (SET_SRC (cmp), 0); | |
160 if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) | |
161 return 0; | |
162 } | |
0 | 163 } |
164 else | |
165 { | |
166 cmp = XVECEXP (pattern, 0, 0); | |
167 inc = XVECEXP (pattern, 0, 1); | |
168 } | |
169 | |
170 /* Check for (set (reg) (something)). */ | |
171 if (GET_CODE (inc) != SET) | |
172 return 0; | |
173 reg = SET_DEST (inc); | |
174 if (! REG_P (reg)) | |
175 return 0; | |
176 | |
177 /* Check if something = (plus (reg) (const_int -1)). | |
178 On IA-64, this decrement is wrapped in an if_then_else. */ | |
179 inc_src = SET_SRC (inc); | |
180 if (GET_CODE (inc_src) == IF_THEN_ELSE) | |
181 inc_src = XEXP (inc_src, 1); | |
182 if (GET_CODE (inc_src) != PLUS | |
183 || XEXP (inc_src, 0) != reg | |
184 || XEXP (inc_src, 1) != constm1_rtx) | |
185 return 0; | |
186 | |
187 /* Check for (set (pc) (if_then_else (condition) | |
188 (label_ref (label)) | |
189 (pc))). */ | |
190 if (GET_CODE (cmp) != SET | |
191 || SET_DEST (cmp) != pc_rtx | |
192 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
193 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
194 || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
195 return 0; | |
196 | |
197 /* Extract loop termination condition. */ | |
198 condition = XEXP (SET_SRC (cmp), 0); | |
199 | |
200 /* We expect a GE or NE comparison with 0 or 1. */ | |
201 if ((GET_CODE (condition) != GE | |
202 && GET_CODE (condition) != NE) | |
203 || (XEXP (condition, 1) != const0_rtx | |
204 && XEXP (condition, 1) != const1_rtx)) | |
205 return 0; | |
206 | |
207 if ((XEXP (condition, 0) == reg) | |
111 | 208 /* For the third case: */ |
209 || ((cc_reg != NULL_RTX) | |
210 && (XEXP (condition, 0) == cc_reg) | |
211 && (reg_orig == reg)) | |
0 | 212 || (GET_CODE (XEXP (condition, 0)) == PLUS |
111 | 213 && XEXP (XEXP (condition, 0), 0) == reg)) |
0 | 214 { |
215 if (GET_CODE (pattern) != PARALLEL) | |
111 | 216 /* For the second form we expect: |
0 | 217 |
218 (set (reg) (plus (reg) (const_int -1)) | |
219 (set (pc) (if_then_else (reg != 0) | |
220 (label_ref (label)) | |
221 (pc))). | |
222 | |
223 is equivalent to the following: | |
224 | |
225 (parallel [(set (pc) (if_then_else (reg != 1) | |
226 (label_ref (label)) | |
227 (pc))) | |
228 (set (reg) (plus (reg) (const_int -1))) | |
229 (additional clobbers and uses)]) | |
230 | |
111 | 231 For the third form we expect: |
232 | |
233 (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) | |
234 (set (reg) (plus (reg) (const_int -1)))]) | |
235 (set (pc) (if_then_else (cc == NE) | |
236 (label_ref (label)) | |
237 (pc))) | |
238 | |
239 which is equivalent to the following: | |
240 | |
241 (parallel [(set (cc) (compare (reg, 1)) | |
242 (set (reg) (plus (reg) (const_int -1))) | |
243 (set (pc) (if_then_else (NE == cc) | |
244 (label_ref (label)) | |
245 (pc))))]) | |
246 | |
247 So we return the second form instead for the two cases. | |
248 | |
0 | 249 */ |
250 condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); | |
251 | |
252 return condition; | |
253 } | |
254 | |
255 /* ??? If a machine uses a funny comparison, we could return a | |
256 canonicalized form here. */ | |
257 | |
258 return 0; | |
259 } | |
260 | |
261 /* Return nonzero if the loop specified by LOOP is suitable for | |
262 the use of special low-overhead looping instructions. DESC | |
263 describes the number of iterations of the loop. */ | |
264 | |
265 static bool | |
266 doloop_valid_p (struct loop *loop, struct niter_desc *desc) | |
267 { | |
268 basic_block *body = get_loop_body (loop), bb; | |
111 | 269 rtx_insn *insn; |
0 | 270 unsigned i; |
271 bool result = true; | |
272 | |
273 /* Check for loops that may not terminate under special conditions. */ | |
274 if (!desc->simple_p | |
275 || desc->assumptions | |
276 || desc->infinite) | |
277 { | |
278 /* There are some cases that would require a special attention. | |
279 For example if the comparison is LEU and the comparison value | |
280 is UINT_MAX then the loop will not terminate. Similarly, if the | |
281 comparison code is GEU and the comparison value is 0, the | |
282 loop will not terminate. | |
283 | |
284 If the absolute increment is not 1, the loop can be infinite | |
285 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) | |
286 | |
287 ??? We could compute these conditions at run-time and have a | |
288 additional jump around the loop to ensure an infinite loop. | |
289 However, it is very unlikely that this is the intended | |
290 behavior of the loop and checking for these rare boundary | |
291 conditions would pessimize all other code. | |
292 | |
293 If the loop is executed only a few times an extra check to | |
294 restart the loop could use up most of the benefits of using a | |
295 count register loop. Note however, that normally, this | |
296 restart branch would never execute, so it could be predicted | |
297 well by the CPU. We should generate the pessimistic code by | |
298 default, and have an option, e.g. -funsafe-loops that would | |
299 enable count-register loops in this case. */ | |
300 if (dump_file) | |
301 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); | |
302 result = false; | |
303 goto cleanup; | |
304 } | |
305 | |
306 for (i = 0; i < loop->num_nodes; i++) | |
307 { | |
308 bb = body[i]; | |
309 | |
310 for (insn = BB_HEAD (bb); | |
311 insn != NEXT_INSN (BB_END (bb)); | |
312 insn = NEXT_INSN (insn)) | |
313 { | |
314 /* Different targets have different necessities for low-overhead | |
315 looping. Call the back end for each instruction within the loop | |
316 to let it decide whether the insn prohibits a low-overhead loop. | |
317 It will then return the cause for it to emit to the dump file. */ | |
318 const char * invalid = targetm.invalid_within_doloop (insn); | |
319 if (invalid) | |
320 { | |
321 if (dump_file) | |
322 fprintf (dump_file, "Doloop: %s\n", invalid); | |
323 result = false; | |
324 goto cleanup; | |
325 } | |
326 } | |
327 } | |
328 result = true; | |
329 | |
330 cleanup: | |
331 free (body); | |
332 | |
333 return result; | |
334 } | |
335 | |
336 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru | |
337 edge. If the condition is always false, do not do anything. If it is always | |
338 true, redirect E to DEST and return false. In all other cases, true is | |
339 returned. */ | |
340 | |
341 static bool | |
342 add_test (rtx cond, edge *e, basic_block dest) | |
343 { | |
111 | 344 rtx_insn *seq, *jump; |
345 rtx_code_label *label; | |
346 machine_mode mode; | |
0 | 347 rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); |
348 enum rtx_code code = GET_CODE (cond); | |
349 basic_block bb; | |
111 | 350 /* The jump is supposed to handle an unlikely special case. */ |
351 profile_probability prob = profile_probability::guessed_never (); | |
0 | 352 |
353 mode = GET_MODE (XEXP (cond, 0)); | |
354 if (mode == VOIDmode) | |
355 mode = GET_MODE (XEXP (cond, 1)); | |
356 | |
357 start_sequence (); | |
358 op0 = force_operand (op0, NULL_RTX); | |
359 op1 = force_operand (op1, NULL_RTX); | |
360 label = block_label (dest); | |
111 | 361 do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label, |
362 prob); | |
0 | 363 |
364 jump = get_last_insn (); | |
365 if (!jump || !JUMP_P (jump)) | |
366 { | |
367 /* The condition is always false and the jump was optimized out. */ | |
368 end_sequence (); | |
369 return true; | |
370 } | |
371 | |
372 seq = get_insns (); | |
111 | 373 unshare_all_rtl_in_chain (seq); |
0 | 374 end_sequence (); |
375 | |
376 /* There always is at least the jump insn in the sequence. */ | |
377 gcc_assert (seq != NULL_RTX); | |
378 | |
379 bb = split_edge_and_insert (*e, seq); | |
380 *e = single_succ_edge (bb); | |
381 | |
382 if (any_uncondjump_p (jump)) | |
383 { | |
384 /* The condition is always true. */ | |
385 delete_insn (jump); | |
386 redirect_edge_and_branch_force (*e, dest); | |
387 return false; | |
388 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
389 |
0 | 390 JUMP_LABEL (jump) = label; |
391 | |
392 LABEL_NUSES (label)++; | |
393 | |
111 | 394 edge e2 = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); |
395 e2->probability = prob; | |
396 (*e)->probability = prob.invert (); | |
397 update_br_prob_note (e2->src); | |
0 | 398 return true; |
399 } | |
400 | |
401 /* Modify the loop to use the low-overhead looping insn where LOOP | |
402 describes the loop, DESC describes the number of iterations of the | |
403 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the | |
404 end of the loop. CONDITION is the condition separated from the | |
111 | 405 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ |
0 | 406 |
407 static void | |
408 doloop_modify (struct loop *loop, struct niter_desc *desc, | |
111 | 409 rtx_insn *doloop_seq, rtx condition, rtx count) |
0 | 410 { |
411 rtx counter_reg; | |
412 rtx tmp, noloop = NULL_RTX; | |
111 | 413 rtx_insn *sequence; |
414 rtx_insn *jump_insn; | |
415 rtx_code_label *jump_label; | |
0 | 416 int nonneg = 0; |
417 bool increment_count; | |
418 basic_block loop_end = desc->out_edge->src; | |
111 | 419 scalar_int_mode mode; |
420 widest_int iterations; | |
0 | 421 |
422 jump_insn = BB_END (loop_end); | |
423 | |
424 if (dump_file) | |
425 { | |
426 fprintf (dump_file, "Doloop: Inserting doloop pattern ("); | |
427 if (desc->const_iter) | |
111 | 428 fprintf (dump_file, "%" PRId64, desc->niter); |
0 | 429 else |
430 fputs ("runtime", dump_file); | |
431 fputs (" iterations).\n", dump_file); | |
432 } | |
433 | |
434 /* Discard original jump to continue loop. The original compare | |
435 result may still be live, so it cannot be discarded explicitly. */ | |
436 delete_insn (jump_insn); | |
437 | |
438 counter_reg = XEXP (condition, 0); | |
439 if (GET_CODE (counter_reg) == PLUS) | |
440 counter_reg = XEXP (counter_reg, 0); | |
111 | 441 /* These patterns must operate on integer counters. */ |
442 mode = as_a <scalar_int_mode> (GET_MODE (counter_reg)); | |
0 | 443 |
444 increment_count = false; | |
445 switch (GET_CODE (condition)) | |
446 { | |
447 case NE: | |
448 /* Currently only NE tests against zero and one are supported. */ | |
449 noloop = XEXP (condition, 1); | |
450 if (noloop != const0_rtx) | |
451 { | |
452 gcc_assert (noloop == const1_rtx); | |
453 increment_count = true; | |
454 } | |
455 break; | |
456 | |
457 case GE: | |
458 /* Currently only GE tests against zero are supported. */ | |
459 gcc_assert (XEXP (condition, 1) == const0_rtx); | |
460 | |
461 noloop = constm1_rtx; | |
462 | |
463 /* The iteration count does not need incrementing for a GE test. */ | |
464 increment_count = false; | |
465 | |
466 /* Determine if the iteration counter will be non-negative. | |
467 Note that the maximum value loaded is iterations_max - 1. */ | |
111 | 468 if (get_max_loop_iterations (loop, &iterations) |
469 && wi::leu_p (iterations, | |
470 wi::set_bit_in_zero <widest_int> | |
471 (GET_MODE_PRECISION (mode) - 1))) | |
0 | 472 nonneg = 1; |
473 break; | |
474 | |
475 /* Abort if an invalid doloop pattern has been generated. */ | |
476 default: | |
477 gcc_unreachable (); | |
478 } | |
479 | |
480 if (increment_count) | |
111 | 481 count = simplify_gen_binary (PLUS, mode, count, const1_rtx); |
0 | 482 |
483 /* Insert initialization of the count register into the loop header. */ | |
484 start_sequence (); | |
111 | 485 /* count has been already copied through copy_rtx. */ |
486 reset_used_flags (count); | |
487 set_used_flags (condition); | |
0 | 488 tmp = force_operand (count, counter_reg); |
489 convert_move (counter_reg, tmp, 1); | |
490 sequence = get_insns (); | |
111 | 491 unshare_all_rtl_in_chain (sequence); |
0 | 492 end_sequence (); |
493 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
494 | |
495 if (desc->noloop_assumptions) | |
496 { | |
497 rtx ass = copy_rtx (desc->noloop_assumptions); | |
498 basic_block preheader = loop_preheader_edge (loop)->src; | |
111 | 499 basic_block set_zero = split_edge (loop_preheader_edge (loop)); |
500 basic_block new_preheader = split_edge (loop_preheader_edge (loop)); | |
0 | 501 edge te; |
502 | |
503 /* Expand the condition testing the assumptions and if it does not pass, | |
504 reset the count register to 0. */ | |
505 redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); | |
506 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); | |
507 | |
111 | 508 set_zero->count = profile_count::uninitialized (); |
0 | 509 |
510 te = single_succ_edge (preheader); | |
511 for (; ass; ass = XEXP (ass, 1)) | |
512 if (!add_test (XEXP (ass, 0), &te, set_zero)) | |
513 break; | |
514 | |
515 if (ass) | |
516 { | |
517 /* We reached a condition that is always true. This is very hard to | |
518 reproduce (such a loop does not roll, and thus it would most | |
519 likely get optimized out by some of the preceding optimizations). | |
520 In fact, I do not have any testcase for it. However, it would | |
521 also be very hard to show that it is impossible, so we must | |
522 handle this case. */ | |
523 set_zero->count = preheader->count; | |
524 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
525 |
0 | 526 if (EDGE_COUNT (set_zero->preds) == 0) |
527 { | |
528 /* All the conditions were simplified to false, remove the | |
529 unreachable set_zero block. */ | |
530 delete_basic_block (set_zero); | |
531 } | |
532 else | |
533 { | |
534 /* Reset the counter to zero in the set_zero block. */ | |
535 start_sequence (); | |
536 convert_move (counter_reg, noloop, 0); | |
537 sequence = get_insns (); | |
538 end_sequence (); | |
539 emit_insn_after (sequence, BB_END (set_zero)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
540 |
0 | 541 set_immediate_dominator (CDI_DOMINATORS, set_zero, |
542 recompute_dominator (CDI_DOMINATORS, | |
543 set_zero)); | |
544 } | |
545 | |
546 set_immediate_dominator (CDI_DOMINATORS, new_preheader, | |
547 recompute_dominator (CDI_DOMINATORS, | |
548 new_preheader)); | |
549 } | |
550 | |
551 /* Some targets (eg, C4x) need to initialize special looping | |
552 registers. */ | |
111 | 553 if (targetm.have_doloop_begin ()) |
554 if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq)) | |
555 emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src)); | |
0 | 556 |
557 /* Insert the new low-overhead looping insn. */ | |
558 emit_jump_insn_after (doloop_seq, BB_END (loop_end)); | |
559 jump_insn = BB_END (loop_end); | |
560 jump_label = block_label (desc->in_edge->dest); | |
561 JUMP_LABEL (jump_insn) = jump_label; | |
562 LABEL_NUSES (jump_label)++; | |
563 | |
564 /* Ensure the right fallthru edge is marked, for case we have reversed | |
565 the condition. */ | |
566 desc->in_edge->flags &= ~EDGE_FALLTHRU; | |
567 desc->out_edge->flags |= EDGE_FALLTHRU; | |
568 | |
569 /* Add a REG_NONNEG note if the actual or estimated maximum number | |
570 of iterations is non-negative. */ | |
571 if (nonneg) | |
572 add_reg_note (jump_insn, REG_NONNEG, NULL_RTX); | |
573 | |
574 /* Update the REG_BR_PROB note. */ | |
111 | 575 if (desc->in_edge->probability.initialized_p ()) |
576 add_reg_br_prob_note (jump_insn, desc->in_edge->probability); | |
577 } | |
578 | |
579 /* Called through note_stores. */ | |
580 | |
581 static void | |
582 record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) | |
583 { | |
584 bitmap mod = (bitmap)data; | |
585 if (REG_P (x)) | |
0 | 586 { |
111 | 587 unsigned int regno = REGNO (x); |
588 if (HARD_REGISTER_P (x)) | |
589 { | |
590 unsigned int end_regno = end_hard_regno (GET_MODE (x), regno); | |
591 do | |
592 bitmap_set_bit (mod, regno); | |
593 while (++regno < end_regno); | |
594 } | |
595 else | |
596 bitmap_set_bit (mod, regno); | |
0 | 597 } |
598 } | |
599 | |
600 /* Process loop described by LOOP validating that the loop is suitable for | |
601 conversion to use a low overhead looping instruction, replacing the jump | |
602 insn where suitable. Returns true if the loop was successfully | |
603 modified. */ | |
604 | |
605 static bool | |
606 doloop_optimize (struct loop *loop) | |
607 { | |
111 | 608 scalar_int_mode mode; |
609 rtx doloop_reg; | |
610 rtx count; | |
611 widest_int iterations, iterations_max; | |
612 rtx_code_label *start_label; | |
0 | 613 rtx condition; |
111 | 614 unsigned level; |
615 HOST_WIDE_INT est_niter; | |
0 | 616 int max_cost; |
617 struct niter_desc *desc; | |
618 unsigned word_mode_size; | |
619 unsigned HOST_WIDE_INT word_mode_max; | |
111 | 620 int entered_at_top; |
0 | 621 |
622 if (dump_file) | |
623 fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); | |
624 | |
625 iv_analysis_loop_init (loop); | |
626 | |
627 /* Find the simple exit of a LOOP. */ | |
628 desc = get_simple_loop_desc (loop); | |
629 | |
630 /* Check that loop is a candidate for a low-overhead looping insn. */ | |
631 if (!doloop_valid_p (loop, desc)) | |
632 { | |
633 if (dump_file) | |
634 fprintf (dump_file, | |
635 "Doloop: The loop is not suitable.\n"); | |
636 return false; | |
637 } | |
638 mode = desc->mode; | |
639 | |
111 | 640 est_niter = get_estimated_loop_iterations_int (loop); |
641 if (est_niter == -1) | |
642 est_niter = get_likely_max_loop_iterations_int (loop); | |
0 | 643 |
111 | 644 if (est_niter >= 0 && est_niter < 3) |
0 | 645 { |
646 if (dump_file) | |
647 fprintf (dump_file, | |
648 "Doloop: Too few iterations (%u) to be profitable.\n", | |
111 | 649 (unsigned int)est_niter); |
0 | 650 return false; |
651 } | |
652 | |
653 max_cost | |
654 = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); | |
111 | 655 if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop)) |
0 | 656 > max_cost) |
657 { | |
658 if (dump_file) | |
659 fprintf (dump_file, | |
660 "Doloop: number of iterations too costly to compute.\n"); | |
661 return false; | |
662 } | |
663 | |
111 | 664 if (desc->const_iter) |
665 iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode), | |
666 UNSIGNED); | |
667 else | |
668 iterations = 0; | |
669 if (!get_max_loop_iterations (loop, &iterations_max)) | |
670 iterations_max = 0; | |
0 | 671 level = get_loop_level (loop) + 1; |
111 | 672 entered_at_top = (loop->latch == desc->in_edge->dest |
673 && contains_no_active_insn_p (loop->latch)); | |
674 if (!targetm.can_use_doloop_p (iterations, iterations_max, level, | |
675 entered_at_top)) | |
676 { | |
677 if (dump_file) | |
678 fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n"); | |
679 return false; | |
680 } | |
0 | 681 |
682 /* Generate looping insn. If the pattern FAILs then give up trying | |
683 to modify the loop since there is some aspect the back-end does | |
684 not like. */ | |
111 | 685 count = copy_rtx (desc->niter_expr); |
0 | 686 start_label = block_label (desc->in_edge->dest); |
687 doloop_reg = gen_reg_rtx (mode); | |
111 | 688 rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label); |
0 | 689 |
111 | 690 word_mode_size = GET_MODE_PRECISION (word_mode); |
691 word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1; | |
0 | 692 if (! doloop_seq |
693 && mode != word_mode | |
694 /* Before trying mode different from the one in that # of iterations is | |
695 computed, we must be sure that the number of iterations fits into | |
696 the new mode. */ | |
111 | 697 && (word_mode_size >= GET_MODE_PRECISION (mode) |
698 || wi::leu_p (iterations_max, word_mode_max))) | |
0 | 699 { |
111 | 700 if (word_mode_size > GET_MODE_PRECISION (mode)) |
701 count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode); | |
0 | 702 else |
111 | 703 count = lowpart_subreg (word_mode, count, mode); |
0 | 704 PUT_MODE (doloop_reg, word_mode); |
111 | 705 doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label); |
0 | 706 } |
707 if (! doloop_seq) | |
708 { | |
709 if (dump_file) | |
710 fprintf (dump_file, | |
711 "Doloop: Target unwilling to use doloop pattern!\n"); | |
712 return false; | |
713 } | |
714 | |
715 /* If multiple instructions were created, the last must be the | |
111 | 716 jump instruction. */ |
717 rtx_insn *doloop_insn = doloop_seq; | |
718 while (NEXT_INSN (doloop_insn) != NULL_RTX) | |
719 doloop_insn = NEXT_INSN (doloop_insn); | |
720 if (!JUMP_P (doloop_insn) | |
721 || !(condition = doloop_condition_get (doloop_insn))) | |
0 | 722 { |
723 if (dump_file) | |
724 fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); | |
725 return false; | |
726 } | |
727 | |
111 | 728 /* Ensure that the new sequence doesn't clobber a register that |
729 is live at the end of the block. */ | |
730 { | |
731 bitmap modified = BITMAP_ALLOC (NULL); | |
732 | |
733 for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i)) | |
734 note_stores (PATTERN (i), record_reg_sets, modified); | |
735 | |
736 basic_block loop_end = desc->out_edge->src; | |
737 bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified); | |
738 BITMAP_FREE (modified); | |
739 | |
740 if (fail) | |
741 { | |
742 if (dump_file) | |
743 fprintf (dump_file, "Doloop: doloop pattern clobbers live out\n"); | |
744 return false; | |
745 } | |
746 } | |
747 | |
748 doloop_modify (loop, desc, doloop_seq, condition, count); | |
0 | 749 return true; |
750 } | |
751 | |
752 /* This is the main entry point. Process all loops using doloop_optimize. */ | |
753 | |
754 void | |
755 doloop_optimize_loops (void) | |
756 { | |
757 struct loop *loop; | |
758 | |
111 | 759 if (optimize == 1) |
760 { | |
761 df_live_add_problem (); | |
762 df_live_set_all_dirty (); | |
763 } | |
764 | |
765 FOR_EACH_LOOP (loop, 0) | |
0 | 766 { |
767 doloop_optimize (loop); | |
768 } | |
769 | |
111 | 770 if (optimize == 1) |
771 df_remove_problem (df_live); | |
772 | |
0 | 773 iv_analysis_done (); |
774 | |
111 | 775 checking_verify_loop_structure (); |
0 | 776 } |