Mercurial > hg > CbC > CbC_gcc
annotate gcc/loop-doloop.c @ 158:494b0b89df80 default tip
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 18:13:55 +0900 |
parents | 1830386684a0 |
children |
rev | line source |
---|---|
0 | 1 /* Perform doloop optimizations |
145 | 2 Copyright (C) 2004-2020 Free Software Foundation, Inc. |
0 | 3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
111 | 24 #include "backend.h" |
25 #include "target.h" | |
0 | 26 #include "rtl.h" |
111 | 27 #include "tree.h" |
28 #include "cfghooks.h" | |
29 #include "memmodel.h" | |
30 #include "emit-rtl.h" | |
31 #include "dojump.h" | |
0 | 32 #include "expr.h" |
33 #include "cfgloop.h" | |
111 | 34 #include "cfgrtl.h" |
35 #include "dumpfile.h" | |
36 #include "loop-unroll.h" | |
37 #include "regs.h" | |
38 #include "df.h" | |
0 | 39 |
40 /* This module is used to modify loops with a determinable number of | |
41 iterations to use special low-overhead looping instructions. | |
42 | |
43 It first validates whether the loop is well behaved and has a | |
44 determinable number of iterations (either at compile or run-time). | |
45 It then modifies the loop to use a low-overhead looping pattern as | |
46 follows: | |
47 | |
48 1. A pseudo register is allocated as the loop iteration counter. | |
49 | |
50 2. The number of loop iterations is calculated and is stored | |
51 in the loop counter. | |
52 | |
53 3. At the end of the loop, the jump insn is replaced by the | |
54 doloop_end pattern. The compare must remain because it might be | |
55 used elsewhere. If the loop-variable or condition register are | |
56 used elsewhere, they will be eliminated by flow. | |
57 | |
58 4. An optional doloop_begin pattern is inserted at the top of the | |
59 loop. | |
60 | |
61 TODO The optimization should only performed when either the biv used for exit | |
62 condition is unused at all except for the exit test, or if we do not have to | |
63 change its value, since otherwise we have to add a new induction variable, | |
64 which usually will not pay up (unless the cost of the doloop pattern is | |
65 somehow extremely lower than the cost of compare & jump, or unless the bct | |
66 register cannot be used for anything else but doloop -- ??? detect these | |
67 cases). */ | |
68 | |
69 /* Return the loop termination condition for PATTERN or zero | |
70 if it is not a decrement and branch jump insn. */ | |
71 | |
72 rtx | |
111 | 73 doloop_condition_get (rtx_insn *doloop_pat) |
0 | 74 { |
75 rtx cmp; | |
76 rtx inc; | |
77 rtx reg; | |
78 rtx inc_src; | |
79 rtx condition; | |
80 rtx pattern; | |
111 | 81 rtx cc_reg = NULL_RTX; |
82 rtx reg_orig = NULL_RTX; | |
0 | 83 |
84 /* The canonical doloop pattern we expect has one of the following | |
85 forms: | |
86 | |
87 1) (parallel [(set (pc) (if_then_else (condition) | |
88 (label_ref (label)) | |
89 (pc))) | |
90 (set (reg) (plus (reg) (const_int -1))) | |
91 (additional clobbers and uses)]) | |
92 | |
93 The branch must be the first entry of the parallel (also required | |
94 by jump.c), and the second entry of the parallel must be a set of | |
95 the loop counter register. Some targets (IA-64) wrap the set of | |
96 the loop counter in an if_then_else too. | |
97 | |
98 2) (set (reg) (plus (reg) (const_int -1)) | |
99 (set (pc) (if_then_else (reg != 0) | |
100 (label_ref (label)) | |
111 | 101 (pc))). |
102 | |
103 Some targets (ARM) do the comparison before the branch, as in the | |
104 following form: | |
105 | |
106 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) | |
107 (set (reg) (plus (reg) (const_int -1)))]) | |
108 (set (pc) (if_then_else (cc == NE) | |
109 (label_ref (label)) | |
110 (pc))) */ | |
0 | 111 |
112 pattern = PATTERN (doloop_pat); | |
113 | |
114 if (GET_CODE (pattern) != PARALLEL) | |
115 { | |
116 rtx cond; | |
111 | 117 rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat); |
118 rtx cmp_arg1, cmp_arg2; | |
119 rtx cmp_orig; | |
0 | 120 |
111 | 121 /* In case the pattern is not PARALLEL we expect two forms |
122 of doloop which are cases 2) and 3) above: in case 2) the | |
123 decrement immediately precedes the branch, while in case 3) | |
124 the compare and decrement instructions immediately precede | |
125 the branch. */ | |
0 | 126 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
127 if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) |
0 | 128 return 0; |
129 | |
130 cmp = pattern; | |
111 | 131 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) |
132 { | |
133 /* The third case: the compare and decrement instructions | |
134 immediately precede the branch. */ | |
135 cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); | |
136 if (GET_CODE (cmp_orig) != SET) | |
137 return 0; | |
138 if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) | |
139 return 0; | |
140 cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); | |
141 cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); | |
142 if (cmp_arg2 != const0_rtx | |
143 || GET_CODE (cmp_arg1) != PLUS) | |
144 return 0; | |
145 reg_orig = XEXP (cmp_arg1, 0); | |
146 if (XEXP (cmp_arg1, 1) != GEN_INT (-1) | |
147 || !REG_P (reg_orig)) | |
148 return 0; | |
149 cc_reg = SET_DEST (cmp_orig); | |
150 | |
151 inc = XVECEXP (PATTERN (prev_insn), 0, 1); | |
152 } | |
153 else | |
154 inc = PATTERN (prev_insn); | |
155 if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE) | |
156 { | |
157 /* We expect the condition to be of the form (reg != 0) */ | |
158 cond = XEXP (SET_SRC (cmp), 0); | |
159 if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) | |
160 return 0; | |
161 } | |
0 | 162 } |
163 else | |
164 { | |
165 cmp = XVECEXP (pattern, 0, 0); | |
166 inc = XVECEXP (pattern, 0, 1); | |
167 } | |
168 | |
169 /* Check for (set (reg) (something)). */ | |
170 if (GET_CODE (inc) != SET) | |
171 return 0; | |
172 reg = SET_DEST (inc); | |
173 if (! REG_P (reg)) | |
174 return 0; | |
175 | |
176 /* Check if something = (plus (reg) (const_int -1)). | |
177 On IA-64, this decrement is wrapped in an if_then_else. */ | |
178 inc_src = SET_SRC (inc); | |
179 if (GET_CODE (inc_src) == IF_THEN_ELSE) | |
180 inc_src = XEXP (inc_src, 1); | |
181 if (GET_CODE (inc_src) != PLUS | |
182 || XEXP (inc_src, 0) != reg | |
183 || XEXP (inc_src, 1) != constm1_rtx) | |
184 return 0; | |
185 | |
186 /* Check for (set (pc) (if_then_else (condition) | |
187 (label_ref (label)) | |
188 (pc))). */ | |
189 if (GET_CODE (cmp) != SET | |
190 || SET_DEST (cmp) != pc_rtx | |
191 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
192 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
193 || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
194 return 0; | |
195 | |
196 /* Extract loop termination condition. */ | |
197 condition = XEXP (SET_SRC (cmp), 0); | |
198 | |
199 /* We expect a GE or NE comparison with 0 or 1. */ | |
200 if ((GET_CODE (condition) != GE | |
201 && GET_CODE (condition) != NE) | |
202 || (XEXP (condition, 1) != const0_rtx | |
203 && XEXP (condition, 1) != const1_rtx)) | |
204 return 0; | |
205 | |
206 if ((XEXP (condition, 0) == reg) | |
111 | 207 /* For the third case: */ |
208 || ((cc_reg != NULL_RTX) | |
209 && (XEXP (condition, 0) == cc_reg) | |
210 && (reg_orig == reg)) | |
0 | 211 || (GET_CODE (XEXP (condition, 0)) == PLUS |
111 | 212 && XEXP (XEXP (condition, 0), 0) == reg)) |
0 | 213 { |
214 if (GET_CODE (pattern) != PARALLEL) | |
111 | 215 /* For the second form we expect: |
0 | 216 |
217 (set (reg) (plus (reg) (const_int -1)) | |
218 (set (pc) (if_then_else (reg != 0) | |
219 (label_ref (label)) | |
220 (pc))). | |
221 | |
222 is equivalent to the following: | |
223 | |
224 (parallel [(set (pc) (if_then_else (reg != 1) | |
225 (label_ref (label)) | |
226 (pc))) | |
227 (set (reg) (plus (reg) (const_int -1))) | |
228 (additional clobbers and uses)]) | |
229 | |
111 | 230 For the third form we expect: |
231 | |
232 (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) | |
233 (set (reg) (plus (reg) (const_int -1)))]) | |
234 (set (pc) (if_then_else (cc == NE) | |
235 (label_ref (label)) | |
236 (pc))) | |
237 | |
238 which is equivalent to the following: | |
239 | |
240 (parallel [(set (cc) (compare (reg, 1)) | |
241 (set (reg) (plus (reg) (const_int -1))) | |
242 (set (pc) (if_then_else (NE == cc) | |
243 (label_ref (label)) | |
244 (pc))))]) | |
245 | |
246 So we return the second form instead for the two cases. | |
247 | |
0 | 248 */ |
249 condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); | |
250 | |
251 return condition; | |
252 } | |
253 | |
254 /* ??? If a machine uses a funny comparison, we could return a | |
255 canonicalized form here. */ | |
256 | |
257 return 0; | |
258 } | |
259 | |
260 /* Return nonzero if the loop specified by LOOP is suitable for | |
261 the use of special low-overhead looping instructions. DESC | |
262 describes the number of iterations of the loop. */ | |
263 | |
264 static bool | |
145 | 265 doloop_valid_p (class loop *loop, class niter_desc *desc) |
0 | 266 { |
267 basic_block *body = get_loop_body (loop), bb; | |
111 | 268 rtx_insn *insn; |
0 | 269 unsigned i; |
270 bool result = true; | |
271 | |
272 /* Check for loops that may not terminate under special conditions. */ | |
273 if (!desc->simple_p | |
274 || desc->assumptions | |
275 || desc->infinite) | |
276 { | |
277 /* There are some cases that would require a special attention. | |
278 For example if the comparison is LEU and the comparison value | |
279 is UINT_MAX then the loop will not terminate. Similarly, if the | |
280 comparison code is GEU and the comparison value is 0, the | |
281 loop will not terminate. | |
282 | |
283 If the absolute increment is not 1, the loop can be infinite | |
284 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) | |
285 | |
286 ??? We could compute these conditions at run-time and have a | |
287 additional jump around the loop to ensure an infinite loop. | |
288 However, it is very unlikely that this is the intended | |
289 behavior of the loop and checking for these rare boundary | |
290 conditions would pessimize all other code. | |
291 | |
292 If the loop is executed only a few times an extra check to | |
293 restart the loop could use up most of the benefits of using a | |
294 count register loop. Note however, that normally, this | |
295 restart branch would never execute, so it could be predicted | |
296 well by the CPU. We should generate the pessimistic code by | |
297 default, and have an option, e.g. -funsafe-loops that would | |
298 enable count-register loops in this case. */ | |
299 if (dump_file) | |
300 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); | |
301 result = false; | |
302 goto cleanup; | |
303 } | |
304 | |
305 for (i = 0; i < loop->num_nodes; i++) | |
306 { | |
307 bb = body[i]; | |
308 | |
309 for (insn = BB_HEAD (bb); | |
310 insn != NEXT_INSN (BB_END (bb)); | |
311 insn = NEXT_INSN (insn)) | |
312 { | |
313 /* Different targets have different necessities for low-overhead | |
314 looping. Call the back end for each instruction within the loop | |
315 to let it decide whether the insn prohibits a low-overhead loop. | |
316 It will then return the cause for it to emit to the dump file. */ | |
317 const char * invalid = targetm.invalid_within_doloop (insn); | |
318 if (invalid) | |
319 { | |
320 if (dump_file) | |
321 fprintf (dump_file, "Doloop: %s\n", invalid); | |
322 result = false; | |
323 goto cleanup; | |
324 } | |
325 } | |
326 } | |
327 result = true; | |
328 | |
329 cleanup: | |
330 free (body); | |
331 | |
332 return result; | |
333 } | |
334 | |
335 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru | |
336 edge. If the condition is always false, do not do anything. If it is always | |
337 true, redirect E to DEST and return false. In all other cases, true is | |
338 returned. */ | |
339 | |
340 static bool | |
341 add_test (rtx cond, edge *e, basic_block dest) | |
342 { | |
111 | 343 rtx_insn *seq, *jump; |
344 rtx_code_label *label; | |
345 machine_mode mode; | |
0 | 346 rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); |
347 enum rtx_code code = GET_CODE (cond); | |
348 basic_block bb; | |
111 | 349 /* The jump is supposed to handle an unlikely special case. */ |
350 profile_probability prob = profile_probability::guessed_never (); | |
0 | 351 |
352 mode = GET_MODE (XEXP (cond, 0)); | |
353 if (mode == VOIDmode) | |
354 mode = GET_MODE (XEXP (cond, 1)); | |
355 | |
356 start_sequence (); | |
357 op0 = force_operand (op0, NULL_RTX); | |
358 op1 = force_operand (op1, NULL_RTX); | |
359 label = block_label (dest); | |
111 | 360 do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label, |
361 prob); | |
0 | 362 |
363 jump = get_last_insn (); | |
364 if (!jump || !JUMP_P (jump)) | |
365 { | |
366 /* The condition is always false and the jump was optimized out. */ | |
367 end_sequence (); | |
368 return true; | |
369 } | |
370 | |
371 seq = get_insns (); | |
111 | 372 unshare_all_rtl_in_chain (seq); |
0 | 373 end_sequence (); |
374 | |
375 /* There always is at least the jump insn in the sequence. */ | |
376 gcc_assert (seq != NULL_RTX); | |
377 | |
378 bb = split_edge_and_insert (*e, seq); | |
379 *e = single_succ_edge (bb); | |
380 | |
381 if (any_uncondjump_p (jump)) | |
382 { | |
383 /* The condition is always true. */ | |
384 delete_insn (jump); | |
385 redirect_edge_and_branch_force (*e, dest); | |
386 return false; | |
387 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
388 |
0 | 389 JUMP_LABEL (jump) = label; |
390 | |
391 LABEL_NUSES (label)++; | |
392 | |
111 | 393 edge e2 = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); |
394 e2->probability = prob; | |
395 (*e)->probability = prob.invert (); | |
396 update_br_prob_note (e2->src); | |
0 | 397 return true; |
398 } | |
399 | |
400 /* Modify the loop to use the low-overhead looping insn where LOOP | |
401 describes the loop, DESC describes the number of iterations of the | |
402 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the | |
403 end of the loop. CONDITION is the condition separated from the | |
111 | 404 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ |
0 | 405 |
406 static void | |
145 | 407 doloop_modify (class loop *loop, class niter_desc *desc, |
111 | 408 rtx_insn *doloop_seq, rtx condition, rtx count) |
0 | 409 { |
410 rtx counter_reg; | |
411 rtx tmp, noloop = NULL_RTX; | |
111 | 412 rtx_insn *sequence; |
413 rtx_insn *jump_insn; | |
414 rtx_code_label *jump_label; | |
0 | 415 int nonneg = 0; |
416 bool increment_count; | |
417 basic_block loop_end = desc->out_edge->src; | |
111 | 418 scalar_int_mode mode; |
419 widest_int iterations; | |
0 | 420 |
421 jump_insn = BB_END (loop_end); | |
422 | |
423 if (dump_file) | |
424 { | |
425 fprintf (dump_file, "Doloop: Inserting doloop pattern ("); | |
426 if (desc->const_iter) | |
111 | 427 fprintf (dump_file, "%" PRId64, desc->niter); |
0 | 428 else |
429 fputs ("runtime", dump_file); | |
430 fputs (" iterations).\n", dump_file); | |
431 } | |
432 | |
433 /* Discard original jump to continue loop. The original compare | |
434 result may still be live, so it cannot be discarded explicitly. */ | |
435 delete_insn (jump_insn); | |
436 | |
437 counter_reg = XEXP (condition, 0); | |
438 if (GET_CODE (counter_reg) == PLUS) | |
439 counter_reg = XEXP (counter_reg, 0); | |
111 | 440 /* These patterns must operate on integer counters. */ |
441 mode = as_a <scalar_int_mode> (GET_MODE (counter_reg)); | |
0 | 442 |
443 increment_count = false; | |
444 switch (GET_CODE (condition)) | |
445 { | |
446 case NE: | |
447 /* Currently only NE tests against zero and one are supported. */ | |
448 noloop = XEXP (condition, 1); | |
449 if (noloop != const0_rtx) | |
450 { | |
451 gcc_assert (noloop == const1_rtx); | |
452 increment_count = true; | |
453 } | |
454 break; | |
455 | |
456 case GE: | |
457 /* Currently only GE tests against zero are supported. */ | |
458 gcc_assert (XEXP (condition, 1) == const0_rtx); | |
459 | |
460 noloop = constm1_rtx; | |
461 | |
462 /* The iteration count does not need incrementing for a GE test. */ | |
463 increment_count = false; | |
464 | |
465 /* Determine if the iteration counter will be non-negative. | |
466 Note that the maximum value loaded is iterations_max - 1. */ | |
111 | 467 if (get_max_loop_iterations (loop, &iterations) |
468 && wi::leu_p (iterations, | |
469 wi::set_bit_in_zero <widest_int> | |
470 (GET_MODE_PRECISION (mode) - 1))) | |
0 | 471 nonneg = 1; |
472 break; | |
473 | |
474 /* Abort if an invalid doloop pattern has been generated. */ | |
475 default: | |
476 gcc_unreachable (); | |
477 } | |
478 | |
479 if (increment_count) | |
111 | 480 count = simplify_gen_binary (PLUS, mode, count, const1_rtx); |
0 | 481 |
482 /* Insert initialization of the count register into the loop header. */ | |
483 start_sequence (); | |
111 | 484 /* count has been already copied through copy_rtx. */ |
485 reset_used_flags (count); | |
486 set_used_flags (condition); | |
0 | 487 tmp = force_operand (count, counter_reg); |
488 convert_move (counter_reg, tmp, 1); | |
489 sequence = get_insns (); | |
111 | 490 unshare_all_rtl_in_chain (sequence); |
0 | 491 end_sequence (); |
492 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
493 | |
494 if (desc->noloop_assumptions) | |
495 { | |
496 rtx ass = copy_rtx (desc->noloop_assumptions); | |
497 basic_block preheader = loop_preheader_edge (loop)->src; | |
111 | 498 basic_block set_zero = split_edge (loop_preheader_edge (loop)); |
499 basic_block new_preheader = split_edge (loop_preheader_edge (loop)); | |
0 | 500 edge te; |
501 | |
502 /* Expand the condition testing the assumptions and if it does not pass, | |
503 reset the count register to 0. */ | |
504 redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); | |
505 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); | |
506 | |
111 | 507 set_zero->count = profile_count::uninitialized (); |
0 | 508 |
509 te = single_succ_edge (preheader); | |
510 for (; ass; ass = XEXP (ass, 1)) | |
511 if (!add_test (XEXP (ass, 0), &te, set_zero)) | |
512 break; | |
513 | |
514 if (ass) | |
515 { | |
516 /* We reached a condition that is always true. This is very hard to | |
517 reproduce (such a loop does not roll, and thus it would most | |
518 likely get optimized out by some of the preceding optimizations). | |
519 In fact, I do not have any testcase for it. However, it would | |
520 also be very hard to show that it is impossible, so we must | |
521 handle this case. */ | |
522 set_zero->count = preheader->count; | |
523 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
524 |
0 | 525 if (EDGE_COUNT (set_zero->preds) == 0) |
526 { | |
527 /* All the conditions were simplified to false, remove the | |
528 unreachable set_zero block. */ | |
529 delete_basic_block (set_zero); | |
530 } | |
531 else | |
532 { | |
533 /* Reset the counter to zero in the set_zero block. */ | |
534 start_sequence (); | |
535 convert_move (counter_reg, noloop, 0); | |
536 sequence = get_insns (); | |
537 end_sequence (); | |
538 emit_insn_after (sequence, BB_END (set_zero)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
539 |
0 | 540 set_immediate_dominator (CDI_DOMINATORS, set_zero, |
541 recompute_dominator (CDI_DOMINATORS, | |
542 set_zero)); | |
543 } | |
544 | |
545 set_immediate_dominator (CDI_DOMINATORS, new_preheader, | |
546 recompute_dominator (CDI_DOMINATORS, | |
547 new_preheader)); | |
548 } | |
549 | |
550 /* Some targets (eg, C4x) need to initialize special looping | |
551 registers. */ | |
111 | 552 if (targetm.have_doloop_begin ()) |
553 if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq)) | |
554 emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src)); | |
0 | 555 |
556 /* Insert the new low-overhead looping insn. */ | |
557 emit_jump_insn_after (doloop_seq, BB_END (loop_end)); | |
558 jump_insn = BB_END (loop_end); | |
559 jump_label = block_label (desc->in_edge->dest); | |
560 JUMP_LABEL (jump_insn) = jump_label; | |
561 LABEL_NUSES (jump_label)++; | |
562 | |
563 /* Ensure the right fallthru edge is marked, for case we have reversed | |
564 the condition. */ | |
565 desc->in_edge->flags &= ~EDGE_FALLTHRU; | |
566 desc->out_edge->flags |= EDGE_FALLTHRU; | |
567 | |
568 /* Add a REG_NONNEG note if the actual or estimated maximum number | |
569 of iterations is non-negative. */ | |
570 if (nonneg) | |
571 add_reg_note (jump_insn, REG_NONNEG, NULL_RTX); | |
572 | |
573 /* Update the REG_BR_PROB note. */ | |
111 | 574 if (desc->in_edge->probability.initialized_p ()) |
575 add_reg_br_prob_note (jump_insn, desc->in_edge->probability); | |
576 } | |
577 | |
578 /* Called through note_stores. */ | |
579 | |
580 static void | |
581 record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) | |
582 { | |
583 bitmap mod = (bitmap)data; | |
584 if (REG_P (x)) | |
0 | 585 { |
111 | 586 unsigned int regno = REGNO (x); |
587 if (HARD_REGISTER_P (x)) | |
588 { | |
589 unsigned int end_regno = end_hard_regno (GET_MODE (x), regno); | |
590 do | |
591 bitmap_set_bit (mod, regno); | |
592 while (++regno < end_regno); | |
593 } | |
594 else | |
595 bitmap_set_bit (mod, regno); | |
0 | 596 } |
597 } | |
598 | |
599 /* Process loop described by LOOP validating that the loop is suitable for | |
600 conversion to use a low overhead looping instruction, replacing the jump | |
601 insn where suitable. Returns true if the loop was successfully | |
602 modified. */ | |
603 | |
604 static bool | |
145 | 605 doloop_optimize (class loop *loop) |
0 | 606 { |
111 | 607 scalar_int_mode mode; |
608 rtx doloop_reg; | |
609 rtx count; | |
610 widest_int iterations, iterations_max; | |
611 rtx_code_label *start_label; | |
0 | 612 rtx condition; |
111 | 613 unsigned level; |
614 HOST_WIDE_INT est_niter; | |
0 | 615 int max_cost; |
145 | 616 class niter_desc *desc; |
0 | 617 unsigned word_mode_size; |
618 unsigned HOST_WIDE_INT word_mode_max; | |
111 | 619 int entered_at_top; |
0 | 620 |
621 if (dump_file) | |
622 fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); | |
623 | |
624 iv_analysis_loop_init (loop); | |
625 | |
626 /* Find the simple exit of a LOOP. */ | |
627 desc = get_simple_loop_desc (loop); | |
628 | |
629 /* Check that loop is a candidate for a low-overhead looping insn. */ | |
630 if (!doloop_valid_p (loop, desc)) | |
631 { | |
632 if (dump_file) | |
633 fprintf (dump_file, | |
634 "Doloop: The loop is not suitable.\n"); | |
635 return false; | |
636 } | |
637 mode = desc->mode; | |
638 | |
111 | 639 est_niter = get_estimated_loop_iterations_int (loop); |
640 if (est_niter == -1) | |
641 est_niter = get_likely_max_loop_iterations_int (loop); | |
0 | 642 |
111 | 643 if (est_niter >= 0 && est_niter < 3) |
0 | 644 { |
645 if (dump_file) | |
646 fprintf (dump_file, | |
647 "Doloop: Too few iterations (%u) to be profitable.\n", | |
111 | 648 (unsigned int)est_niter); |
0 | 649 return false; |
650 } | |
651 | |
652 max_cost | |
145 | 653 = COSTS_N_INSNS (param_max_iterations_computation_cost); |
111 | 654 if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop)) |
0 | 655 > max_cost) |
656 { | |
657 if (dump_file) | |
658 fprintf (dump_file, | |
659 "Doloop: number of iterations too costly to compute.\n"); | |
660 return false; | |
661 } | |
662 | |
111 | 663 if (desc->const_iter) |
664 iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode), | |
665 UNSIGNED); | |
666 else | |
667 iterations = 0; | |
668 if (!get_max_loop_iterations (loop, &iterations_max)) | |
669 iterations_max = 0; | |
0 | 670 level = get_loop_level (loop) + 1; |
111 | 671 entered_at_top = (loop->latch == desc->in_edge->dest |
672 && contains_no_active_insn_p (loop->latch)); | |
673 if (!targetm.can_use_doloop_p (iterations, iterations_max, level, | |
674 entered_at_top)) | |
675 { | |
676 if (dump_file) | |
677 fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n"); | |
678 return false; | |
679 } | |
0 | 680 |
681 /* Generate looping insn. If the pattern FAILs then give up trying | |
682 to modify the loop since there is some aspect the back-end does | |
683 not like. */ | |
111 | 684 count = copy_rtx (desc->niter_expr); |
0 | 685 start_label = block_label (desc->in_edge->dest); |
686 doloop_reg = gen_reg_rtx (mode); | |
111 | 687 rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label); |
0 | 688 |
111 | 689 word_mode_size = GET_MODE_PRECISION (word_mode); |
690 word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1; | |
0 | 691 if (! doloop_seq |
692 && mode != word_mode | |
693 /* Before trying mode different from the one in that # of iterations is | |
694 computed, we must be sure that the number of iterations fits into | |
695 the new mode. */ | |
111 | 696 && (word_mode_size >= GET_MODE_PRECISION (mode) |
697 || wi::leu_p (iterations_max, word_mode_max))) | |
0 | 698 { |
111 | 699 if (word_mode_size > GET_MODE_PRECISION (mode)) |
700 count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode); | |
0 | 701 else |
111 | 702 count = lowpart_subreg (word_mode, count, mode); |
0 | 703 PUT_MODE (doloop_reg, word_mode); |
111 | 704 doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label); |
0 | 705 } |
706 if (! doloop_seq) | |
707 { | |
708 if (dump_file) | |
709 fprintf (dump_file, | |
710 "Doloop: Target unwilling to use doloop pattern!\n"); | |
711 return false; | |
712 } | |
713 | |
714 /* If multiple instructions were created, the last must be the | |
111 | 715 jump instruction. */ |
716 rtx_insn *doloop_insn = doloop_seq; | |
717 while (NEXT_INSN (doloop_insn) != NULL_RTX) | |
718 doloop_insn = NEXT_INSN (doloop_insn); | |
719 if (!JUMP_P (doloop_insn) | |
720 || !(condition = doloop_condition_get (doloop_insn))) | |
0 | 721 { |
722 if (dump_file) | |
723 fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); | |
724 return false; | |
725 } | |
726 | |
111 | 727 /* Ensure that the new sequence doesn't clobber a register that |
728 is live at the end of the block. */ | |
729 { | |
730 bitmap modified = BITMAP_ALLOC (NULL); | |
731 | |
732 for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i)) | |
145 | 733 note_stores (i, record_reg_sets, modified); |
111 | 734 |
735 basic_block loop_end = desc->out_edge->src; | |
736 bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified); | |
737 BITMAP_FREE (modified); | |
738 | |
739 if (fail) | |
740 { | |
741 if (dump_file) | |
742 fprintf (dump_file, "Doloop: doloop pattern clobbers live out\n"); | |
743 return false; | |
744 } | |
745 } | |
746 | |
747 doloop_modify (loop, desc, doloop_seq, condition, count); | |
0 | 748 return true; |
749 } | |
750 | |
751 /* This is the main entry point. Process all loops using doloop_optimize. */ | |
752 | |
753 void | |
754 doloop_optimize_loops (void) | |
755 { | |
145 | 756 class loop *loop; |
0 | 757 |
111 | 758 if (optimize == 1) |
759 { | |
760 df_live_add_problem (); | |
761 df_live_set_all_dirty (); | |
762 } | |
763 | |
764 FOR_EACH_LOOP (loop, 0) | |
0 | 765 { |
766 doloop_optimize (loop); | |
767 } | |
768 | |
111 | 769 if (optimize == 1) |
770 df_remove_problem (df_live); | |
771 | |
0 | 772 iv_analysis_done (); |
773 | |
111 | 774 checking_verify_loop_structure (); |
0 | 775 } |