Mercurial > hg > CbC > CbC_gcc
comparison gcc/fwprop.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 58ad6c70ea60 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* RTL-based forward propagation pass for GNU compiler. | |
2 Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. | |
3 Contributed by Paolo Bonzini and Steven Bosscher. | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
24 #include "tm.h" | |
25 #include "toplev.h" | |
26 | |
27 #include "timevar.h" | |
28 #include "rtl.h" | |
29 #include "tm_p.h" | |
30 #include "emit-rtl.h" | |
31 #include "insn-config.h" | |
32 #include "recog.h" | |
33 #include "flags.h" | |
34 #include "obstack.h" | |
35 #include "basic-block.h" | |
36 #include "output.h" | |
37 #include "df.h" | |
38 #include "target.h" | |
39 #include "cfgloop.h" | |
40 #include "tree-pass.h" | |
41 | |
42 | |
43 /* This pass does simple forward propagation and simplification when an | |
44 operand of an insn can only come from a single def. This pass uses | |
45 df.c, so it is global. However, we only do limited analysis of | |
46 available expressions. | |
47 | |
48 1) The pass tries to propagate the source of the def into the use, | |
49 and checks if the result is independent of the substituted value. | |
50 For example, the high word of a (zero_extend:DI (reg:SI M)) is always | |
51 zero, independent of the source register. | |
52 | |
53 In particular, we propagate constants into the use site. Sometimes | |
54 RTL expansion did not put the constant in the same insn on purpose, | |
55 to satisfy a predicate, and the result will fail to be recognized; | |
56 but this happens rarely and in this case we can still create a | |
57 REG_EQUAL note. For multi-word operations, this | |
58 | |
59 (set (subreg:SI (reg:DI 120) 0) (const_int 0)) | |
60 (set (subreg:SI (reg:DI 120) 4) (const_int -1)) | |
61 (set (subreg:SI (reg:DI 122) 0) | |
62 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0))) | |
63 (set (subreg:SI (reg:DI 122) 4) | |
64 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4))) | |
65 | |
66 can be simplified to the much simpler | |
67 | |
68 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119))) | |
69 (set (subreg:SI (reg:DI 122) 4) (const_int -1)) | |
70 | |
71 This particular propagation is also effective at putting together | |
72 complex addressing modes. We are more aggressive inside MEMs, in | |
73 that all definitions are propagated if the use is in a MEM; if the | |
74 result is a valid memory address we check address_cost to decide | |
75 whether the substitution is worthwhile. | |
76 | |
77 2) The pass propagates register copies. This is not as effective as | |
78 the copy propagation done by CSE's canon_reg, which works by walking | |
79 the instruction chain, but it can still help the other transformations. | |
80 | |
81 We should consider removing this optimization, and instead reorder the | |
82 RTL passes, because GCSE does this transformation too. With some luck, | |
83 the CSE pass at the end of rest_of_handle_gcse could also go away. | |
84 | |
85 3) The pass looks for paradoxical subregs that are actually unnecessary. | |
86 Things like this: | |
87 | |
88 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) | |
89 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) | |
90 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0) | |
91 (subreg:SI (reg:QI 121) 0))) | |
92 | |
93 are very common on machines that can only do word-sized operations. | |
94 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0), | |
95 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0), | |
96 we can replace the paradoxical subreg with simply (reg:WIDE M). The | |
97 above will simplify this to | |
98 | |
99 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) | |
100 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) | |
101 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119))) | |
102 | |
103 where the first two insns are now dead. */ | |
104 | |
105 | |
106 static int num_changes; | |
107 | |
108 | |
109 /* Do not try to replace constant addresses or addresses of local and | |
110 argument slots. These MEM expressions are made only once and inserted | |
111 in many instructions, as well as being used to control symbol table | |
112 output. It is not safe to clobber them. | |
113 | |
114 There are some uncommon cases where the address is already in a register | |
115 for some reason, but we cannot take advantage of that because we have | |
116 no easy way to unshare the MEM. In addition, looking up all stack | |
117 addresses is costly. */ | |
118 | |
119 static bool | |
120 can_simplify_addr (rtx addr) | |
121 { | |
122 rtx reg; | |
123 | |
124 if (CONSTANT_ADDRESS_P (addr)) | |
125 return false; | |
126 | |
127 if (GET_CODE (addr) == PLUS) | |
128 reg = XEXP (addr, 0); | |
129 else | |
130 reg = addr; | |
131 | |
132 return (!REG_P (reg) | |
133 || (REGNO (reg) != FRAME_POINTER_REGNUM | |
134 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM | |
135 && REGNO (reg) != ARG_POINTER_REGNUM)); | |
136 } | |
137 | |
138 /* Returns a canonical version of X for the address, from the point of view, | |
139 that all multiplications are represented as MULT instead of the multiply | |
140 by a power of 2 being represented as ASHIFT. | |
141 | |
142 Every ASHIFT we find has been made by simplify_gen_binary and was not | |
143 there before, so it is not shared. So we can do this in place. */ | |
144 | |
145 static void | |
146 canonicalize_address (rtx x) | |
147 { | |
148 for (;;) | |
149 switch (GET_CODE (x)) | |
150 { | |
151 case ASHIFT: | |
152 if (GET_CODE (XEXP (x, 1)) == CONST_INT | |
153 && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x)) | |
154 && INTVAL (XEXP (x, 1)) >= 0) | |
155 { | |
156 HOST_WIDE_INT shift = INTVAL (XEXP (x, 1)); | |
157 PUT_CODE (x, MULT); | |
158 XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift, | |
159 GET_MODE (x)); | |
160 } | |
161 | |
162 x = XEXP (x, 0); | |
163 break; | |
164 | |
165 case PLUS: | |
166 if (GET_CODE (XEXP (x, 0)) == PLUS | |
167 || GET_CODE (XEXP (x, 0)) == ASHIFT | |
168 || GET_CODE (XEXP (x, 0)) == CONST) | |
169 canonicalize_address (XEXP (x, 0)); | |
170 | |
171 x = XEXP (x, 1); | |
172 break; | |
173 | |
174 case CONST: | |
175 x = XEXP (x, 0); | |
176 break; | |
177 | |
178 default: | |
179 return; | |
180 } | |
181 } | |
182 | |
183 /* OLD is a memory address. Return whether it is good to use NEW instead, | |
184 for a memory access in the given MODE. */ | |
185 | |
186 static bool | |
187 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode, | |
188 bool speed) | |
189 { | |
190 int gain; | |
191 | |
192 if (rtx_equal_p (old_rtx, new_rtx) || !memory_address_p (mode, new_rtx)) | |
193 return false; | |
194 | |
195 /* Copy propagation is always ok. */ | |
196 if (REG_P (old_rtx) && REG_P (new_rtx)) | |
197 return true; | |
198 | |
199 /* Prefer the new address if it is less expensive. */ | |
200 gain = address_cost (old_rtx, mode, speed) - address_cost (new_rtx, mode, speed); | |
201 | |
202 /* If the addresses have equivalent cost, prefer the new address | |
203 if it has the highest `rtx_cost'. That has the potential of | |
204 eliminating the most insns without additional costs, and it | |
205 is the same that cse.c used to do. */ | |
206 if (gain == 0) | |
207 gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed); | |
208 | |
209 return (gain > 0); | |
210 } | |
211 | |
212 | |
213 /* Flags for the last parameter of propagate_rtx_1. */ | |
214 | |
215 enum { | |
216 /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true; | |
217 if it is false, propagate_rtx_1 returns false if, for at least | |
218 one occurrence OLD, it failed to collapse the result to a constant. | |
219 For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may | |
220 collapse to zero if replacing (reg:M B) with (reg:M A). | |
221 | |
222 PR_CAN_APPEAR is disregarded inside MEMs: in that case, | |
223 propagate_rtx_1 just tries to make cheaper and valid memory | |
224 addresses. */ | |
225 PR_CAN_APPEAR = 1, | |
226 | |
227 /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement | |
228 outside memory addresses. This is needed because propagate_rtx_1 does | |
229 not do any analysis on memory; thus it is very conservative and in general | |
230 it will fail if non-read-only MEMs are found in the source expression. | |
231 | |
232 PR_HANDLE_MEM is set when the source of the propagation was not | |
233 another MEM. Then, it is safe not to treat non-read-only MEMs as | |
234 ``opaque'' objects. */ | |
235 PR_HANDLE_MEM = 2, | |
236 | |
237 /* Set when costs should be optimized for speed. */ | |
238 PR_OPTIMIZE_FOR_SPEED = 4 | |
239 }; | |
240 | |
241 | |
242 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the | |
243 resulting expression. Replace *PX with a new RTL expression if an | |
244 occurrence of OLD was found. | |
245 | |
246 This is only a wrapper around simplify-rtx.c: do not add any pattern | |
247 matching code here. (The sole exception is the handling of LO_SUM, but | |
248 that is because there is no simplify_gen_* function for LO_SUM). */ | |
249 | |
250 static bool | |
251 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) | |
252 { | |
253 rtx x = *px, tem = NULL_RTX, op0, op1, op2; | |
254 enum rtx_code code = GET_CODE (x); | |
255 enum machine_mode mode = GET_MODE (x); | |
256 enum machine_mode op_mode; | |
257 bool can_appear = (flags & PR_CAN_APPEAR) != 0; | |
258 bool valid_ops = true; | |
259 | |
260 if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x)) | |
261 { | |
262 /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether | |
263 they have side effects or not). */ | |
264 *px = (side_effects_p (x) | |
265 ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx) | |
266 : gen_rtx_SCRATCH (GET_MODE (x))); | |
267 return false; | |
268 } | |
269 | |
270 /* If X is OLD_RTX, return NEW_RTX. But not if replacing only within an | |
271 address, and we are *not* inside one. */ | |
272 if (x == old_rtx) | |
273 { | |
274 *px = new_rtx; | |
275 return can_appear; | |
276 } | |
277 | |
278 /* If this is an expression, try recursive substitution. */ | |
279 switch (GET_RTX_CLASS (code)) | |
280 { | |
281 case RTX_UNARY: | |
282 op0 = XEXP (x, 0); | |
283 op_mode = GET_MODE (op0); | |
284 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); | |
285 if (op0 == XEXP (x, 0)) | |
286 return true; | |
287 tem = simplify_gen_unary (code, mode, op0, op_mode); | |
288 break; | |
289 | |
290 case RTX_BIN_ARITH: | |
291 case RTX_COMM_ARITH: | |
292 op0 = XEXP (x, 0); | |
293 op1 = XEXP (x, 1); | |
294 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); | |
295 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); | |
296 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) | |
297 return true; | |
298 tem = simplify_gen_binary (code, mode, op0, op1); | |
299 break; | |
300 | |
301 case RTX_COMPARE: | |
302 case RTX_COMM_COMPARE: | |
303 op0 = XEXP (x, 0); | |
304 op1 = XEXP (x, 1); | |
305 op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); | |
306 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); | |
307 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); | |
308 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) | |
309 return true; | |
310 tem = simplify_gen_relational (code, mode, op_mode, op0, op1); | |
311 break; | |
312 | |
313 case RTX_TERNARY: | |
314 case RTX_BITFIELD_OPS: | |
315 op0 = XEXP (x, 0); | |
316 op1 = XEXP (x, 1); | |
317 op2 = XEXP (x, 2); | |
318 op_mode = GET_MODE (op0); | |
319 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); | |
320 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); | |
321 valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags); | |
322 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2)) | |
323 return true; | |
324 if (op_mode == VOIDmode) | |
325 op_mode = GET_MODE (op0); | |
326 tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2); | |
327 break; | |
328 | |
329 case RTX_EXTRA: | |
330 /* The only case we try to handle is a SUBREG. */ | |
331 if (code == SUBREG) | |
332 { | |
333 op0 = XEXP (x, 0); | |
334 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); | |
335 if (op0 == XEXP (x, 0)) | |
336 return true; | |
337 tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)), | |
338 SUBREG_BYTE (x)); | |
339 } | |
340 break; | |
341 | |
342 case RTX_OBJ: | |
343 if (code == MEM && x != new_rtx) | |
344 { | |
345 rtx new_op0; | |
346 op0 = XEXP (x, 0); | |
347 | |
348 /* There are some addresses that we cannot work on. */ | |
349 if (!can_simplify_addr (op0)) | |
350 return true; | |
351 | |
352 op0 = new_op0 = targetm.delegitimize_address (op0); | |
353 valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx, | |
354 flags | PR_CAN_APPEAR); | |
355 | |
356 /* Dismiss transformation that we do not want to carry on. */ | |
357 if (!valid_ops | |
358 || new_op0 == op0 | |
359 || !(GET_MODE (new_op0) == GET_MODE (op0) | |
360 || GET_MODE (new_op0) == VOIDmode)) | |
361 return true; | |
362 | |
363 canonicalize_address (new_op0); | |
364 | |
365 /* Copy propagations are always ok. Otherwise check the costs. */ | |
366 if (!(REG_P (old_rtx) && REG_P (new_rtx)) | |
367 && !should_replace_address (op0, new_op0, GET_MODE (x), | |
368 flags & PR_OPTIMIZE_FOR_SPEED)) | |
369 return true; | |
370 | |
371 tem = replace_equiv_address_nv (x, new_op0); | |
372 } | |
373 | |
374 else if (code == LO_SUM) | |
375 { | |
376 op0 = XEXP (x, 0); | |
377 op1 = XEXP (x, 1); | |
378 | |
379 /* The only simplification we do attempts to remove references to op0 | |
380 or make it constant -- in both cases, op0's invalidity will not | |
381 make the result invalid. */ | |
382 propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR); | |
383 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); | |
384 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) | |
385 return true; | |
386 | |
387 /* (lo_sum (high x) x) -> x */ | |
388 if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1)) | |
389 tem = op1; | |
390 else | |
391 tem = gen_rtx_LO_SUM (mode, op0, op1); | |
392 | |
393 /* OP1 is likely not a legitimate address, otherwise there would have | |
394 been no LO_SUM. We want it to disappear if it is invalid, return | |
395 false in that case. */ | |
396 return memory_address_p (mode, tem); | |
397 } | |
398 | |
399 else if (code == REG) | |
400 { | |
401 if (rtx_equal_p (x, old_rtx)) | |
402 { | |
403 *px = new_rtx; | |
404 return can_appear; | |
405 } | |
406 } | |
407 break; | |
408 | |
409 default: | |
410 break; | |
411 } | |
412 | |
413 /* No change, no trouble. */ | |
414 if (tem == NULL_RTX) | |
415 return true; | |
416 | |
417 *px = tem; | |
418 | |
419 /* The replacement we made so far is valid, if all of the recursive | |
420 replacements were valid, or we could simplify everything to | |
421 a constant. */ | |
422 return valid_ops || can_appear || CONSTANT_P (tem); | |
423 } | |
424 | |
425 | |
426 /* for_each_rtx traversal function that returns 1 if BODY points to | |
427 a non-constant mem. */ | |
428 | |
429 static int | |
430 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED) | |
431 { | |
432 rtx x = *body; | |
433 return MEM_P (x) && !MEM_READONLY_P (x); | |
434 } | |
435 | |
436 | |
437 /* Replace all occurrences of OLD in X with NEW and try to simplify the | |
438 resulting expression (in mode MODE). Return a new expression if it is | |
439 a constant, otherwise X. | |
440 | |
441 Simplifications where occurrences of NEW collapse to a constant are always | |
442 accepted. All simplifications are accepted if NEW is a pseudo too. | |
443 Otherwise, we accept simplifications that have a lower or equal cost. */ | |
444 | |
445 static rtx | |
446 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx, | |
447 bool speed) | |
448 { | |
449 rtx tem; | |
450 bool collapsed; | |
451 int flags; | |
452 | |
453 if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER) | |
454 return NULL_RTX; | |
455 | |
456 flags = 0; | |
457 if (REG_P (new_rtx) || CONSTANT_P (new_rtx)) | |
458 flags |= PR_CAN_APPEAR; | |
459 if (!for_each_rtx (&new_rtx, varying_mem_p, NULL)) | |
460 flags |= PR_HANDLE_MEM; | |
461 | |
462 if (speed) | |
463 flags |= PR_OPTIMIZE_FOR_SPEED; | |
464 | |
465 tem = x; | |
466 collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags); | |
467 if (tem == x || !collapsed) | |
468 return NULL_RTX; | |
469 | |
470 /* gen_lowpart_common will not be able to process VOIDmode entities other | |
471 than CONST_INTs. */ | |
472 if (GET_MODE (tem) == VOIDmode && GET_CODE (tem) != CONST_INT) | |
473 return NULL_RTX; | |
474 | |
475 if (GET_MODE (tem) == VOIDmode) | |
476 tem = rtl_hooks.gen_lowpart_no_emit (mode, tem); | |
477 else | |
478 gcc_assert (GET_MODE (tem) == mode); | |
479 | |
480 return tem; | |
481 } | |
482 | |
483 | |
484 | |
485 | |
486 /* Return true if the register from reference REF is killed | |
487 between FROM to (but not including) TO. */ | |
488 | |
489 static bool | |
490 local_ref_killed_between_p (df_ref ref, rtx from, rtx to) | |
491 { | |
492 rtx insn; | |
493 | |
494 for (insn = from; insn != to; insn = NEXT_INSN (insn)) | |
495 { | |
496 df_ref *def_rec; | |
497 if (!INSN_P (insn)) | |
498 continue; | |
499 | |
500 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) | |
501 { | |
502 df_ref def = *def_rec; | |
503 if (DF_REF_REGNO (ref) == DF_REF_REGNO (def)) | |
504 return true; | |
505 } | |
506 } | |
507 return false; | |
508 } | |
509 | |
510 | |
511 /* Check if the given DEF is available in INSN. This would require full | |
512 computation of available expressions; we check only restricted conditions: | |
513 - if DEF is the sole definition of its register, go ahead; | |
514 - in the same basic block, we check for no definitions killing the | |
515 definition of DEF_INSN; | |
516 - if USE's basic block has DEF's basic block as the sole predecessor, | |
517 we check if the definition is killed after DEF_INSN or before | |
518 TARGET_INSN insn, in their respective basic blocks. */ | |
519 static bool | |
520 use_killed_between (df_ref use, rtx def_insn, rtx target_insn) | |
521 { | |
522 basic_block def_bb = BLOCK_FOR_INSN (def_insn); | |
523 basic_block target_bb = BLOCK_FOR_INSN (target_insn); | |
524 int regno; | |
525 df_ref def; | |
526 | |
527 /* In some obscure situations we can have a def reaching a use | |
528 that is _before_ the def. In other words the def does not | |
529 dominate the use even though the use and def are in the same | |
530 basic block. This can happen when a register may be used | |
531 uninitialized in a loop. In such cases, we must assume that | |
532 DEF is not available. */ | |
533 if (def_bb == target_bb | |
534 ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn) | |
535 : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb)) | |
536 return true; | |
537 | |
538 /* Check if the reg in USE has only one definition. We already | |
539 know that this definition reaches use, or we wouldn't be here. | |
540 However, this is invalid for hard registers because if they are | |
541 live at the beginning of the function it does not mean that we | |
542 have an uninitialized access. */ | |
543 regno = DF_REF_REGNO (use); | |
544 def = DF_REG_DEF_CHAIN (regno); | |
545 if (def | |
546 && DF_REF_NEXT_REG (def) == NULL | |
547 && regno >= FIRST_PSEUDO_REGISTER) | |
548 return false; | |
549 | |
550 /* Check locally if we are in the same basic block. */ | |
551 if (def_bb == target_bb) | |
552 return local_ref_killed_between_p (use, def_insn, target_insn); | |
553 | |
554 /* Finally, if DEF_BB is the sole predecessor of TARGET_BB. */ | |
555 if (single_pred_p (target_bb) | |
556 && single_pred (target_bb) == def_bb) | |
557 { | |
558 df_ref x; | |
559 | |
560 /* See if USE is killed between DEF_INSN and the last insn in the | |
561 basic block containing DEF_INSN. */ | |
562 x = df_bb_regno_last_def_find (def_bb, regno); | |
563 if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn)) | |
564 return true; | |
565 | |
566 /* See if USE is killed between TARGET_INSN and the first insn in the | |
567 basic block containing TARGET_INSN. */ | |
568 x = df_bb_regno_first_def_find (target_bb, regno); | |
569 if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn)) | |
570 return true; | |
571 | |
572 return false; | |
573 } | |
574 | |
575 /* Otherwise assume the worst case. */ | |
576 return true; | |
577 } | |
578 | |
579 | |
580 /* Check if all uses in DEF_INSN can be used in TARGET_INSN. This | |
581 would require full computation of available expressions; | |
582 we check only restricted conditions, see use_killed_between. */ | |
583 static bool | |
584 all_uses_available_at (rtx def_insn, rtx target_insn) | |
585 { | |
586 df_ref *use_rec; | |
587 struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn); | |
588 rtx def_set = single_set (def_insn); | |
589 | |
590 gcc_assert (def_set); | |
591 | |
592 /* If target_insn comes right after def_insn, which is very common | |
593 for addresses, we can use a quicker test. */ | |
594 if (NEXT_INSN (def_insn) == target_insn | |
595 && REG_P (SET_DEST (def_set))) | |
596 { | |
597 rtx def_reg = SET_DEST (def_set); | |
598 | |
599 /* If the insn uses the reg that it defines, the substitution is | |
600 invalid. */ | |
601 for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++) | |
602 { | |
603 df_ref use = *use_rec; | |
604 if (rtx_equal_p (DF_REF_REG (use), def_reg)) | |
605 return false; | |
606 } | |
607 for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++) | |
608 { | |
609 df_ref use = *use_rec; | |
610 if (rtx_equal_p (DF_REF_REG (use), def_reg)) | |
611 return false; | |
612 } | |
613 } | |
614 else | |
615 { | |
616 /* Look at all the uses of DEF_INSN, and see if they are not | |
617 killed between DEF_INSN and TARGET_INSN. */ | |
618 for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++) | |
619 { | |
620 df_ref use = *use_rec; | |
621 if (use_killed_between (use, def_insn, target_insn)) | |
622 return false; | |
623 } | |
624 for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++) | |
625 { | |
626 df_ref use = *use_rec; | |
627 if (use_killed_between (use, def_insn, target_insn)) | |
628 return false; | |
629 } | |
630 } | |
631 | |
632 return true; | |
633 } | |
634 | |
635 | |
636 struct find_occurrence_data | |
637 { | |
638 rtx find; | |
639 rtx *retval; | |
640 }; | |
641 | |
642 /* Callback for for_each_rtx, used in find_occurrence. | |
643 See if PX is the rtx we have to find. Return 1 to stop for_each_rtx | |
644 if successful, or 0 to continue traversing otherwise. */ | |
645 | |
646 static int | |
647 find_occurrence_callback (rtx *px, void *data) | |
648 { | |
649 struct find_occurrence_data *fod = (struct find_occurrence_data *) data; | |
650 rtx x = *px; | |
651 rtx find = fod->find; | |
652 | |
653 if (x == find) | |
654 { | |
655 fod->retval = px; | |
656 return 1; | |
657 } | |
658 | |
659 return 0; | |
660 } | |
661 | |
662 /* Return a pointer to one of the occurrences of register FIND in *PX. */ | |
663 | |
664 static rtx * | |
665 find_occurrence (rtx *px, rtx find) | |
666 { | |
667 struct find_occurrence_data data; | |
668 | |
669 gcc_assert (REG_P (find) | |
670 || (GET_CODE (find) == SUBREG | |
671 && REG_P (SUBREG_REG (find)))); | |
672 | |
673 data.find = find; | |
674 data.retval = NULL; | |
675 for_each_rtx (px, find_occurrence_callback, &data); | |
676 return data.retval; | |
677 } | |
678 | |
679 | |
680 /* Inside INSN, the expression rooted at *LOC has been changed, moving some | |
681 uses from USE_VEC. Find those that are present, and create new items | |
682 in the data flow object of the pass. Mark any new uses as having the | |
683 given TYPE. */ | |
684 static void | |
685 update_df (rtx insn, rtx *loc, df_ref *use_rec, enum df_ref_type type, | |
686 int new_flags) | |
687 { | |
688 bool changed = false; | |
689 | |
690 /* Add a use for the registers that were propagated. */ | |
691 while (*use_rec) | |
692 { | |
693 df_ref use = *use_rec; | |
694 df_ref orig_use = use, new_use; | |
695 int width = -1; | |
696 int offset = -1; | |
697 enum machine_mode mode = 0; | |
698 rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use)); | |
699 use_rec++; | |
700 | |
701 if (!new_loc) | |
702 continue; | |
703 | |
704 if (DF_REF_FLAGS_IS_SET (orig_use, DF_REF_SIGN_EXTRACT | DF_REF_ZERO_EXTRACT)) | |
705 { | |
706 width = DF_REF_EXTRACT_WIDTH (orig_use); | |
707 offset = DF_REF_EXTRACT_OFFSET (orig_use); | |
708 mode = DF_REF_EXTRACT_MODE (orig_use); | |
709 } | |
710 | |
711 /* Add a new insn use. Use the original type, because it says if the | |
712 use was within a MEM. */ | |
713 new_use = df_ref_create (DF_REF_REG (orig_use), new_loc, | |
714 insn, BLOCK_FOR_INSN (insn), | |
715 type, DF_REF_FLAGS (orig_use) | new_flags, | |
716 width, offset, mode); | |
717 | |
718 /* Set up the use-def chain. */ | |
719 df_chain_copy (new_use, DF_REF_CHAIN (orig_use)); | |
720 changed = true; | |
721 } | |
722 if (changed) | |
723 df_insn_rescan (insn); | |
724 } | |
725 | |
726 | |
727 /* Try substituting NEW into LOC, which originated from forward propagation | |
728 of USE's value from DEF_INSN. SET_REG_EQUAL says whether we are | |
729 substituting the whole SET_SRC, so we can set a REG_EQUAL note if the | |
730 new insn is not recognized. Return whether the substitution was | |
731 performed. */ | |
732 | |
733 static bool | |
734 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal) | |
735 { | |
736 rtx insn = DF_REF_INSN (use); | |
737 enum df_ref_type type = DF_REF_TYPE (use); | |
738 int flags = DF_REF_FLAGS (use); | |
739 rtx set = single_set (insn); | |
740 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); | |
741 int old_cost = rtx_cost (SET_SRC (set), SET, speed); | |
742 bool ok; | |
743 | |
744 if (dump_file) | |
745 { | |
746 fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn)); | |
747 print_inline_rtx (dump_file, *loc, 2); | |
748 fprintf (dump_file, "\n with "); | |
749 print_inline_rtx (dump_file, new_rtx, 2); | |
750 fprintf (dump_file, "\n"); | |
751 } | |
752 | |
753 validate_unshare_change (insn, loc, new_rtx, true); | |
754 if (!verify_changes (0)) | |
755 { | |
756 if (dump_file) | |
757 fprintf (dump_file, "Changes to insn %d not recognized\n", | |
758 INSN_UID (insn)); | |
759 ok = false; | |
760 } | |
761 | |
762 else if (DF_REF_TYPE (use) == DF_REF_REG_USE | |
763 && rtx_cost (SET_SRC (set), SET, speed) > old_cost) | |
764 { | |
765 if (dump_file) | |
766 fprintf (dump_file, "Changes to insn %d not profitable\n", | |
767 INSN_UID (insn)); | |
768 ok = false; | |
769 } | |
770 | |
771 else | |
772 { | |
773 if (dump_file) | |
774 fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn)); | |
775 ok = true; | |
776 } | |
777 | |
778 if (ok) | |
779 { | |
780 confirm_change_group (); | |
781 num_changes++; | |
782 | |
783 df_ref_remove (use); | |
784 if (!CONSTANT_P (new_rtx)) | |
785 { | |
786 struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn); | |
787 update_df (insn, loc, DF_INSN_INFO_USES (insn_info), type, flags); | |
788 update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info), type, flags); | |
789 } | |
790 } | |
791 else | |
792 { | |
793 cancel_changes (0); | |
794 | |
795 /* Can also record a simplified value in a REG_EQUAL note, | |
796 making a new one if one does not already exist. */ | |
797 if (set_reg_equal) | |
798 { | |
799 if (dump_file) | |
800 fprintf (dump_file, " Setting REG_EQUAL note\n"); | |
801 | |
802 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx)); | |
803 | |
804 /* ??? Is this still necessary if we add the note through | |
805 set_unique_reg_note? */ | |
806 if (!CONSTANT_P (new_rtx)) | |
807 { | |
808 struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn); | |
809 update_df (insn, loc, DF_INSN_INFO_USES (insn_info), | |
810 type, DF_REF_IN_NOTE); | |
811 update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info), | |
812 type, DF_REF_IN_NOTE); | |
813 } | |
814 } | |
815 } | |
816 | |
817 return ok; | |
818 } | |
819 | |
820 | |
821 /* If USE is a paradoxical subreg, see if it can be replaced by a pseudo. */ | |
822 | |
823 static bool | |
824 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set) | |
825 { | |
826 rtx use_reg = DF_REF_REG (use); | |
827 rtx use_insn, src; | |
828 | |
829 /* Only consider paradoxical subregs... */ | |
830 enum machine_mode use_mode = GET_MODE (use_reg); | |
831 if (GET_CODE (use_reg) != SUBREG | |
832 || !REG_P (SET_DEST (def_set)) | |
833 || GET_MODE_SIZE (use_mode) | |
834 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg)))) | |
835 return false; | |
836 | |
837 /* If this is a paradoxical SUBREG, we have no idea what value the | |
838 extra bits would have. However, if the operand is equivalent to | |
839 a SUBREG whose operand is the same as our mode, and all the modes | |
840 are within a word, we can just use the inner operand because | |
841 these SUBREGs just say how to treat the register. */ | |
842 use_insn = DF_REF_INSN (use); | |
843 src = SET_SRC (def_set); | |
844 if (GET_CODE (src) == SUBREG | |
845 && REG_P (SUBREG_REG (src)) | |
846 && GET_MODE (SUBREG_REG (src)) == use_mode | |
847 && subreg_lowpart_p (src) | |
848 && all_uses_available_at (def_insn, use_insn)) | |
849 return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src), | |
850 def_insn, false); | |
851 else | |
852 return false; | |
853 } | |
854 | |
/* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
   result.  Return false if the propagation was rejected up front;
   otherwise return whatever try_fwprop_subst reports.  */

static bool
forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
{
  rtx use_insn = DF_REF_INSN (use);
  /* The using insn must be a plain single-set; anything else (parallel,
     multiple sets) is not handled here.  */
  rtx use_set = single_set (use_insn);
  rtx src, reg, new_rtx, *loc;
  bool set_reg_equal;
  enum machine_mode mode;

  if (!use_set)
    return false;

  /* Do not propagate into PC, CC0, etc.  */
  if (GET_MODE (SET_DEST (use_set)) == VOIDmode)
    return false;

  /* If def and use are subreg, check if they match.  */
  reg = DF_REF_REG (use);
  if (GET_CODE (reg) == SUBREG
      && GET_CODE (SET_DEST (def_set)) == SUBREG
      && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)
	  || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)))
    return false;

  /* Check if the def had a subreg, but the use has the whole reg.  */
  if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
    return false;

  /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
     previous case, the optimization is possible and often useful indeed.  */
  if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
    reg = SUBREG_REG (reg);

  /* Check if the substitution is valid (last, because it's the most
     expensive check!).  A constant is always available; anything else
     must still hold the same value at the use point.  */
  src = SET_SRC (def_set);
  if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
    return false;

  /* Check if the def is loading something from the constant pool; in this
     case we would undo optimization such as compress_float_constant.
     Still, we can set a REG_EQUAL note.  */
  if (MEM_P (src) && MEM_READONLY_P (src))
    {
      rtx x = avoid_constant_pool_reference (src);
      if (x != src)
	{
	  /* Record the constant-pool value in a REG_EQUAL note (updating
	     an existing one if the replacement actually changed it), but
	     do not propagate the load itself.  */
	  rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
	  rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
	  rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
	  if (old_rtx != new_rtx)
	    set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
	}
      return false;
    }

  /* Else try simplifying.  */

  if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
    {
      /* The use is inside the address of a store destination.  */
      loc = &SET_DEST (use_set);
      set_reg_equal = false;
    }
  else
    {
      rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
      if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
	loc = &XEXP (note, 0);
      else
	loc = &SET_SRC (use_set);

      /* Do not replace an existing REG_EQUAL note if the insn is not
	 recognized.  Either we're already replacing in the note, or
	 we'll separately try plugging the definition in the note and
	 simplifying.  */
      set_reg_equal = (note == NULL_RTX);
    }

  /* A VOIDmode *LOC (e.g. a CONST_INT) takes its mode from the
     destination of the use's set.  */
  if (GET_MODE (*loc) == VOIDmode)
    mode = GET_MODE (SET_DEST (use_set));
  else
    mode = GET_MODE (*loc);

  new_rtx = propagate_rtx (*loc, mode, reg, src,
                           optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));

  if (!new_rtx)
    return false;

  return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
}
949 | |
950 | |
951 /* Given a use USE of an insn, if it has a single reaching | |
952 definition, try to forward propagate it into that insn. */ | |
953 | |
954 static void | |
955 forward_propagate_into (df_ref use) | |
956 { | |
957 struct df_link *defs; | |
958 df_ref def; | |
959 rtx def_insn, def_set, use_insn; | |
960 rtx parent; | |
961 | |
962 if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) | |
963 return; | |
964 if (DF_REF_IS_ARTIFICIAL (use)) | |
965 return; | |
966 | |
967 /* Only consider uses that have a single definition. */ | |
968 defs = DF_REF_CHAIN (use); | |
969 if (!defs || defs->next) | |
970 return; | |
971 | |
972 def = defs->ref; | |
973 if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE) | |
974 return; | |
975 if (DF_REF_IS_ARTIFICIAL (def)) | |
976 return; | |
977 | |
978 /* Do not propagate loop invariant definitions inside the loop. */ | |
979 if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father) | |
980 return; | |
981 | |
982 /* Check if the use is still present in the insn! */ | |
983 use_insn = DF_REF_INSN (use); | |
984 if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) | |
985 parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); | |
986 else | |
987 parent = PATTERN (use_insn); | |
988 | |
989 if (!reg_mentioned_p (DF_REF_REG (use), parent)) | |
990 return; | |
991 | |
992 def_insn = DF_REF_INSN (def); | |
993 if (multiple_sets (def_insn)) | |
994 return; | |
995 def_set = single_set (def_insn); | |
996 if (!def_set) | |
997 return; | |
998 | |
999 /* Only try one kind of propagation. If two are possible, we'll | |
1000 do it on the following iterations. */ | |
1001 if (!forward_propagate_and_simplify (use, def_insn, def_set)) | |
1002 forward_propagate_subreg (use, def_insn, def_set); | |
1003 } | |
1004 | |
1005 | |
/* Shared setup for both fwprop passes: reset the success counter,
   compute dominators, initialize loop discovery, and solve the use-def
   dataflow problem (including uses inside REG_EQUAL/REG_EQUIV notes).  */

static void
fwprop_init (void)
{
  num_changes = 0;
  calculate_dominance_info (CDI_DOMINATORS);

  /* We do not always want to propagate into loops, so we have to find
     loops and be careful about them.  But we have to call flow_loops_find
     before df_analyze, because flow_loops_find may introduce new jump
     insns (sadly) if we are not working in cfglayout mode.  */
  loop_optimizer_init (0);

  /* Now set up the dataflow problem (we only want use-def chains) and
     put the dataflow solver to work.  */
  df_set_flags (DF_EQ_NOTES);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();
  df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
  df_set_flags (DF_DEFER_INSN_RESCAN);
}
1026 | |
/* Shared teardown for both fwprop passes: release loop and dominator
   info, clean up the CFG, delete insns made dead by propagation,
   report statistics, and discard the use-def chain problem.  */

static void
fwprop_done (void)
{
  loop_optimizer_finalize ();

  free_dominance_info (CDI_DOMINATORS);
  cleanup_cfg (0);
  delete_trivially_dead_insns (get_insns (), max_reg_num ());

  if (dump_file)
    fprintf (dump_file,
	     "\nNumber of successful forward propagations: %d\n\n",
	     num_changes);
  df_remove_problem (df_chain);
}
1042 | |
1043 | |
1044 | |
1045 /* Main entry point. */ | |
1046 | |
1047 static bool | |
1048 gate_fwprop (void) | |
1049 { | |
1050 return optimize > 0 && flag_forward_propagate; | |
1051 } | |
1052 | |
1053 static unsigned int | |
1054 fwprop (void) | |
1055 { | |
1056 unsigned i; | |
1057 | |
1058 fwprop_init (); | |
1059 | |
1060 /* Go through all the uses. update_df will create new ones at the | |
1061 end, and we'll go through them as well. | |
1062 | |
1063 Do not forward propagate addresses into loops until after unrolling. | |
1064 CSE did so because it was able to fix its own mess, but we are not. */ | |
1065 | |
1066 for (i = 0; i < DF_USES_TABLE_SIZE (); i++) | |
1067 { | |
1068 df_ref use = DF_USES_GET (i); | |
1069 if (use) | |
1070 if (DF_REF_TYPE (use) == DF_REF_REG_USE | |
1071 || DF_REF_BB (use)->loop_father == NULL | |
1072 /* The outer most loop is not really a loop. */ | |
1073 || loop_outer (DF_REF_BB (use)->loop_father) == NULL) | |
1074 forward_propagate_into (use); | |
1075 } | |
1076 | |
1077 fwprop_done (); | |
1078 return 0; | |
1079 } | |
1080 | |
/* Pass descriptor for the first forward-propagation pass ("fwprop1"),
   which propagates register uses and, outside real loops, addresses.  */
struct rtl_opt_pass pass_rtl_fwprop =
{
 {
  RTL_PASS,
  "fwprop1",                            /* name */
  gate_fwprop,                          /* gate */
  fwprop,				/* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_FWPROP,                            /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func                        /* todo_flags_finish */
 }
};
1100 | |
1101 static unsigned int | |
1102 fwprop_addr (void) | |
1103 { | |
1104 unsigned i; | |
1105 fwprop_init (); | |
1106 | |
1107 /* Go through all the uses. update_df will create new ones at the | |
1108 end, and we'll go through them as well. */ | |
1109 df_set_flags (DF_DEFER_INSN_RESCAN); | |
1110 | |
1111 for (i = 0; i < DF_USES_TABLE_SIZE (); i++) | |
1112 { | |
1113 df_ref use = DF_USES_GET (i); | |
1114 if (use) | |
1115 if (DF_REF_TYPE (use) != DF_REF_REG_USE | |
1116 && DF_REF_BB (use)->loop_father != NULL | |
1117 /* The outer most loop is not really a loop. */ | |
1118 && loop_outer (DF_REF_BB (use)->loop_father) != NULL) | |
1119 forward_propagate_into (use); | |
1120 } | |
1121 | |
1122 fwprop_done (); | |
1123 | |
1124 return 0; | |
1125 } | |
1126 | |
/* Pass descriptor for the second forward-propagation pass ("fwprop2"),
   which propagates into addresses inside loops after loop optimization.  */
struct rtl_opt_pass pass_rtl_fwprop_addr =
{
 {
  RTL_PASS,
  "fwprop2",                            /* name */
  gate_fwprop,                          /* gate */
  fwprop_addr,				/* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_FWPROP,                            /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func                        /* todo_flags_finish */
 }
};