Mercurial > hg > CbC > CbC_gcc
comparison gcc/ree.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Redundant Extension Elimination pass for the GNU compiler. | |
2 Copyright (C) 2010-2017 Free Software Foundation, Inc. | |
3 Contributed by Ilya Enkovich (ilya.enkovich@intel.com) | |
4 | |
5 Based on the Redundant Zero-extension elimination pass contributed by | |
6 Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com). | |
7 | |
8 This file is part of GCC. | |
9 | |
10 GCC is free software; you can redistribute it and/or modify it under | |
11 the terms of the GNU General Public License as published by the Free | |
12 Software Foundation; either version 3, or (at your option) any later | |
13 version. | |
14 | |
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
18 for more details. | |
19 | |
20 You should have received a copy of the GNU General Public License | |
21 along with GCC; see the file COPYING3. If not see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
24 | |
25 /* Problem Description : | |
26 -------------------- | |
27 This pass is intended to remove redundant extension instructions. | |
28 Such instructions appear for different reasons. We expect some of | |
29 them due to implicit zero-extension in 64-bit registers after writing | |
30 to their lower 32-bit half (e.g. for the x86-64 architecture). | |
31 Another possible reason is a type cast which follows a load (for | |
32 instance a register restore) and which can be combined into a single | |
33 instruction, and for which earlier local passes, e.g. the combiner, | |
34 weren't able to optimize. | |
35 | |
36 How does this pass work ? | |
37 -------------------------- | |
38 | |
39 This pass is run after register allocation. Hence, all registers that | |
40 this pass deals with are hard registers. This pass first looks for an | |
41 extension instruction that could possibly be redundant. Such extension | |
42 instructions show up in RTL with the pattern : | |
43 (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))), | |
44 where x can be any hard register. | |
45 Now, this pass tries to eliminate this instruction by merging the | |
46 extension with the definitions of register x. For instance, if | |
47 one of the definitions of register x was : | |
48 (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))), | |
49 followed by extension : | |
50 (set (reg:DI x) (zero_extend:DI (reg:SI x))) | |
51 then the combination converts this into : | |
52 (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))). | |
53 If all the merged definitions are recognizable assembly instructions, | |
54 the extension is effectively eliminated. | |
55 | |
56 For example, for the x86-64 architecture, implicit zero-extensions | |
57 are captured with appropriate patterns in the i386.md file. Hence, | |
58 these merged definition can be matched to a single assembly instruction. | |
59 The original extension instruction is then deleted if all the | |
60 definitions can be merged. | |
61 | |
62 However, there are cases where the definition instruction cannot be | |
63 merged with an extension. Examples are CALL instructions. In such | |
64 cases, the original extension is not redundant and this pass does | |
65 not delete it. | |
66 | |
67 Handling conditional moves : | |
68 ---------------------------- | |
69 | |
70 Architectures like x86-64 support conditional moves whose semantics for | |
71 extension differ from the other instructions. For instance, the | |
72 instruction *cmov ebx, eax* | |
73 zero-extends eax onto rax only when the move from ebx to eax happens. | |
74 Otherwise, eax may not be zero-extended. Consider conditional moves as | |
75 RTL instructions of the form | |
76 (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))). | |
77 This pass tries to merge an extension with a conditional move by | |
78 actually merging the definitions of y and z with an extension and then | |
79 converting the conditional move into : | |
80 (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))). | |
81 Since registers y and z are extended, register x will also be extended | |
82 after the conditional move. Note that this step has to be done | |
83 transitively since the definition of a conditional copy can be | |
84 another conditional copy. | |
85 | |
86 Motivating Example I : | |
87 --------------------- | |
88 For this program : | |
89 ********************************************** | |
90 bad_code.c | |
91 | |
92 int mask[1000]; | |
93 | |
94 int foo(unsigned x) | |
95 { | |
96 if (x < 10) | |
97 x = x * 45; | |
98 else | |
99 x = x * 78; | |
100 return mask[x]; | |
101 } | |
102 ********************************************** | |
103 | |
104 $ gcc -O2 bad_code.c | |
105 ........ | |
106 400315: b8 4e 00 00 00 mov $0x4e,%eax | |
107 40031a: 0f af f8 imul %eax,%edi | |
108 40031d: 89 ff mov %edi,%edi - useless extension | |
109 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax | |
110 400326: c3 retq | |
111 ...... | |
112 400330: ba 2d 00 00 00 mov $0x2d,%edx | |
113 400335: 0f af fa imul %edx,%edi | |
114 400338: 89 ff mov %edi,%edi - useless extension | |
115 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax | |
116 400341: c3 retq | |
117 | |
118 $ gcc -O2 -free bad_code.c | |
119 ...... | |
120 400315: 6b ff 4e imul $0x4e,%edi,%edi | |
121 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax | |
122 40031f: c3 retq | |
123 400320: 6b ff 2d imul $0x2d,%edi,%edi | |
124 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax | |
125 40032a: c3 retq | |
126 | |
127 Motivating Example II : | |
128 --------------------- | |
129 | |
130 Here is an example with a conditional move. | |
131 | |
132 For this program : | |
133 ********************************************** | |
134 | |
135 unsigned long long foo(unsigned x , unsigned y) | |
136 { | |
137 unsigned z; | |
138 if (x > 100) | |
139 z = x + y; | |
140 else | |
141 z = x - y; | |
142 return (unsigned long long)(z); | |
143 } | |
144 | |
145 $ gcc -O2 bad_code.c | |
146 ............ | |
147 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx | |
148 400363: 89 f8 mov %edi,%eax | |
149 400365: 29 f0 sub %esi,%eax | |
150 400367: 83 ff 65 cmp $0x65,%edi | |
151 40036a: 0f 43 c2 cmovae %edx,%eax | |
152 40036d: 89 c0 mov %eax,%eax - useless extension | |
153 40036f: c3 retq | |
154 | |
155 $ gcc -O2 -free bad_code.c | |
156 ............. | |
157 400360: 89 fa mov %edi,%edx | |
158 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax | |
159 400365: 29 f2 sub %esi,%edx | |
160 400367: 83 ff 65 cmp $0x65,%edi | |
161 40036a: 89 d6 mov %edx,%esi | |
162 40036c: 48 0f 42 c6 cmovb %rsi,%rax | |
163 400370: c3 retq | |
164 | |
165 Motivating Example III : | |
166 --------------------- | |
167 | |
168 Here is an example with a type cast. | |
169 | |
170 For this program : | |
171 ********************************************** | |
172 | |
173 void test(int size, unsigned char *in, unsigned char *out) | |
174 { | |
175 int i; | |
176 unsigned char xr, xg, xy=0; | |
177 | |
178 for (i = 0; i < size; i++) { | |
179 xr = *in++; | |
180 xg = *in++; | |
181 xy = (unsigned char) ((19595*xr + 38470*xg) >> 16); | |
182 *out++ = xy; | |
183 } | |
184 } | |
185 | |
186 $ gcc -O2 bad_code.c | |
187 ............ | |
188 10: 0f b6 0e movzbl (%rsi),%ecx | |
189 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax | |
190 17: 48 83 c6 02 add $0x2,%rsi | |
191 1b: 0f b6 c9 movzbl %cl,%ecx - useless extension | |
192 1e: 0f b6 c0 movzbl %al,%eax - useless extension | |
193 21: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx | |
194 27: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax | |
195 | |
196 $ gcc -O2 -free bad_code.c | |
197 ............. | |
198 10: 0f b6 0e movzbl (%rsi),%ecx | |
199 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax | |
200 17: 48 83 c6 02 add $0x2,%rsi | |
201 1b: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx | |
202 21: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax | |
203 | |
204 Usefulness : | |
205 ---------- | |
206 | |
207 The original redundant zero-extension elimination pass reported reduction | |
208 of the dynamic instruction count of a compression benchmark by 2.8% and | |
209 improvement of its run time by about 1%. | |
210 | |
211 The additional performance gain with the enhanced pass is mostly expected | |
212 on in-order architectures where redundancy cannot be compensated by out of | |
213 order execution. Measurements showed up to 10% performance gain (reduced | |
214 run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance | |
215 gain 1%. */ | |
216 | |
217 | |
218 #include "config.h" | |
219 #include "system.h" | |
220 #include "coretypes.h" | |
221 #include "backend.h" | |
222 #include "target.h" | |
223 #include "rtl.h" | |
224 #include "tree.h" | |
225 #include "df.h" | |
226 #include "memmodel.h" | |
227 #include "tm_p.h" | |
228 #include "optabs.h" | |
229 #include "regs.h" | |
230 #include "emit-rtl.h" | |
231 #include "recog.h" | |
232 #include "cfgrtl.h" | |
233 #include "expr.h" | |
234 #include "tree-pass.h" | |
235 | |
236 /* This structure represents a candidate for elimination. */ | |
237 | |
238 struct ext_cand | |
239 { | |
240 /* The expression. */ | |
241 const_rtx expr; | |
242 | |
243 /* The kind of extension. */ | |
244 enum rtx_code code; | |
245 | |
246 /* The destination mode. */ | |
247 machine_mode mode; | |
248 | |
249 /* The instruction where it lives. */ | |
250 rtx_insn *insn; | |
251 }; | |
252 | |
253 | |
254 static int max_insn_uid; | |
255 | |
256 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN. */ | |
257 | |
258 static bool | |
259 update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode, | |
260 machine_mode old_mode, enum rtx_code code) | |
261 { | |
262 rtx *loc = ®_NOTES (insn); | |
263 while (*loc) | |
264 { | |
265 enum reg_note kind = REG_NOTE_KIND (*loc); | |
266 if (kind == REG_EQUAL || kind == REG_EQUIV) | |
267 { | |
268 rtx orig_src = XEXP (*loc, 0); | |
269 /* Update equivalency constants. Recall that RTL constants are | |
270 sign-extended. */ | |
271 if (GET_CODE (orig_src) == CONST_INT | |
272 && HWI_COMPUTABLE_MODE_P (new_mode)) | |
273 { | |
274 if (INTVAL (orig_src) >= 0 || code == SIGN_EXTEND) | |
275 /* Nothing needed. */; | |
276 else | |
277 { | |
278 /* Zero-extend the negative constant by masking out the | |
279 bits outside the source mode. */ | |
280 rtx new_const_int | |
281 = gen_int_mode (INTVAL (orig_src) | |
282 & GET_MODE_MASK (old_mode), | |
283 new_mode); | |
284 if (!validate_change (insn, &XEXP (*loc, 0), | |
285 new_const_int, true)) | |
286 return false; | |
287 } | |
288 loc = &XEXP (*loc, 1); | |
289 } | |
290 /* Drop all other notes, they assume a wrong mode. */ | |
291 else if (!validate_change (insn, loc, XEXP (*loc, 1), true)) | |
292 return false; | |
293 } | |
294 else | |
295 loc = &XEXP (*loc, 1); | |
296 } | |
297 return true; | |
298 } | |
299 | |
300 /* Given a insn (CURR_INSN), an extension candidate for removal (CAND) | |
301 and a pointer to the SET rtx (ORIG_SET) that needs to be modified, | |
302 this code modifies the SET rtx to a new SET rtx that extends the | |
303 right hand expression into a register on the left hand side. Note | |
304 that multiple assumptions are made about the nature of the set that | |
305 needs to be true for this to work and is called from merge_def_and_ext. | |
306 | |
307 Original : | |
308 (set (reg a) (expression)) | |
309 | |
310 Transform : | |
311 (set (reg a) (any_extend (expression))) | |
312 | |
313 Special Cases : | |
314 If the expression is a constant or another extension, then directly | |
315 assign it to the register. */ | |
316 | |
317 static bool | |
318 combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set) | |
319 { | |
320 rtx orig_src = SET_SRC (*orig_set); | |
321 machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set)); | |
322 rtx new_set; | |
323 rtx cand_pat = PATTERN (cand->insn); | |
324 | |
325 /* If the extension's source/destination registers are not the same | |
326 then we need to change the original load to reference the destination | |
327 of the extension. Then we need to emit a copy from that destination | |
328 to the original destination of the load. */ | |
329 rtx new_reg; | |
330 bool copy_needed | |
331 = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0))); | |
332 if (copy_needed) | |
333 new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat))); | |
334 else | |
335 new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set))); | |
336 | |
337 /* Merge constants by directly moving the constant into the register under | |
338 some conditions. Recall that RTL constants are sign-extended. */ | |
339 if (GET_CODE (orig_src) == CONST_INT | |
340 && HWI_COMPUTABLE_MODE_P (cand->mode)) | |
341 { | |
342 if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND) | |
343 new_set = gen_rtx_SET (new_reg, orig_src); | |
344 else | |
345 { | |
346 /* Zero-extend the negative constant by masking out the bits outside | |
347 the source mode. */ | |
348 rtx new_const_int | |
349 = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (orig_mode), | |
350 GET_MODE (new_reg)); | |
351 new_set = gen_rtx_SET (new_reg, new_const_int); | |
352 } | |
353 } | |
354 else if (GET_MODE (orig_src) == VOIDmode) | |
355 { | |
356 /* This is mostly due to a call insn that should not be optimized. */ | |
357 return false; | |
358 } | |
359 else if (GET_CODE (orig_src) == cand->code) | |
360 { | |
361 /* Here is a sequence of two extensions. Try to merge them. */ | |
362 rtx temp_extension | |
363 = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0)); | |
364 rtx simplified_temp_extension = simplify_rtx (temp_extension); | |
365 if (simplified_temp_extension) | |
366 temp_extension = simplified_temp_extension; | |
367 new_set = gen_rtx_SET (new_reg, temp_extension); | |
368 } | |
369 else if (GET_CODE (orig_src) == IF_THEN_ELSE) | |
370 { | |
371 /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise, | |
372 in general, IF_THEN_ELSE should not be combined. */ | |
373 return false; | |
374 } | |
375 else | |
376 { | |
377 /* This is the normal case. */ | |
378 rtx temp_extension | |
379 = gen_rtx_fmt_e (cand->code, cand->mode, orig_src); | |
380 rtx simplified_temp_extension = simplify_rtx (temp_extension); | |
381 if (simplified_temp_extension) | |
382 temp_extension = simplified_temp_extension; | |
383 new_set = gen_rtx_SET (new_reg, temp_extension); | |
384 } | |
385 | |
386 /* This change is a part of a group of changes. Hence, | |
387 validate_change will not try to commit the change. */ | |
388 if (validate_change (curr_insn, orig_set, new_set, true) | |
389 && update_reg_equal_equiv_notes (curr_insn, cand->mode, orig_mode, | |
390 cand->code)) | |
391 { | |
392 if (dump_file) | |
393 { | |
394 fprintf (dump_file, | |
395 "Tentatively merged extension with definition %s:\n", | |
396 (copy_needed) ? "(copy needed)" : ""); | |
397 print_rtl_single (dump_file, curr_insn); | |
398 } | |
399 return true; | |
400 } | |
401 | |
402 return false; | |
403 } | |
404 | |
405 /* Treat if_then_else insns, where the operands of both branches | |
406 are registers, as copies. For instance, | |
407 Original : | |
408 (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c))) | |
409 Transformed : | |
410 (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c))) | |
411 DEF_INSN is the if_then_else insn. */ | |
412 | |
413 static bool | |
414 transform_ifelse (ext_cand *cand, rtx_insn *def_insn) | |
415 { | |
416 rtx set_insn = PATTERN (def_insn); | |
417 rtx srcreg, dstreg, srcreg2; | |
418 rtx map_srcreg, map_dstreg, map_srcreg2; | |
419 rtx ifexpr; | |
420 rtx cond; | |
421 rtx new_set; | |
422 | |
423 gcc_assert (GET_CODE (set_insn) == SET); | |
424 | |
425 cond = XEXP (SET_SRC (set_insn), 0); | |
426 dstreg = SET_DEST (set_insn); | |
427 srcreg = XEXP (SET_SRC (set_insn), 1); | |
428 srcreg2 = XEXP (SET_SRC (set_insn), 2); | |
429 /* If the conditional move already has the right or wider mode, | |
430 there is nothing to do. */ | |
431 if (GET_MODE_UNIT_SIZE (GET_MODE (dstreg)) | |
432 >= GET_MODE_UNIT_SIZE (cand->mode)) | |
433 return true; | |
434 | |
435 map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg)); | |
436 map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2)); | |
437 map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg)); | |
438 ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2); | |
439 new_set = gen_rtx_SET (map_dstreg, ifexpr); | |
440 | |
441 if (validate_change (def_insn, &PATTERN (def_insn), new_set, true) | |
442 && update_reg_equal_equiv_notes (def_insn, cand->mode, GET_MODE (dstreg), | |
443 cand->code)) | |
444 { | |
445 if (dump_file) | |
446 { | |
447 fprintf (dump_file, | |
448 "Mode of conditional move instruction extended:\n"); | |
449 print_rtl_single (dump_file, def_insn); | |
450 } | |
451 return true; | |
452 } | |
453 | |
454 return false; | |
455 } | |
456 | |
457 /* Get all the reaching definitions of an instruction. The definitions are | |
458 desired for REG used in INSN. Return the definition list or NULL if a | |
459 definition is missing. If DEST is non-NULL, additionally push the INSN | |
460 of the definitions onto DEST. */ | |
461 | |
462 static struct df_link * | |
463 get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest) | |
464 { | |
465 df_ref use; | |
466 struct df_link *ref_chain, *ref_link; | |
467 | |
468 FOR_EACH_INSN_USE (use, insn) | |
469 { | |
470 if (GET_CODE (DF_REF_REG (use)) == SUBREG) | |
471 return NULL; | |
472 if (REGNO (DF_REF_REG (use)) == REGNO (reg)) | |
473 break; | |
474 } | |
475 | |
476 gcc_assert (use != NULL); | |
477 | |
478 ref_chain = DF_REF_CHAIN (use); | |
479 | |
480 for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
481 { | |
482 /* Problem getting some definition for this instruction. */ | |
483 if (ref_link->ref == NULL) | |
484 return NULL; | |
485 if (DF_REF_INSN_INFO (ref_link->ref) == NULL) | |
486 return NULL; | |
487 /* As global regs are assumed to be defined at each function call | |
488 dataflow can report a call_insn as being a definition of REG. | |
489 But we can't do anything with that in this pass so proceed only | |
490 if the instruction really sets REG in a way that can be deduced | |
491 from the RTL structure. */ | |
492 if (global_regs[REGNO (reg)] | |
493 && !set_of (reg, DF_REF_INSN (ref_link->ref))) | |
494 return NULL; | |
495 } | |
496 | |
497 if (dest) | |
498 for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
499 dest->safe_push (DF_REF_INSN (ref_link->ref)); | |
500 | |
501 return ref_chain; | |
502 } | |
503 | |
504 /* Get all the reaching uses of an instruction. The uses are desired for REG | |
505 set in INSN. Return use list or NULL if a use is missing or irregular. */ | |
506 | |
507 static struct df_link * | |
508 get_uses (rtx_insn *insn, rtx reg) | |
509 { | |
510 df_ref def; | |
511 struct df_link *ref_chain, *ref_link; | |
512 | |
513 FOR_EACH_INSN_DEF (def, insn) | |
514 if (REGNO (DF_REF_REG (def)) == REGNO (reg)) | |
515 break; | |
516 | |
517 gcc_assert (def != NULL); | |
518 | |
519 ref_chain = DF_REF_CHAIN (def); | |
520 | |
521 for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
522 { | |
523 /* Problem getting some use for this instruction. */ | |
524 if (ref_link->ref == NULL) | |
525 return NULL; | |
526 if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR) | |
527 return NULL; | |
528 } | |
529 | |
530 return ref_chain; | |
531 } | |
532 | |
533 /* Return true if INSN is | |
534 (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2))) | |
535 and store x1 and x2 in REG_1 and REG_2. */ | |
536 | |
537 static bool | |
538 is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2) | |
539 { | |
540 rtx expr = single_set (insn); | |
541 | |
542 if (expr != NULL_RTX | |
543 && GET_CODE (expr) == SET | |
544 && GET_CODE (SET_DEST (expr)) == REG | |
545 && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE | |
546 && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG | |
547 && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG) | |
548 { | |
549 *reg1 = XEXP (SET_SRC (expr), 1); | |
550 *reg2 = XEXP (SET_SRC (expr), 2); | |
551 return true; | |
552 } | |
553 | |
554 return false; | |
555 } | |
556 | |
557 enum ext_modified_kind | |
558 { | |
559 /* The insn hasn't been modified by ree pass yet. */ | |
560 EXT_MODIFIED_NONE, | |
561 /* Changed into zero extension. */ | |
562 EXT_MODIFIED_ZEXT, | |
563 /* Changed into sign extension. */ | |
564 EXT_MODIFIED_SEXT | |
565 }; | |
566 | |
567 struct ATTRIBUTE_PACKED ext_modified | |
568 { | |
569 /* Mode from which ree has zero or sign extended the destination. */ | |
570 ENUM_BITFIELD(machine_mode) mode : 8; | |
571 | |
572 /* Kind of modification of the insn. */ | |
573 ENUM_BITFIELD(ext_modified_kind) kind : 2; | |
574 | |
575 unsigned int do_not_reextend : 1; | |
576 | |
577 /* True if the insn is scheduled to be deleted. */ | |
578 unsigned int deleted : 1; | |
579 }; | |
580 | |
581 /* Vectors used by combine_reaching_defs and its helpers. */ | |
582 struct ext_state | |
583 { | |
584 /* In order to avoid constant alloc/free, we keep these | |
585 4 vectors live through the entire find_and_remove_re and just | |
586 truncate them each time. */ | |
587 auto_vec<rtx_insn *> defs_list; | |
588 auto_vec<rtx_insn *> copies_list; | |
589 auto_vec<rtx_insn *> modified_list; | |
590 auto_vec<rtx_insn *> work_list; | |
591 | |
592 /* For instructions that have been successfully modified, this is | |
593 the original mode from which the insn is extending and | |
594 kind of extension. */ | |
595 struct ext_modified *modified; | |
596 }; | |
597 | |
598 /* Reaching Definitions of the extended register could be conditional copies | |
599 or regular definitions. This function separates the two types into two | |
600 lists, STATE->DEFS_LIST and STATE->COPIES_LIST. This is necessary because, | |
601 if a reaching definition is a conditional copy, merging the extension with | |
602 this definition is wrong. Conditional copies are merged by transitively | |
603 merging their definitions. The defs_list is populated with all the reaching | |
604 definitions of the extension instruction (EXTEND_INSN) which must be merged | |
605 with an extension. The copies_list contains all the conditional moves that | |
606 will later be extended into a wider mode conditional move if all the merges | |
607 are successful. The function returns false upon failure, true upon | |
608 success. */ | |
609 | |
610 static bool | |
611 make_defs_and_copies_lists (rtx_insn *extend_insn, const_rtx set_pat, | |
612 ext_state *state) | |
613 { | |
614 rtx src_reg = XEXP (SET_SRC (set_pat), 0); | |
615 bool *is_insn_visited; | |
616 bool ret = true; | |
617 | |
618 state->work_list.truncate (0); | |
619 | |
620 /* Initialize the work list. */ | |
621 if (!get_defs (extend_insn, src_reg, &state->work_list)) | |
622 return false; | |
623 | |
624 is_insn_visited = XCNEWVEC (bool, max_insn_uid); | |
625 | |
626 /* Perform transitive closure for conditional copies. */ | |
627 while (!state->work_list.is_empty ()) | |
628 { | |
629 rtx_insn *def_insn = state->work_list.pop (); | |
630 rtx reg1, reg2; | |
631 | |
632 gcc_assert (INSN_UID (def_insn) < max_insn_uid); | |
633 | |
634 if (is_insn_visited[INSN_UID (def_insn)]) | |
635 continue; | |
636 is_insn_visited[INSN_UID (def_insn)] = true; | |
637 | |
638 if (is_cond_copy_insn (def_insn, ®1, ®2)) | |
639 { | |
640 /* Push it onto the copy list first. */ | |
641 state->copies_list.safe_push (def_insn); | |
642 | |
643 /* Now perform the transitive closure. */ | |
644 if (!get_defs (def_insn, reg1, &state->work_list) | |
645 || !get_defs (def_insn, reg2, &state->work_list)) | |
646 { | |
647 ret = false; | |
648 break; | |
649 } | |
650 } | |
651 else | |
652 state->defs_list.safe_push (def_insn); | |
653 } | |
654 | |
655 XDELETEVEC (is_insn_visited); | |
656 | |
657 return ret; | |
658 } | |
659 | |
660 /* If DEF_INSN has single SET expression, possibly buried inside | |
661 a PARALLEL, return the address of the SET expression, else | |
662 return NULL. This is similar to single_set, except that | |
663 single_set allows multiple SETs when all but one is dead. */ | |
664 static rtx * | |
665 get_sub_rtx (rtx_insn *def_insn) | |
666 { | |
667 enum rtx_code code = GET_CODE (PATTERN (def_insn)); | |
668 rtx *sub_rtx = NULL; | |
669 | |
670 if (code == PARALLEL) | |
671 { | |
672 for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++) | |
673 { | |
674 rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i); | |
675 if (GET_CODE (s_expr) != SET) | |
676 continue; | |
677 | |
678 if (sub_rtx == NULL) | |
679 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i); | |
680 else | |
681 { | |
682 /* PARALLEL with multiple SETs. */ | |
683 return NULL; | |
684 } | |
685 } | |
686 } | |
687 else if (code == SET) | |
688 sub_rtx = &PATTERN (def_insn); | |
689 else | |
690 { | |
691 /* It is not a PARALLEL or a SET, what could it be ? */ | |
692 return NULL; | |
693 } | |
694 | |
695 gcc_assert (sub_rtx != NULL); | |
696 return sub_rtx; | |
697 } | |
698 | |
699 /* Merge the DEF_INSN with an extension. Calls combine_set_extension | |
700 on the SET pattern. */ | |
701 | |
702 static bool | |
703 merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state) | |
704 { | |
705 machine_mode ext_src_mode; | |
706 rtx *sub_rtx; | |
707 | |
708 ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0)); | |
709 sub_rtx = get_sub_rtx (def_insn); | |
710 | |
711 if (sub_rtx == NULL) | |
712 return false; | |
713 | |
714 if (REG_P (SET_DEST (*sub_rtx)) | |
715 && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode | |
716 || ((state->modified[INSN_UID (def_insn)].kind | |
717 == (cand->code == ZERO_EXTEND | |
718 ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)) | |
719 && state->modified[INSN_UID (def_insn)].mode | |
720 == ext_src_mode))) | |
721 { | |
722 if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx))) | |
723 >= GET_MODE_UNIT_SIZE (cand->mode)) | |
724 return true; | |
725 /* If def_insn is already scheduled to be deleted, don't attempt | |
726 to modify it. */ | |
727 if (state->modified[INSN_UID (def_insn)].deleted) | |
728 return false; | |
729 if (combine_set_extension (cand, def_insn, sub_rtx)) | |
730 { | |
731 if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE) | |
732 state->modified[INSN_UID (def_insn)].mode = ext_src_mode; | |
733 return true; | |
734 } | |
735 } | |
736 | |
737 return false; | |
738 } | |
739 | |
740 /* Given SRC, which should be one or more extensions of a REG, strip | |
741 away the extensions and return the REG. */ | |
742 | |
743 static inline rtx | |
744 get_extended_src_reg (rtx src) | |
745 { | |
746 while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND) | |
747 src = XEXP (src, 0); | |
748 gcc_assert (REG_P (src)); | |
749 return src; | |
750 } | |
751 | |
752 /* This function goes through all reaching defs of the source | |
753 of the candidate for elimination (CAND) and tries to combine | |
754 the extension with the definition instruction. The changes | |
755 are made as a group so that even if one definition cannot be | |
756 merged, all reaching definitions end up not being merged. | |
757 When a conditional copy is encountered, merging is attempted | |
758 transitively on its definitions. It returns true upon success | |
759 and false upon failure. */ | |
760 | |
761 static bool | |
762 combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state) | |
763 { | |
764 rtx_insn *def_insn; | |
765 bool merge_successful = true; | |
766 int i; | |
767 int defs_ix; | |
768 bool outcome; | |
769 | |
770 state->defs_list.truncate (0); | |
771 state->copies_list.truncate (0); | |
772 | |
773 outcome = make_defs_and_copies_lists (cand->insn, set_pat, state); | |
774 | |
775 if (!outcome) | |
776 return false; | |
777 | |
778 /* If the destination operand of the extension is a different | |
779 register than the source operand, then additional restrictions | |
780 are needed. Note we have to handle cases where we have nested | |
781 extensions in the source operand. */ | |
782 bool copy_needed | |
783 = (REGNO (SET_DEST (PATTERN (cand->insn))) | |
784 != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn))))); | |
785 if (copy_needed) | |
786 { | |
787 /* Considering transformation of | |
788 (set (reg1) (expression)) | |
789 ... | |
790 (set (reg2) (any_extend (reg1))) | |
791 | |
792 into | |
793 | |
794 (set (reg2) (any_extend (expression))) | |
795 (set (reg1) (reg2)) | |
796 ... */ | |
797 | |
798 /* In theory we could handle more than one reaching def, it | |
799 just makes the code to update the insn stream more complex. */ | |
800 if (state->defs_list.length () != 1) | |
801 return false; | |
802 | |
803 /* We don't have the structure described above if there are | |
804 conditional moves in between the def and the candidate, | |
805 and we will not handle them correctly. See PR68194. */ | |
806 if (state->copies_list.length () > 0) | |
807 return false; | |
808 | |
809 /* We require the candidate not already be modified. It may, | |
810 for example have been changed from a (sign_extend (reg)) | |
811 into (zero_extend (sign_extend (reg))). | |
812 | |
813 Handling that case shouldn't be terribly difficult, but the code | |
814 here and the code to emit copies would need auditing. Until | |
815 we see a need, this is the safe thing to do. */ | |
816 if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE) | |
817 return false; | |
818 | |
819 machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn))); | |
820 rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn))); | |
821 | |
822 /* Ensure we can use the src_reg in dst_mode (needed for | |
823 the (set (reg1) (reg2)) insn mentioned above). */ | |
824 if (!targetm.hard_regno_mode_ok (REGNO (src_reg), dst_mode)) | |
825 return false; | |
826 | |
827 /* Ensure the number of hard registers of the copy match. */ | |
828 if (hard_regno_nregs (REGNO (src_reg), dst_mode) != REG_NREGS (src_reg)) | |
829 return false; | |
830 | |
831 /* There's only one reaching def. */ | |
832 rtx_insn *def_insn = state->defs_list[0]; | |
833 | |
834 /* The defining statement must not have been modified either. */ | |
835 if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE) | |
836 return false; | |
837 | |
838 /* The defining statement and candidate insn must be in the same block. | |
839 This is merely to keep the test for safety and updating the insn | |
840 stream simple. Also ensure that within the block the candidate | |
841 follows the defining insn. */ | |
842 basic_block bb = BLOCK_FOR_INSN (cand->insn); | |
843 if (bb != BLOCK_FOR_INSN (def_insn) | |
844 || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn)) | |
845 return false; | |
846 | |
847 /* If there is an overlap between the destination of DEF_INSN and | |
848 CAND->insn, then this transformation is not safe. Note we have | |
849 to test in the widened mode. */ | |
850 rtx *dest_sub_rtx = get_sub_rtx (def_insn); | |
851 if (dest_sub_rtx == NULL | |
852 || !REG_P (SET_DEST (*dest_sub_rtx))) | |
853 return false; | |
854 | |
855 rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))), | |
856 REGNO (SET_DEST (*dest_sub_rtx))); | |
857 if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn)))) | |
858 return false; | |
859 | |
860 /* On RISC machines we must make sure that changing the mode of SRC_REG | |
861 as destination register will not affect its reaching uses, which may | |
862 read its value in a larger mode because DEF_INSN implicitly sets it | |
863 in word mode. */ | |
864 const unsigned int prec | |
865 = GET_MODE_PRECISION (GET_MODE (SET_DEST (*dest_sub_rtx))); | |
866 if (WORD_REGISTER_OPERATIONS && prec < BITS_PER_WORD) | |
867 { | |
868 struct df_link *uses = get_uses (def_insn, src_reg); | |
869 if (!uses) | |
870 return false; | |
871 | |
872 for (df_link *use = uses; use; use = use->next) | |
873 if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)), | |
874 GET_MODE (SET_DEST (*dest_sub_rtx)))) | |
875 return false; | |
876 } | |
877 | |
878 /* The destination register of the extension insn must not be | |
879 used or set between the def_insn and cand->insn exclusive. */ | |
880 if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)), | |
881 def_insn, cand->insn) | |
882 || reg_set_between_p (SET_DEST (PATTERN (cand->insn)), | |
883 def_insn, cand->insn)) | |
884 return false; | |
885 | |
886 /* We must be able to copy between the two registers. Generate, | |
887 recognize and verify constraints of the copy. Also fail if this | |
888 generated more than one insn. | |
889 | |
890 This generates garbage since we throw away the insn when we're | |
891 done, only to recreate it later if this test was successful. | |
892 | |
893 Make sure to get the mode from the extension (cand->insn). This | |
894 is different than in the code to emit the copy as we have not | |
895 modified the defining insn yet. */ | |
896 start_sequence (); | |
897 rtx pat = PATTERN (cand->insn); | |
898 rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)), | |
899 REGNO (get_extended_src_reg (SET_SRC (pat)))); | |
900 rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)), | |
901 REGNO (SET_DEST (pat))); | |
902 emit_move_insn (new_dst, new_src); | |
903 | |
904 rtx_insn *insn = get_insns(); | |
905 end_sequence (); | |
906 if (NEXT_INSN (insn)) | |
907 return false; | |
908 if (recog_memoized (insn) == -1) | |
909 return false; | |
910 extract_insn (insn); | |
911 if (!constrain_operands (1, get_preferred_alternatives (insn, bb))) | |
912 return false; | |
913 } | |
914 | |
915 | |
916 /* If cand->insn has been already modified, update cand->mode to a wider | |
917 mode if possible, or punt. */ | |
918 if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE) | |
919 { | |
920 machine_mode mode; | |
921 rtx set; | |
922 | |
923 if (state->modified[INSN_UID (cand->insn)].kind | |
924 != (cand->code == ZERO_EXTEND | |
925 ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT) | |
926 || state->modified[INSN_UID (cand->insn)].mode != cand->mode | |
927 || (set = single_set (cand->insn)) == NULL_RTX) | |
928 return false; | |
929 mode = GET_MODE (SET_DEST (set)); | |
930 gcc_assert (GET_MODE_UNIT_SIZE (mode) | |
931 >= GET_MODE_UNIT_SIZE (cand->mode)); | |
932 cand->mode = mode; | |
933 } | |
934 | |
935 merge_successful = true; | |
936 | |
937 /* Go through the defs vector and try to merge all the definitions | |
938 in this vector. */ | |
939 state->modified_list.truncate (0); | |
940 FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn) | |
941 { | |
942 if (merge_def_and_ext (cand, def_insn, state)) | |
943 state->modified_list.safe_push (def_insn); | |
944 else | |
945 { | |
946 merge_successful = false; | |
947 break; | |
948 } | |
949 } | |
950 | |
951 /* Now go through the conditional copies vector and try to merge all | |
952 the copies in this vector. */ | |
953 if (merge_successful) | |
954 { | |
955 FOR_EACH_VEC_ELT (state->copies_list, i, def_insn) | |
956 { | |
957 if (transform_ifelse (cand, def_insn)) | |
958 state->modified_list.safe_push (def_insn); | |
959 else | |
960 { | |
961 merge_successful = false; | |
962 break; | |
963 } | |
964 } | |
965 } | |
966 | |
967 if (merge_successful) | |
968 { | |
969 /* Commit the changes here if possible | |
970 FIXME: It's an all-or-nothing scenario. Even if only one definition | |
971 cannot be merged, we entirely give up. In the future, we should allow | |
972 extensions to be partially eliminated along those paths where the | |
973 definitions could be merged. */ | |
974 if (apply_change_group ()) | |
975 { | |
976 if (dump_file) | |
977 fprintf (dump_file, "All merges were successful.\n"); | |
978 | |
979 FOR_EACH_VEC_ELT (state->modified_list, i, def_insn) | |
980 { | |
981 ext_modified *modified = &state->modified[INSN_UID (def_insn)]; | |
982 if (modified->kind == EXT_MODIFIED_NONE) | |
983 modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT | |
984 : EXT_MODIFIED_SEXT); | |
985 | |
986 if (copy_needed) | |
987 modified->do_not_reextend = 1; | |
988 } | |
989 return true; | |
990 } | |
991 else | |
992 { | |
993 /* Changes need not be cancelled explicitly as apply_change_group | |
994 does it. Print list of definitions in the dump_file for debug | |
995 purposes. This extension cannot be deleted. */ | |
996 if (dump_file) | |
997 { | |
998 fprintf (dump_file, | |
999 "Merge cancelled, non-mergeable definitions:\n"); | |
1000 FOR_EACH_VEC_ELT (state->modified_list, i, def_insn) | |
1001 print_rtl_single (dump_file, def_insn); | |
1002 } | |
1003 } | |
1004 } | |
1005 else | |
1006 { | |
1007 /* Cancel any changes that have been made so far. */ | |
1008 cancel_changes (0); | |
1009 } | |
1010 | |
1011 return false; | |
1012 } | |
1013 | |
1014 /* Add an extension pattern that could be eliminated. */ | |
1015 | |
1016 static void | |
1017 add_removable_extension (const_rtx expr, rtx_insn *insn, | |
1018 vec<ext_cand> *insn_list, | |
1019 unsigned *def_map, | |
1020 bitmap init_regs) | |
1021 { | |
1022 enum rtx_code code; | |
1023 machine_mode mode; | |
1024 unsigned int idx; | |
1025 rtx src, dest; | |
1026 | |
1027 /* We are looking for SET (REG N) (ANY_EXTEND (REG N)). */ | |
1028 if (GET_CODE (expr) != SET) | |
1029 return; | |
1030 | |
1031 src = SET_SRC (expr); | |
1032 code = GET_CODE (src); | |
1033 dest = SET_DEST (expr); | |
1034 mode = GET_MODE (dest); | |
1035 | |
1036 if (REG_P (dest) | |
1037 && (code == SIGN_EXTEND || code == ZERO_EXTEND) | |
1038 && REG_P (XEXP (src, 0))) | |
1039 { | |
1040 rtx reg = XEXP (src, 0); | |
1041 struct df_link *defs, *def; | |
1042 ext_cand *cand; | |
1043 | |
1044 /* Zero-extension of an undefined value is partly defined (it's | |
1045 completely undefined for sign-extension, though). So if there exists | |
1046 a path from the entry to this zero-extension that leaves this register | |
1047 uninitialized, removing the extension could change the behavior of | |
1048 correct programs. So first, check it is not the case. */ | |
1049 if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg))) | |
1050 { | |
1051 if (dump_file) | |
1052 { | |
1053 fprintf (dump_file, "Cannot eliminate extension:\n"); | |
1054 print_rtl_single (dump_file, insn); | |
1055 fprintf (dump_file, " because it can operate on uninitialized" | |
1056 " data\n"); | |
1057 } | |
1058 return; | |
1059 } | |
1060 | |
1061 /* Second, make sure we can get all the reaching definitions. */ | |
1062 defs = get_defs (insn, reg, NULL); | |
1063 if (!defs) | |
1064 { | |
1065 if (dump_file) | |
1066 { | |
1067 fprintf (dump_file, "Cannot eliminate extension:\n"); | |
1068 print_rtl_single (dump_file, insn); | |
1069 fprintf (dump_file, " because of missing definition(s)\n"); | |
1070 } | |
1071 return; | |
1072 } | |
1073 | |
1074 /* Third, make sure the reaching definitions don't feed another and | |
1075 different extension. FIXME: this obviously can be improved. */ | |
1076 for (def = defs; def; def = def->next) | |
1077 if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))]) | |
1078 && idx != -1U | |
1079 && (cand = &(*insn_list)[idx - 1]) | |
1080 && cand->code != code) | |
1081 { | |
1082 if (dump_file) | |
1083 { | |
1084 fprintf (dump_file, "Cannot eliminate extension:\n"); | |
1085 print_rtl_single (dump_file, insn); | |
1086 fprintf (dump_file, " because of other extension\n"); | |
1087 } | |
1088 return; | |
1089 } | |
1090 /* For vector mode extensions, ensure that all uses of the | |
1091 XEXP (src, 0) register are in insn or debug insns, as unlike | |
1092 integral extensions lowpart subreg of the sign/zero extended | |
1093 register are not equal to the original register, so we have | |
1094 to change all uses or none and the current code isn't able | |
1095 to change them all at once in one transaction. */ | |
1096 else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0)))) | |
1097 { | |
1098 if (idx == 0) | |
1099 { | |
1100 struct df_link *ref_chain, *ref_link; | |
1101 | |
1102 ref_chain = DF_REF_CHAIN (def->ref); | |
1103 for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
1104 { | |
1105 if (ref_link->ref == NULL | |
1106 || DF_REF_INSN_INFO (ref_link->ref) == NULL) | |
1107 { | |
1108 idx = -1U; | |
1109 break; | |
1110 } | |
1111 rtx_insn *use_insn = DF_REF_INSN (ref_link->ref); | |
1112 if (use_insn != insn && !DEBUG_INSN_P (use_insn)) | |
1113 { | |
1114 idx = -1U; | |
1115 break; | |
1116 } | |
1117 } | |
1118 if (idx == -1U) | |
1119 def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx; | |
1120 } | |
1121 if (idx == -1U) | |
1122 { | |
1123 if (dump_file) | |
1124 { | |
1125 fprintf (dump_file, "Cannot eliminate extension:\n"); | |
1126 print_rtl_single (dump_file, insn); | |
1127 fprintf (dump_file, | |
1128 " because some vector uses aren't extension\n"); | |
1129 } | |
1130 return; | |
1131 } | |
1132 } | |
1133 | |
1134 /* Fourth, if the extended version occupies more registers than the | |
1135 original and the source of the extension is the same hard register | |
1136 as the destination of the extension, then we can not eliminate | |
1137 the extension without deep analysis, so just punt. | |
1138 | |
1139 We allow this when the registers are different because the | |
1140 code in combine_reaching_defs will handle that case correctly. */ | |
1141 if (hard_regno_nregs (REGNO (dest), mode) != REG_NREGS (reg) | |
1142 && reg_overlap_mentioned_p (dest, reg)) | |
1143 return; | |
1144 | |
1145 /* Then add the candidate to the list and insert the reaching definitions | |
1146 into the definition map. */ | |
1147 ext_cand e = {expr, code, mode, insn}; | |
1148 insn_list->safe_push (e); | |
1149 idx = insn_list->length (); | |
1150 | |
1151 for (def = defs; def; def = def->next) | |
1152 def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx; | |
1153 } | |
1154 } | |
1155 | |
1156 /* Traverse the instruction stream looking for extensions and return the | |
1157 list of candidates. */ | |
1158 | |
1159 static vec<ext_cand> | |
1160 find_removable_extensions (void) | |
1161 { | |
1162 vec<ext_cand> insn_list = vNULL; | |
1163 basic_block bb; | |
1164 rtx_insn *insn; | |
1165 rtx set; | |
1166 unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid); | |
1167 bitmap_head init, kill, gen, tmp; | |
1168 | |
1169 bitmap_initialize (&init, NULL); | |
1170 bitmap_initialize (&kill, NULL); | |
1171 bitmap_initialize (&gen, NULL); | |
1172 bitmap_initialize (&tmp, NULL); | |
1173 | |
1174 FOR_EACH_BB_FN (bb, cfun) | |
1175 { | |
1176 bitmap_copy (&init, DF_MIR_IN (bb)); | |
1177 bitmap_clear (&kill); | |
1178 bitmap_clear (&gen); | |
1179 | |
1180 FOR_BB_INSNS (bb, insn) | |
1181 { | |
1182 if (NONDEBUG_INSN_P (insn)) | |
1183 { | |
1184 set = single_set (insn); | |
1185 if (set != NULL_RTX) | |
1186 add_removable_extension (set, insn, &insn_list, def_map, | |
1187 &init); | |
1188 df_mir_simulate_one_insn (bb, insn, &kill, &gen); | |
1189 bitmap_ior_and_compl (&tmp, &gen, &init, &kill); | |
1190 bitmap_copy (&init, &tmp); | |
1191 } | |
1192 } | |
1193 } | |
1194 | |
1195 XDELETEVEC (def_map); | |
1196 | |
1197 return insn_list; | |
1198 } | |
1199 | |
1200 /* This is the main function that checks the insn stream for redundant | |
1201 extensions and tries to remove them if possible. */ | |
1202 | |
1203 static void | |
1204 find_and_remove_re (void) | |
1205 { | |
1206 ext_cand *curr_cand; | |
1207 rtx_insn *curr_insn = NULL; | |
1208 int num_re_opportunities = 0, num_realized = 0, i; | |
1209 vec<ext_cand> reinsn_list; | |
1210 auto_vec<rtx_insn *> reinsn_del_list; | |
1211 auto_vec<rtx_insn *> reinsn_copy_list; | |
1212 | |
1213 /* Construct DU chain to get all reaching definitions of each | |
1214 extension instruction. */ | |
1215 df_set_flags (DF_RD_PRUNE_DEAD_DEFS); | |
1216 df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); | |
1217 df_mir_add_problem (); | |
1218 df_analyze (); | |
1219 df_set_flags (DF_DEFER_INSN_RESCAN); | |
1220 | |
1221 max_insn_uid = get_max_uid (); | |
1222 reinsn_list = find_removable_extensions (); | |
1223 | |
1224 ext_state state; | |
1225 if (reinsn_list.is_empty ()) | |
1226 state.modified = NULL; | |
1227 else | |
1228 state.modified = XCNEWVEC (struct ext_modified, max_insn_uid); | |
1229 | |
1230 FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand) | |
1231 { | |
1232 num_re_opportunities++; | |
1233 | |
1234 /* Try to combine the extension with the definition. */ | |
1235 if (dump_file) | |
1236 { | |
1237 fprintf (dump_file, "Trying to eliminate extension:\n"); | |
1238 print_rtl_single (dump_file, curr_cand->insn); | |
1239 } | |
1240 | |
1241 if (combine_reaching_defs (curr_cand, curr_cand->expr, &state)) | |
1242 { | |
1243 if (dump_file) | |
1244 fprintf (dump_file, "Eliminated the extension.\n"); | |
1245 num_realized++; | |
1246 /* If the RHS of the current candidate is not (extend (reg)), then | |
1247 we do not allow the optimization of extensions where | |
1248 the source and destination registers do not match. Thus | |
1249 checking REG_P here is correct. */ | |
1250 if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0)) | |
1251 && (REGNO (SET_DEST (PATTERN (curr_cand->insn))) | |
1252 != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0)))) | |
1253 { | |
1254 reinsn_copy_list.safe_push (curr_cand->insn); | |
1255 reinsn_copy_list.safe_push (state.defs_list[0]); | |
1256 } | |
1257 reinsn_del_list.safe_push (curr_cand->insn); | |
1258 state.modified[INSN_UID (curr_cand->insn)].deleted = 1; | |
1259 } | |
1260 } | |
1261 | |
1262 /* The copy list contains pairs of insns which describe copies we | |
1263 need to insert into the INSN stream. | |
1264 | |
1265 The first insn in each pair is the extension insn, from which | |
1266 we derive the source and destination of the copy. | |
1267 | |
1268 The second insn in each pair is the memory reference where the | |
1269 extension will ultimately happen. We emit the new copy | |
1270 immediately after this insn. | |
1271 | |
1272 It may first appear that the arguments for the copy are reversed. | |
1273 Remember that the memory reference will be changed to refer to the | |
1274 destination of the extention. So we're actually emitting a copy | |
1275 from the new destination to the old destination. */ | |
1276 for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2) | |
1277 { | |
1278 rtx_insn *curr_insn = reinsn_copy_list[i]; | |
1279 rtx_insn *def_insn = reinsn_copy_list[i + 1]; | |
1280 | |
1281 /* Use the mode of the destination of the defining insn | |
1282 for the mode of the copy. This is necessary if the | |
1283 defining insn was used to eliminate a second extension | |
1284 that was wider than the first. */ | |
1285 rtx sub_rtx = *get_sub_rtx (def_insn); | |
1286 rtx pat = PATTERN (curr_insn); | |
1287 rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)), | |
1288 REGNO (XEXP (SET_SRC (pat), 0))); | |
1289 rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)), | |
1290 REGNO (SET_DEST (pat))); | |
1291 rtx set = gen_rtx_SET (new_dst, new_src); | |
1292 emit_insn_after (set, def_insn); | |
1293 } | |
1294 | |
1295 /* Delete all useless extensions here in one sweep. */ | |
1296 FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn) | |
1297 delete_insn (curr_insn); | |
1298 | |
1299 reinsn_list.release (); | |
1300 XDELETEVEC (state.modified); | |
1301 | |
1302 if (dump_file && num_re_opportunities > 0) | |
1303 fprintf (dump_file, "Elimination opportunities = %d realized = %d\n", | |
1304 num_re_opportunities, num_realized); | |
1305 } | |
1306 | |
1307 /* Find and remove redundant extensions. */ | |
1308 | |
1309 static unsigned int | |
1310 rest_of_handle_ree (void) | |
1311 { | |
1312 find_and_remove_re (); | |
1313 return 0; | |
1314 } | |
1315 | |
1316 namespace { | |
1317 | |
1318 const pass_data pass_data_ree = | |
1319 { | |
1320 RTL_PASS, /* type */ | |
1321 "ree", /* name */ | |
1322 OPTGROUP_NONE, /* optinfo_flags */ | |
1323 TV_REE, /* tv_id */ | |
1324 0, /* properties_required */ | |
1325 0, /* properties_provided */ | |
1326 0, /* properties_destroyed */ | |
1327 0, /* todo_flags_start */ | |
1328 TODO_df_finish, /* todo_flags_finish */ | |
1329 }; | |
1330 | |
1331 class pass_ree : public rtl_opt_pass | |
1332 { | |
1333 public: | |
1334 pass_ree (gcc::context *ctxt) | |
1335 : rtl_opt_pass (pass_data_ree, ctxt) | |
1336 {} | |
1337 | |
1338 /* opt_pass methods: */ | |
1339 virtual bool gate (function *) { return (optimize > 0 && flag_ree); } | |
1340 virtual unsigned int execute (function *) { return rest_of_handle_ree (); } | |
1341 | |
1342 }; // class pass_ree | |
1343 | |
1344 } // anon namespace | |
1345 | |
1346 rtl_opt_pass * | |
1347 make_pass_ree (gcc::context *ctxt) | |
1348 { | |
1349 return new pass_ree (ctxt); | |
1350 } |