Mercurial > hg > CbC > CbC_gcc
comparison gcc/implicit-zee.c @ 63:b7f97abdc517 gcc-4.6-20100522
update gcc from gcc-4.5.0 to gcc-4.6
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 24 May 2010 12:47:05 +0900 |
parents | |
children | f6334be47118 |
comparison
equal
deleted
inserted
replaced
56:3c8a44c06a95 | 63:b7f97abdc517 |
---|---|
1 /* Redundant Zero-extension elimination for targets that implicitly | |
2 zero-extend writes to the lower 32-bit portion of 64-bit registers. | |
3 Copyright (C) 2010 Free Software Foundation, Inc. | |
4 Contributed by Sriraman Tallam (tmsriram@google.com) and | |
5 Silvius Rus (rus@google.com) | |
6 | |
7 This file is part of GCC. | |
8 | |
9 GCC is free software; you can redistribute it and/or modify it under | |
10 the terms of the GNU General Public License as published by the Free | |
11 Software Foundation; either version 3, or (at your option) any later | |
12 version. | |
13 | |
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with GCC; see the file COPYING3. If not see | |
21 <http://www.gnu.org/licenses/>. */ | |
22 | |
23 | |
24 /* Problem Description : | |
25 -------------------- | |
26 This pass is intended to be applicable only to targets that implicitly | |
27 zero-extend 64-bit registers after writing to their lower 32-bit half. | |
28 For instance, x86_64 zero-extends the upper bits of a register | |
29 implicitly whenever an instruction writes to its lower 32-bit half. | |
30 For example, the instruction *add edi,eax* also zero-extends the upper | |
31 32-bits of rax after doing the addition. These zero extensions come | |
32 for free and GCC does not always exploit this well. That is, it has | |
33 been observed that there are plenty of cases where GCC explicitly | |
34 zero-extends registers for x86_64 that are actually useless because | |
35 these registers were already implicitly zero-extended in a prior | |
36 instruction. This pass tries to eliminate such useless zero extension | |
37 instructions. | |
38 | |
39 How does this pass work ? | |
40 -------------------------- | |
41 | |
42 This pass is run after register allocation. Hence, all registers that | |
43 this pass deals with are hard registers. This pass first looks for a | |
44 zero-extension instruction that could possibly be redundant. Such zero | |
45 extension instructions show up in RTL with the pattern : | |
46 (set (reg:DI x) (zero_extend:DI (reg:SI x))). | |
47 where x can be any one of the 64-bit hard registers. | |
48 Now, this pass tries to eliminate this instruction by merging the | |
49 zero-extension with the definitions of register x. For instance, if | |
50 one of the definitions of register x was : | |
51 (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))), | |
52 then the combination converts this into : | |
53 (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))). | |
54 If all the merged definitions are recognizable assembly instructions, | |
55 the zero-extension is effectively eliminated. For example, in x86_64, | |
56 implicit zero-extensions are captured with appropriate patterns in the | |
57 i386.md file. Hence, these merged definition can be matched to a single | |
58 assembly instruction. The original zero-extension instruction is then | |
59 deleted if all the definitions can be merged. | |
60 | |
61 However, there are cases where the definition instruction cannot be | |
62 merged with a zero-extend. Examples are CALL instructions. In such | |
63 cases, the original zero extension is not redundant and this pass does | |
64 not delete it. | |
65 | |
66 Handling conditional moves : | |
67 ---------------------------- | |
68 | |
69 Architectures like x86_64 support conditional moves whose semantics for | |
70 zero-extension differ from the other instructions. For instance, the | |
71 instruction *cmov ebx, eax* | |
72 zero-extends eax onto rax only when the move from ebx to eax happens. | |
73 Otherwise, eax may not be zero-extended. Conditional moves appear as | |
74 RTL instructions of the form | |
75 (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))). | |
76 This pass tries to merge a zero-extension with a conditional move by | |
77 actually merging the defintions of y and z with a zero-extend and then | |
78 converting the conditional move into : | |
79 (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))). | |
80 Since registers y and z are zero-extended, register x will also be | |
81 zero-extended after the conditional move. Note that this step has to | |
82 be done transitively since the definition of a conditional copy can be | |
83 another conditional copy. | |
84 | |
85 Motivating Example I : | |
86 --------------------- | |
87 For this program : | |
88 ********************************************** | |
89 bad_code.c | |
90 | |
91 int mask[1000]; | |
92 | |
93 int foo(unsigned x) | |
94 { | |
95 if (x < 10) | |
96 x = x * 45; | |
97 else | |
98 x = x * 78; | |
99 return mask[x]; | |
100 } | |
101 ********************************************** | |
102 | |
103 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination.) | |
104 ........ | |
105 400315: b8 4e 00 00 00 mov $0x4e,%eax | |
106 40031a: 0f af f8 imul %eax,%edi | |
107 40031d: 89 ff mov %edi,%edi ---> Useless extend. | |
108 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax | |
109 400326: c3 retq | |
110 ...... | |
111 400330: ba 2d 00 00 00 mov $0x2d,%edx | |
112 400335: 0f af fa imul %edx,%edi | |
113 400338: 89 ff mov %edi,%edi ---> Useless extend. | |
114 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax | |
115 400341: c3 retq | |
116 | |
117 $ gcc -O2 -fzee bad_code.c | |
118 ...... | |
119 400315: 6b ff 4e imul $0x4e,%edi,%edi | |
120 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax | |
121 40031f: c3 retq | |
122 400320: 6b ff 2d imul $0x2d,%edi,%edi | |
123 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax | |
124 40032a: c3 retq | |
125 | |
126 Motivating Example II : | |
127 --------------------- | |
128 | |
129 Here is an example with a conditional move. | |
130 | |
131 For this program : | |
132 ********************************************** | |
133 | |
134 unsigned long long foo(unsigned x , unsigned y) | |
135 { | |
136 unsigned z; | |
137 if (x > 100) | |
138 z = x + y; | |
139 else | |
140 z = x - y; | |
141 return (unsigned long long)(z); | |
142 } | |
143 | |
144 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination.) | |
145 ............ | |
146 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx | |
147 400363: 89 f8 mov %edi,%eax | |
148 400365: 29 f0 sub %esi,%eax | |
149 400367: 83 ff 65 cmp $0x65,%edi | |
150 40036a: 0f 43 c2 cmovae %edx,%eax | |
151 40036d: 89 c0 mov %eax,%eax ---> Useless extend. | |
152 40036f: c3 retq | |
153 | |
154 $ gcc -O2 -fzee bad_code.c | |
155 ............. | |
156 400360: 89 fa mov %edi,%edx | |
157 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax | |
158 400365: 29 f2 sub %esi,%edx | |
159 400367: 83 ff 65 cmp $0x65,%edi | |
160 40036a: 89 d6 mov %edx,%esi | |
161 40036c: 48 0f 42 c6 cmovb %rsi,%rax | |
162 400370: c3 retq | |
163 | |
164 | |
165 Usefulness : | |
166 ---------- | |
167 | |
168 This pass reduces the dynamic instruction count of a compression benchmark by | |
169 2.8% and improves its run-time by about 1%. The compression benchmark had the | |
170 following code sequence in a very hot region of code before ZEE optimized it : | |
171 | |
172 shr $0x5, %edx | |
173 mov %edx, %edx --> Useless zero-extend. | |
174 | |
175 How to turn on ? | |
176 ---------------- | |
177 -fzee -O2. */ | |
178 | |
179 | |
180 #include "config.h" | |
181 #include "system.h" | |
182 #include "coretypes.h" | |
183 #include "tm.h" | |
184 #include "rtl.h" | |
185 #include "tree.h" | |
186 #include "tm_p.h" | |
187 #include "flags.h" | |
188 #include "regs.h" | |
189 #include "hard-reg-set.h" | |
190 #include "basic-block.h" | |
191 #include "insn-config.h" | |
192 #include "function.h" | |
193 #include "expr.h" | |
194 #include "insn-attr.h" | |
195 #include "recog.h" | |
196 #include "toplev.h" | |
197 #include "target.h" | |
198 #include "timevar.h" | |
199 #include "optabs.h" | |
200 #include "insn-codes.h" | |
201 #include "rtlhooks-def.h" | |
202 /* Include output.h for dump_file. */ | |
203 #include "output.h" | |
204 #include "params.h" | |
205 #include "timevar.h" | |
206 #include "tree-pass.h" | |
207 #include "df.h" | |
208 #include "cgraph.h" | |
209 | |
210 /* This says if a register is newly created for the purpose of | |
211 zero-extension. */ | |
212 | |
213 enum insn_merge_code | |
214 { | |
215 MERGE_NOT_ATTEMPTED = 0, | |
216 MERGE_SUCCESS | |
217 }; | |
218 | |
219 /* This says if a INSN UID or its definition has already been merged | |
220 with a zero-extend or not. */ | |
221 | |
222 static enum insn_merge_code *is_insn_merge_attempted; | |
223 static int max_insn_uid; | |
224 | |
225 /* Returns the merge code status for INSN. */ | |
226 | |
227 static enum insn_merge_code | |
228 get_insn_status (rtx insn) | |
229 { | |
230 gcc_assert (INSN_UID (insn) < max_insn_uid); | |
231 return is_insn_merge_attempted[INSN_UID (insn)]; | |
232 } | |
233 | |
234 /* Sets the merge code status of INSN to CODE. */ | |
235 | |
236 static void | |
237 set_insn_status (rtx insn, enum insn_merge_code code) | |
238 { | |
239 gcc_assert (INSN_UID (insn) < max_insn_uid); | |
240 is_insn_merge_attempted[INSN_UID (insn)] = code; | |
241 } | |
242 | |
243 /* Check to see if this zero-extend matches a pattern | |
244 that could be eliminated. This is called via | |
245 for_each_rtx in function find_and_remove_ze. */ | |
246 | |
247 static int | |
248 is_set_with_extension_DI (rtx *expr, void *data) | |
249 { | |
250 /* Looking only for patterns of the type : | |
251 SET (REG:DI X) (ZERO_EXTEND (REG:SI x)) | |
252 */ | |
253 | |
254 if (GET_CODE (*expr) == SET | |
255 && GET_MODE (SET_DEST (*expr)) == DImode | |
256 && GET_CODE (SET_DEST (*expr)) == REG | |
257 && GET_CODE (SET_SRC (*expr)) == ZERO_EXTEND | |
258 && GET_CODE (XEXP (SET_SRC (*expr),0)) == REG | |
259 && GET_MODE (XEXP (SET_SRC (*expr),0)) == SImode | |
260 && REGNO (SET_DEST (*expr)) == REGNO (XEXP (SET_SRC (*expr),0))) | |
261 { | |
262 *(rtx **)(data) = expr; | |
263 return 1; | |
264 } | |
265 return 0; | |
266 } | |
267 | |
268 /* Given a insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET) | |
269 that needs to be modified, this code modifies the SET rtx to a | |
270 new SET rtx that zero_extends the right hand expression into a DImode | |
271 register (NEWREG) on the left hand side. Note that multiple | |
272 assumptions are made about the nature of the set that needs | |
273 to be true for this to work and is called from merge_def_and_ze. | |
274 | |
275 Original : | |
276 (set (reg:SI a) (expression)) | |
277 | |
278 Transform : | |
279 (set (reg:DI a) (zero_extend (expression))) | |
280 | |
281 Special Cases : | |
282 If the expression is a constant or another zero_extend directly | |
283 assign it to the DI mode register. */ | |
284 | |
285 static bool | |
286 combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg) | |
287 { | |
288 rtx temp_extension, simplified_temp_extension, new_set, new_const_int; | |
289 rtx orig_src; | |
290 HOST_WIDE_INT val; | |
291 unsigned int mask, delta_width; | |
292 | |
293 /* Change the SET rtx and validate it. */ | |
294 orig_src = SET_SRC (*orig_set); | |
295 new_set = NULL_RTX; | |
296 | |
297 /* The right hand side can also be VOIDmode. These cases have to be | |
298 handled differently. */ | |
299 | |
300 if (GET_MODE (orig_src) != SImode) | |
301 { | |
302 /* Merge constants by directly moving the constant into the | |
303 DImode register under some conditions. */ | |
304 | |
305 if (GET_CODE (orig_src) == CONST_INT | |
306 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (SImode)) | |
307 { | |
308 if (INTVAL (orig_src) >= 0) | |
309 new_set = gen_rtx_SET (VOIDmode, newreg, orig_src); | |
310 else if (INTVAL (orig_src) < 0) | |
311 { | |
312 /* Zero-extending a negative SImode integer into DImode | |
313 makes it a positive integer. Convert the given negative | |
314 integer into the appropriate integer when zero-extended. */ | |
315 | |
316 delta_width = HOST_BITS_PER_WIDE_INT - GET_MODE_BITSIZE (SImode); | |
317 mask = (~(unsigned HOST_WIDE_INT) 0) >> delta_width; | |
318 val = INTVAL (orig_src); | |
319 val = val & mask; | |
320 new_const_int = gen_rtx_CONST_INT (VOIDmode, val); | |
321 new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int); | |
322 } | |
323 else | |
324 return false; | |
325 } | |
326 else | |
327 { | |
328 /* This is mostly due to a call insn that should not be | |
329 optimized. */ | |
330 | |
331 return false; | |
332 } | |
333 } | |
334 else if (GET_CODE (orig_src) == ZERO_EXTEND) | |
335 { | |
336 /* Here a zero-extend is used to get to SI. Why not make it | |
337 all the way till DI. */ | |
338 | |
339 temp_extension = gen_rtx_ZERO_EXTEND (DImode, XEXP (orig_src, 0)); | |
340 simplified_temp_extension = simplify_rtx (temp_extension); | |
341 if (simplified_temp_extension) | |
342 temp_extension = simplified_temp_extension; | |
343 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension); | |
344 } | |
345 else if (GET_CODE (orig_src) == IF_THEN_ELSE) | |
346 { | |
347 /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise, | |
348 in general, IF_THEN_ELSE should not be combined. */ | |
349 | |
350 return false; | |
351 } | |
352 else | |
353 { | |
354 /* This is the normal case we expect. */ | |
355 | |
356 temp_extension = gen_rtx_ZERO_EXTEND (DImode, orig_src); | |
357 simplified_temp_extension = simplify_rtx (temp_extension); | |
358 if (simplified_temp_extension) | |
359 temp_extension = simplified_temp_extension; | |
360 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension); | |
361 } | |
362 | |
363 gcc_assert (new_set != NULL_RTX); | |
364 | |
365 /* This change is a part of a group of changes. Hence, | |
366 validate_change will not try to commit the change. */ | |
367 | |
368 if (validate_change (curr_insn, orig_set, new_set, true)) | |
369 { | |
370 if (dump_file) | |
371 { | |
372 fprintf (dump_file, "Merged Instruction with ZERO_EXTEND:\n"); | |
373 print_rtl_single (dump_file, curr_insn); | |
374 } | |
375 return true; | |
376 } | |
377 return false; | |
378 } | |
379 | |
380 /* This returns the DI mode for the SI register REG_SI. */ | |
381 | |
382 static rtx | |
383 get_reg_di (rtx reg_si) | |
384 { | |
385 rtx newreg; | |
386 | |
387 newreg = gen_rtx_REG (DImode, REGNO (reg_si)); | |
388 gcc_assert (newreg); | |
389 return newreg; | |
390 } | |
391 | |
392 /* Treat if_then_else insns, where the operands of both branches | |
393 are registers, as copies. For instance, | |
394 Original : | |
395 (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c))) | |
396 Transformed : | |
397 (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c))) | |
398 DEF_INSN is the if_then_else insn. */ | |
399 | |
400 static bool | |
401 transform_ifelse (rtx def_insn) | |
402 { | |
403 rtx set_insn = PATTERN (def_insn); | |
404 rtx srcreg, dstreg, srcreg2; | |
405 rtx map_srcreg, map_dstreg, map_srcreg2; | |
406 rtx ifexpr; | |
407 rtx cond; | |
408 rtx new_set; | |
409 | |
410 gcc_assert (GET_CODE (set_insn) == SET); | |
411 cond = XEXP (SET_SRC (set_insn), 0); | |
412 dstreg = SET_DEST (set_insn); | |
413 srcreg = XEXP (SET_SRC (set_insn), 1); | |
414 srcreg2 = XEXP (SET_SRC (set_insn), 2); | |
415 map_srcreg = get_reg_di (srcreg); | |
416 map_srcreg2 = get_reg_di (srcreg2); | |
417 map_dstreg = get_reg_di (dstreg); | |
418 ifexpr = gen_rtx_IF_THEN_ELSE (DImode, cond, map_srcreg, map_srcreg2); | |
419 new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr); | |
420 | |
421 if (validate_change (def_insn, &PATTERN (def_insn), new_set, true)) | |
422 { | |
423 if (dump_file) | |
424 { | |
425 fprintf (dump_file, "Cond_Move Instruction's mode extended :\n"); | |
426 print_rtl_single (dump_file, def_insn); | |
427 } | |
428 return true; | |
429 } | |
430 else | |
431 return false; | |
432 } | |
433 | |
434 /* Function to get all the immediate definitions of an instruction. | |
435 The reaching definitions are desired for WHICH_REG used in | |
436 CURR_INSN. This function returns 0 if there was an error getting | |
437 a definition. Upon success, this function returns the number of | |
438 definitions and stores the definitions in DEST. */ | |
439 | |
440 static int | |
441 get_defs (rtx curr_insn, rtx which_reg, VEC (rtx,heap) **dest) | |
442 { | |
443 df_ref reg_info, *defs; | |
444 struct df_link *def_chain; | |
445 int n_refs = 0; | |
446 | |
447 defs = DF_INSN_USES (curr_insn); | |
448 reg_info = NULL; | |
449 | |
450 while (*defs) | |
451 { | |
452 reg_info = *defs; | |
453 if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG) | |
454 return 0; | |
455 if (REGNO (DF_REF_REG (reg_info)) == REGNO (which_reg)) | |
456 break; | |
457 defs++; | |
458 } | |
459 | |
460 gcc_assert (reg_info != NULL && defs != NULL); | |
461 def_chain = DF_REF_CHAIN (reg_info); | |
462 | |
463 while (def_chain) | |
464 { | |
465 /* Problem getting some definition for this instruction. */ | |
466 | |
467 if (def_chain->ref == NULL) | |
468 return 0; | |
469 if (DF_REF_INSN_INFO (def_chain->ref) == NULL) | |
470 return 0; | |
471 def_chain = def_chain->next; | |
472 } | |
473 | |
474 def_chain = DF_REF_CHAIN (reg_info); | |
475 | |
476 if (dest == NULL) | |
477 return 1; | |
478 | |
479 while (def_chain) | |
480 { | |
481 VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (def_chain->ref)); | |
482 def_chain = def_chain->next; | |
483 n_refs++; | |
484 } | |
485 return n_refs; | |
486 } | |
487 | |
488 /* rtx function to check if this SET insn, EXPR, is a conditional copy insn : | |
489 (set (reg:SI a ) (IF_THEN_ELSE (cond) (reg:SI b) (reg:SI c))) | |
490 Called from is_insn_cond_copy. DATA stores the two registers on each | |
491 side of the condition. */ | |
492 | |
493 static int | |
494 is_this_a_cmove (rtx expr, void *data) | |
495 { | |
496 /* Check for conditional (if-then-else) copy. */ | |
497 | |
498 if (GET_CODE (expr) == SET | |
499 && GET_CODE (SET_DEST (expr)) == REG | |
500 && GET_MODE (SET_DEST (expr)) == SImode | |
501 && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE | |
502 && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG | |
503 && GET_MODE (XEXP (SET_SRC (expr), 1)) == SImode | |
504 && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG | |
505 && GET_MODE (XEXP (SET_SRC (expr), 2)) == SImode) | |
506 { | |
507 ((rtx *)data)[0] = XEXP (SET_SRC (expr), 1); | |
508 ((rtx *)data)[1] = XEXP (SET_SRC (expr), 2); | |
509 return 1; | |
510 } | |
511 return 0; | |
512 } | |
513 | |
514 /* This returns 1 if it found | |
515 (SET (reg:SI REGNO (def_reg)) (if_then_else (cond) (REG:SI x1) (REG:SI x2))) | |
516 in the DEF_INSN pattern. It stores the x1 and x2 in COPY_REG_1 | |
517 and COPY_REG_2. */ | |
518 | |
519 static int | |
520 is_insn_cond_copy (rtx def_insn, rtx *copy_reg_1, rtx *copy_reg_2) | |
521 { | |
522 int type; | |
523 rtx set_expr; | |
524 rtx srcreg[2]; | |
525 | |
526 srcreg[0] = NULL_RTX; | |
527 srcreg[1] = NULL_RTX; | |
528 | |
529 set_expr = single_set (def_insn); | |
530 | |
531 if (set_expr == NULL_RTX) | |
532 return 0; | |
533 | |
534 type = is_this_a_cmove (set_expr, (void *) srcreg); | |
535 | |
536 if (type) | |
537 { | |
538 *copy_reg_1 = srcreg[0]; | |
539 *copy_reg_2 = srcreg[1]; | |
540 return type; | |
541 } | |
542 | |
543 return 0; | |
544 } | |
545 | |
546 /* Reaching Definitions of the zero-extended register could be conditional | |
547 copies or regular definitions. This function separates the two types into | |
548 two lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a | |
549 reaching definition is a conditional copy, combining the zero_extend with | |
550 this definition is wrong. Conditional copies are merged by transitively | |
551 merging its definitions. The defs_list is populated with all the reaching | |
552 definitions of the zero-extension instruction (ZERO_EXTEND_INSN) which must | |
553 be merged with a zero_extend. The copies_list contains all the conditional | |
554 moves that will later be extended into a DI mode conditonal move if all the | |
555 merges are successful. The function returns false when there is a failure | |
556 in getting some definitions, like that of parameters. It returns 1 upon | |
557 success, 0 upon failure and 2 when all definitions of the ZERO_EXTEND_INSN | |
558 were merged previously. */ | |
559 | |
560 static int | |
561 make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat, | |
562 VEC (rtx,heap) **defs_list, | |
563 VEC (rtx,heap) **copies_list) | |
564 { | |
565 bool *is_insn_visited; | |
566 VEC (rtx,heap) *work_list; | |
567 rtx srcreg, copy_reg_1, copy_reg_2; | |
568 rtx def_insn; | |
569 int n_defs = 0; | |
570 int vec_index = 0; | |
571 int n_worklist = 0; | |
572 int i, is_copy; | |
573 | |
574 srcreg = XEXP (SET_SRC (set_pat), 0); | |
575 work_list = VEC_alloc (rtx, heap, 8); | |
576 | |
577 /* Initialize the Work List */ | |
578 n_worklist = get_defs (zero_extend_insn, srcreg, &work_list); | |
579 | |
580 if (n_worklist == 0) | |
581 { | |
582 VEC_free (rtx, heap, work_list); | |
583 /* The number of defs being equal to zero can only imply that all of its | |
584 definitions have been previously merged. */ | |
585 return 2; | |
586 } | |
587 | |
588 is_insn_visited = XNEWVEC (bool, max_insn_uid); | |
589 | |
590 for (i = 0; i < max_insn_uid; i++) | |
591 is_insn_visited[i] = false; | |
592 | |
593 | |
594 /* Perform transitive closure for conditional copies. */ | |
595 while (n_worklist > vec_index) | |
596 { | |
597 def_insn = VEC_index (rtx, work_list, vec_index); | |
598 gcc_assert (INSN_UID (def_insn) < max_insn_uid); | |
599 | |
600 if (is_insn_visited[INSN_UID (def_insn)]) | |
601 { | |
602 vec_index++; | |
603 continue; | |
604 } | |
605 | |
606 is_insn_visited[INSN_UID (def_insn)] = true; | |
607 copy_reg_1 = copy_reg_2 = NULL_RTX; | |
608 is_copy = is_insn_cond_copy (def_insn, ©_reg_1, ©_reg_2); | |
609 if (is_copy) | |
610 { | |
611 gcc_assert (copy_reg_1 && copy_reg_2); | |
612 | |
613 /* Push it into the copy list first. */ | |
614 | |
615 VEC_safe_push (rtx, heap, *copies_list, def_insn); | |
616 | |
617 /* Perform transitive closure here */ | |
618 | |
619 n_defs = get_defs (def_insn, copy_reg_1, &work_list); | |
620 | |
621 if (n_defs == 0) | |
622 { | |
623 VEC_free (rtx, heap, work_list); | |
624 XDELETEVEC (is_insn_visited); | |
625 return 0; | |
626 } | |
627 n_worklist += n_defs; | |
628 | |
629 n_defs = get_defs (def_insn, copy_reg_2, &work_list); | |
630 if (n_defs == 0) | |
631 { | |
632 VEC_free (rtx, heap, work_list); | |
633 XDELETEVEC (is_insn_visited); | |
634 return 0; | |
635 } | |
636 n_worklist += n_defs; | |
637 } | |
638 else | |
639 { | |
640 VEC_safe_push (rtx, heap, *defs_list, def_insn); | |
641 } | |
642 vec_index++; | |
643 } | |
644 | |
645 VEC_free (rtx, heap, work_list); | |
646 XDELETEVEC (is_insn_visited); | |
647 return 1; | |
648 } | |
649 | |
650 /* Merge the DEF_INSN with a zero-extend. Calls combine_set_zero_extend | |
651 on the SET pattern. */ | |
652 | |
653 static bool | |
654 merge_def_and_ze (rtx def_insn) | |
655 { | |
656 enum rtx_code code; | |
657 rtx setreg; | |
658 rtx *sub_rtx; | |
659 rtx s_expr; | |
660 int i; | |
661 | |
662 code = GET_CODE (PATTERN (def_insn)); | |
663 sub_rtx = NULL; | |
664 | |
665 if (code == PARALLEL) | |
666 { | |
667 for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++) | |
668 { | |
669 s_expr = XVECEXP (PATTERN (def_insn), 0, i); | |
670 if (GET_CODE (s_expr) != SET) | |
671 continue; | |
672 | |
673 if (sub_rtx == NULL) | |
674 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i); | |
675 else | |
676 { | |
677 /* PARALLEL with multiple SETs. */ | |
678 return false; | |
679 } | |
680 } | |
681 } | |
682 else if (code == SET) | |
683 sub_rtx = &PATTERN (def_insn); | |
684 else | |
685 { | |
686 /* It is not a PARALLEL or a SET, what could it be ? */ | |
687 return false; | |
688 } | |
689 | |
690 gcc_assert (sub_rtx != NULL); | |
691 | |
692 if (GET_CODE (SET_DEST (*sub_rtx)) == REG | |
693 && GET_MODE (SET_DEST (*sub_rtx)) == SImode) | |
694 { | |
695 setreg = get_reg_di (SET_DEST (*sub_rtx)); | |
696 return combine_set_zero_extend (def_insn, sub_rtx, setreg); | |
697 } | |
698 else | |
699 return false; | |
700 return true; | |
701 } | |
702 | |
703 /* This function goes through all reaching defs of the source | |
704 of the zero extension instruction (ZERO_EXTEND_INSN) and | |
705 tries to combine the zero extension with the definition | |
706 instruction. The changes are made as a group so that even | |
707 if one definition cannot be merged, all reaching definitions | |
708 end up not being merged. When a conditional copy is encountered, | |
709 merging is attempted transitively on its definitions. It returns | |
710 true upon success and false upon failure. */ | |
711 | |
712 static bool | |
713 combine_reaching_defs (rtx zero_extend_insn, rtx set_pat) | |
714 { | |
715 rtx def_insn; | |
716 bool merge_successful = true; | |
717 int i; | |
718 int defs_ix; | |
719 int outcome; | |
720 | |
721 /* To store the definitions that have been merged. */ | |
722 | |
723 VEC (rtx, heap) *defs_list, *copies_list, *vec; | |
724 enum insn_merge_code merge_code; | |
725 | |
726 defs_list = VEC_alloc (rtx, heap, 8); | |
727 copies_list = VEC_alloc (rtx, heap, 8); | |
728 | |
729 outcome = make_defs_and_copies_lists (zero_extend_insn, | |
730 set_pat, &defs_list, &copies_list); | |
731 | |
732 /* outcome == 2 implies that all the definitions for this zero_extend were | |
733 merged while previously when handling other zero_extends. */ | |
734 | |
735 if (outcome == 2) | |
736 { | |
737 VEC_free (rtx, heap, defs_list); | |
738 VEC_free (rtx, heap, copies_list); | |
739 if (dump_file) | |
740 fprintf (dump_file, "All definitions have been merged previously...\n"); | |
741 return true; | |
742 } | |
743 | |
744 if (outcome == 0) | |
745 { | |
746 VEC_free (rtx, heap, defs_list); | |
747 VEC_free (rtx, heap, copies_list); | |
748 return false; | |
749 } | |
750 | |
751 merge_successful = true; | |
752 | |
753 /* Go through the defs vector and try to merge all the definitions | |
754 in this vector. */ | |
755 | |
756 vec = VEC_alloc (rtx, heap, 8); | |
757 for (defs_ix = 0; VEC_iterate (rtx, defs_list, defs_ix, def_insn); defs_ix++) | |
758 { | |
759 merge_code = get_insn_status (def_insn); | |
760 gcc_assert (merge_code == MERGE_NOT_ATTEMPTED); | |
761 | |
762 if (merge_def_and_ze (def_insn)) | |
763 VEC_safe_push (rtx, heap, vec, def_insn); | |
764 else | |
765 { | |
766 merge_successful = false; | |
767 break; | |
768 } | |
769 } | |
770 | |
771 /* Now go through the conditional copies vector and try to merge all | |
772 the copies in this vector. */ | |
773 | |
774 if (merge_successful) | |
775 { | |
776 for (i = 0; VEC_iterate (rtx, copies_list, i, def_insn); i++) | |
777 { | |
778 if (transform_ifelse (def_insn)) | |
779 { | |
780 VEC_safe_push (rtx, heap, vec, def_insn); | |
781 } | |
782 else | |
783 { | |
784 merge_successful = false; | |
785 break; | |
786 } | |
787 } | |
788 } | |
789 | |
790 if (merge_successful) | |
791 { | |
792 /* Commit the changes here if possible */ | |
793 /* XXX : Now, it is an all or nothing scenario. Even if one definition | |
794 cannot be merged we totally bail. In future, allow zero-extensions to | |
795 be partially eliminated along those paths where the definitions could | |
796 be merged. */ | |
797 | |
798 if (apply_change_group ()) | |
799 { | |
800 if (dump_file) | |
801 fprintf (dump_file, "All merges were successful ....\n"); | |
802 | |
803 for (i = 0; VEC_iterate (rtx, vec, i, def_insn); i++) | |
804 { | |
805 set_insn_status (def_insn, MERGE_SUCCESS); | |
806 } | |
807 | |
808 VEC_free (rtx, heap, vec); | |
809 VEC_free (rtx, heap, defs_list); | |
810 VEC_free (rtx, heap, copies_list); | |
811 return true; | |
812 } | |
813 else | |
814 { | |
815 /* Changes need not be cancelled explicitly as apply_change_group () | |
816 does it. Print list of definitions in the dump_file for debug | |
817 purposes. This zero-extension cannot be deleted. */ | |
818 | |
819 if (dump_file) | |
820 { | |
821 for (i = 0; VEC_iterate (rtx, vec, i, def_insn); i++) | |
822 { | |
823 fprintf (dump_file, " Ummergable definitions : \n"); | |
824 print_rtl_single (dump_file, def_insn); | |
825 } | |
826 } | |
827 } | |
828 } | |
829 else | |
830 { | |
831 /* Cancel any changes that have been made so far. */ | |
832 cancel_changes (0); | |
833 } | |
834 | |
835 VEC_free (rtx, heap, vec); | |
836 VEC_free (rtx, heap, defs_list); | |
837 VEC_free (rtx, heap, copies_list); | |
838 return false; | |
839 } | |
840 | |
841 /* Goes through the instruction stream looking for zero-extends. If the zero | |
842 extension instruction has atleast one def it adds it to a list of possible | |
843 candidates for deletion. It returns the list of candidates. */ | |
844 | |
845 static VEC (rtx,heap)* | |
846 find_removable_zero_extends (void) | |
847 { | |
848 VEC (rtx, heap) *zeinsn_list; | |
849 basic_block curr_block; | |
850 rtx curr_insn; | |
851 rtx *set_insn; | |
852 rtx which_reg; | |
853 int type ; | |
854 int has_defs; | |
855 | |
856 zeinsn_list = VEC_alloc (rtx, heap, 8); | |
857 FOR_EACH_BB (curr_block) | |
858 { | |
859 FOR_BB_INSNS (curr_block, curr_insn) | |
860 { | |
861 if (!INSN_P (curr_insn)) | |
862 continue; | |
863 | |
864 type = for_each_rtx (&PATTERN (curr_insn), | |
865 is_set_with_extension_DI, | |
866 (void *)&set_insn); | |
867 | |
868 if (!type) | |
869 continue; | |
870 | |
871 which_reg = XEXP (SET_SRC (*set_insn), 0); | |
872 has_defs = get_defs (curr_insn, which_reg, NULL); | |
873 if (has_defs) | |
874 VEC_safe_push (rtx, heap, zeinsn_list, curr_insn); | |
875 else if (dump_file) | |
876 { | |
877 fprintf (dump_file, "Cannot eliminate zero extension : \n"); | |
878 print_rtl_single (dump_file, curr_insn); | |
879 fprintf (dump_file, | |
880 "This has no defs. Could be extending parameters.\n"); | |
881 } | |
882 } | |
883 } | |
884 return zeinsn_list; | |
885 } | |
886 | |
887 /* This is the main function that checks the insn stream for redundant | |
888 zero extensions and tries to remove them if possible. */ | |
889 | |
890 static unsigned int | |
891 find_and_remove_ze (void) | |
892 { | |
893 rtx curr_insn = NULL_RTX; | |
894 int i; | |
895 int ix; | |
896 long long num_realized = 0; | |
897 long long num_ze_opportunities = 0; | |
898 VEC (rtx, heap) *zeinsn_list; | |
899 VEC (rtx, heap) *zeinsn_del_list; | |
900 | |
901 /* Construct DU chain to get all reaching definitions of each | |
902 zero-extension instruction. */ | |
903 | |
904 df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); | |
905 df_analyze (); | |
906 | |
907 max_insn_uid = get_max_uid (); | |
908 | |
909 is_insn_merge_attempted = XNEWVEC (enum insn_merge_code, | |
910 sizeof (enum insn_merge_code)* max_insn_uid); | |
911 | |
912 for (i = 0; i < max_insn_uid; i++) | |
913 { | |
914 is_insn_merge_attempted[i] = MERGE_NOT_ATTEMPTED; | |
915 } | |
916 | |
917 num_ze_opportunities = num_realized = 0; | |
918 | |
919 zeinsn_del_list = VEC_alloc (rtx, heap, 4); | |
920 | |
921 zeinsn_list = find_removable_zero_extends (); | |
922 | |
923 for (ix = 0; VEC_iterate (rtx, zeinsn_list, ix, curr_insn); ix++) | |
924 { | |
925 num_ze_opportunities++; | |
926 /* Try to combine the zero-extends with the definition here. */ | |
927 | |
928 if (dump_file) | |
929 { | |
930 fprintf (dump_file, "Trying to eliminate zero extension : \n"); | |
931 print_rtl_single (dump_file, curr_insn); | |
932 } | |
933 | |
934 if (combine_reaching_defs (curr_insn, PATTERN (curr_insn))) | |
935 { | |
936 if (dump_file) | |
937 fprintf (dump_file, "Eliminated the zero extension...\n"); | |
938 num_realized++; | |
939 VEC_safe_push (rtx, heap, zeinsn_del_list, curr_insn); | |
940 } | |
941 } | |
942 | |
943 /* Delete all useless zero extensions here in one sweep. */ | |
944 for (ix = 0; VEC_iterate (rtx, zeinsn_del_list, ix, curr_insn); ix++) | |
945 { | |
946 delete_insn (curr_insn); | |
947 } | |
948 | |
949 free (is_insn_merge_attempted); | |
950 VEC_free (rtx, heap, zeinsn_list); | |
951 VEC_free (rtx, heap, zeinsn_del_list); | |
952 | |
953 if (dump_file && num_ze_opportunities > 0) | |
954 fprintf (dump_file, "\n %s : num_zee_opportunities = %lld " | |
955 "num_realized = %lld \n", | |
956 current_function_name (), | |
957 num_ze_opportunities, num_realized); | |
958 | |
959 df_finish_pass (false); | |
960 return 0; | |
961 } | |
962 | |
963 /* Find and remove redundant zero extensions. */ | |
964 | |
965 static unsigned int | |
966 rest_of_handle_zee (void) | |
967 { | |
968 timevar_push (TV_ZEE); | |
969 find_and_remove_ze (); | |
970 timevar_pop (TV_ZEE); | |
971 return 0; | |
972 } | |
973 | |
974 /* Run zee pass when flag_zee is set at optimization level > 0. */ | |
975 | |
976 static bool | |
977 gate_handle_zee (void) | |
978 { | |
979 return (optimize > 0 && flag_zee); | |
980 } | |
981 | |
982 struct rtl_opt_pass pass_implicit_zee = | |
983 { | |
984 { | |
985 RTL_PASS, | |
986 "zee", /* name */ | |
987 gate_handle_zee, /* gate */ | |
988 rest_of_handle_zee, /* execute */ | |
989 NULL, /* sub */ | |
990 NULL, /* next */ | |
991 0, /* static_pass_number */ | |
992 TV_ZEE, /* tv_id */ | |
993 0, /* properties_required */ | |
994 0, /* properties_provided */ | |
995 0, /* properties_destroyed */ | |
996 0, /* todo_flags_start */ | |
997 TODO_ggc_collect | | |
998 TODO_dump_func | | |
999 TODO_verify_rtl_sharing, /* todo_flags_finish */ | |
1000 } | |
1001 }; |