Mercurial > hg > CbC > CbC_gcc
comparison gcc/gcse.c @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | b7f97abdc517 |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
65:65488c3d617d | 67:f6334be47118 |
---|---|
1 /* Global common subexpression elimination/Partial redundancy elimination | 1 /* Global common subexpression elimination/Partial redundancy elimination |
2 and global constant/copy propagation for GNU compiler. | 2 and global constant/copy propagation for GNU compiler. |
3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, | 3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, |
4 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. | 4 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
8 GCC is free software; you can redistribute it and/or modify it under | 8 GCC is free software; you can redistribute it and/or modify it under |
9 the terms of the GNU General Public License as published by the Free | 9 the terms of the GNU General Public License as published by the Free |
141 | 141 |
142 #include "config.h" | 142 #include "config.h" |
143 #include "system.h" | 143 #include "system.h" |
144 #include "coretypes.h" | 144 #include "coretypes.h" |
145 #include "tm.h" | 145 #include "tm.h" |
146 #include "diagnostic-core.h" | |
146 #include "toplev.h" | 147 #include "toplev.h" |
147 | 148 |
148 #include "rtl.h" | 149 #include "rtl.h" |
149 #include "tree.h" | 150 #include "tree.h" |
150 #include "tm_p.h" | 151 #include "tm_p.h" |
167 #include "tree-pass.h" | 168 #include "tree-pass.h" |
168 #include "hashtab.h" | 169 #include "hashtab.h" |
169 #include "df.h" | 170 #include "df.h" |
170 #include "dbgcnt.h" | 171 #include "dbgcnt.h" |
171 #include "target.h" | 172 #include "target.h" |
173 #include "gcse.h" | |
172 | 174 |
173 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations | 175 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations |
174 are a superset of those done by classic GCSE. | 176 are a superset of those done by classic GCSE. |
175 | 177 |
176 We perform the following steps: | 178 We perform the following steps: |
259 doesn't have this problem as much as it computes the reaching defs of | 261 doesn't have this problem as much as it computes the reaching defs of |
260 each register in each block and thus can try to use an existing | 262 each register in each block and thus can try to use an existing |
261 register. */ | 263 register. */ |
262 | 264 |
263 /* GCSE global vars. */ | 265 /* GCSE global vars. */ |
266 | |
267 struct target_gcse default_target_gcse; | |
268 #if SWITCHABLE_TARGET | |
269 struct target_gcse *this_target_gcse = &default_target_gcse; | |
270 #endif | |
264 | 271 |
265 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */ | 272 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */ |
266 int flag_rerun_cse_after_global_opts; | 273 int flag_rerun_cse_after_global_opts; |
267 | 274 |
268 /* An obstack for our working variables. */ | 275 /* An obstack for our working variables. */ |
293 struct occr *avail_occr; | 300 struct occr *avail_occr; |
294 /* Non-null if the computation is PRE redundant. | 301 /* Non-null if the computation is PRE redundant. |
295 The value is the newly created pseudo-reg to record a copy of the | 302 The value is the newly created pseudo-reg to record a copy of the |
296 expression in all the places that reach the redundant copy. */ | 303 expression in all the places that reach the redundant copy. */ |
297 rtx reaching_reg; | 304 rtx reaching_reg; |
305 /* Maximum distance in instructions this expression can travel. | |
306 We avoid moving simple expressions for more than a few instructions | |
307 to keep register pressure under control. | |
308 A value of "0" removes restrictions on how far the expression can | |
309 travel. */ | |
310 int max_distance; | |
298 }; | 311 }; |
299 | 312 |
300 /* Occurrence of an expression. | 313 /* Occurrence of an expression. |
301 There is one per basic block. If a pattern appears more than once the | 314 There is one per basic block. If a pattern appears more than once the |
302 last appearance is used [or first for anticipatable expressions]. */ | 315 last appearance is used [or first for anticipatable expressions]. */ |
314 /* ??? This is mutually exclusive with deleted_p, so they could share | 327 /* ??? This is mutually exclusive with deleted_p, so they could share |
315 the same byte. */ | 328 the same byte. */ |
316 char copied_p; | 329 char copied_p; |
317 }; | 330 }; |
318 | 331 |
332 typedef struct occr *occr_t; | |
333 DEF_VEC_P (occr_t); | |
334 DEF_VEC_ALLOC_P (occr_t, heap); | |
335 | |
319 /* Expression and copy propagation hash tables. | 336 /* Expression and copy propagation hash tables. |
320 Each hash table is an array of buckets. | 337 Each hash table is an array of buckets. |
321 ??? It is known that if it were an array of entries, structure elements | 338 ??? It is known that if it were an array of entries, structure elements |
322 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is | 339 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is |
323 not clear whether in the final analysis a sufficient amount of memory would | 340 not clear whether in the final analysis a sufficient amount of memory would |
415 static int local_copy_prop_count; | 432 static int local_copy_prop_count; |
416 /* Number of global constants propagated. */ | 433 /* Number of global constants propagated. */ |
417 static int global_const_prop_count; | 434 static int global_const_prop_count; |
418 /* Number of global copies propagated. */ | 435 /* Number of global copies propagated. */ |
419 static int global_copy_prop_count; | 436 static int global_copy_prop_count; |
437 | |
438 /* Doing code hoisting. */ | |
439 static bool doing_code_hoisting_p = false; | |
420 | 440 |
421 /* For available exprs */ | 441 /* For available exprs */ |
422 static sbitmap *ae_kill; | 442 static sbitmap *ae_kill; |
423 | 443 |
424 static void compute_can_copy (void); | 444 static void compute_can_copy (void); |
429 static void free_gcse_mem (void); | 449 static void free_gcse_mem (void); |
430 static void hash_scan_insn (rtx, struct hash_table_d *); | 450 static void hash_scan_insn (rtx, struct hash_table_d *); |
431 static void hash_scan_set (rtx, rtx, struct hash_table_d *); | 451 static void hash_scan_set (rtx, rtx, struct hash_table_d *); |
432 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *); | 452 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *); |
433 static void hash_scan_call (rtx, rtx, struct hash_table_d *); | 453 static void hash_scan_call (rtx, rtx, struct hash_table_d *); |
434 static int want_to_gcse_p (rtx); | 454 static int want_to_gcse_p (rtx, int *); |
435 static bool gcse_constant_p (const_rtx); | 455 static bool gcse_constant_p (const_rtx); |
436 static int oprs_unchanged_p (const_rtx, const_rtx, int); | 456 static int oprs_unchanged_p (const_rtx, const_rtx, int); |
437 static int oprs_anticipatable_p (const_rtx, const_rtx); | 457 static int oprs_anticipatable_p (const_rtx, const_rtx); |
438 static int oprs_available_p (const_rtx, const_rtx); | 458 static int oprs_available_p (const_rtx, const_rtx); |
439 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, | 459 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int, |
440 struct hash_table_d *); | 460 struct hash_table_d *); |
441 static void insert_set_in_table (rtx, rtx, struct hash_table_d *); | 461 static void insert_set_in_table (rtx, rtx, struct hash_table_d *); |
442 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int); | 462 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int); |
443 static unsigned int hash_set (int, int); | 463 static unsigned int hash_set (int, int); |
444 static int expr_equiv_p (const_rtx, const_rtx); | 464 static int expr_equiv_p (const_rtx, const_rtx); |
459 static void mark_clobber (rtx, rtx); | 479 static void mark_clobber (rtx, rtx); |
460 static void mark_oprs_set (rtx); | 480 static void mark_oprs_set (rtx); |
461 static void alloc_cprop_mem (int, int); | 481 static void alloc_cprop_mem (int, int); |
462 static void free_cprop_mem (void); | 482 static void free_cprop_mem (void); |
463 static void compute_transp (const_rtx, int, sbitmap *, int); | 483 static void compute_transp (const_rtx, int, sbitmap *, int); |
464 static void compute_transpout (void); | |
465 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, | 484 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, |
466 struct hash_table_d *); | 485 struct hash_table_d *); |
467 static void compute_cprop_data (void); | 486 static void compute_cprop_data (void); |
468 static void find_used_regs (rtx *, void *); | 487 static void find_used_regs (rtx *, void *); |
469 static int try_replace_reg (rtx, rtx, rtx); | 488 static int try_replace_reg (rtx, rtx, rtx); |
483 static void alloc_pre_mem (int, int); | 502 static void alloc_pre_mem (int, int); |
484 static void free_pre_mem (void); | 503 static void free_pre_mem (void); |
485 static void compute_pre_data (void); | 504 static void compute_pre_data (void); |
486 static int pre_expr_reaches_here_p (basic_block, struct expr *, | 505 static int pre_expr_reaches_here_p (basic_block, struct expr *, |
487 basic_block); | 506 basic_block); |
488 static void insert_insn_end_basic_block (struct expr *, basic_block, int); | 507 static void insert_insn_end_basic_block (struct expr *, basic_block); |
489 static void pre_insert_copy_insn (struct expr *, rtx); | 508 static void pre_insert_copy_insn (struct expr *, rtx); |
490 static void pre_insert_copies (void); | 509 static void pre_insert_copies (void); |
491 static int pre_delete (void); | 510 static int pre_delete (void); |
492 static int pre_gcse (void); | 511 static int pre_gcse (void); |
493 static int one_pre_gcse_pass (void); | 512 static int one_pre_gcse_pass (void); |
494 static void add_label_notes (rtx, rtx); | 513 static void add_label_notes (rtx, rtx); |
495 static void alloc_code_hoist_mem (int, int); | 514 static void alloc_code_hoist_mem (int, int); |
496 static void free_code_hoist_mem (void); | 515 static void free_code_hoist_mem (void); |
497 static void compute_code_hoist_vbeinout (void); | 516 static void compute_code_hoist_vbeinout (void); |
498 static void compute_code_hoist_data (void); | 517 static void compute_code_hoist_data (void); |
499 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *); | 518 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *, |
519 int, int *); | |
500 static int hoist_code (void); | 520 static int hoist_code (void); |
501 static int one_code_hoisting_pass (void); | 521 static int one_code_hoisting_pass (void); |
502 static rtx process_insert_insn (struct expr *); | 522 static rtx process_insert_insn (struct expr *); |
503 static int pre_edge_insert (struct edge_list *, struct expr **); | 523 static int pre_edge_insert (struct edge_list *, struct expr **); |
504 static int pre_expr_reaches_here_p_work (basic_block, struct expr *, | 524 static int pre_expr_reaches_here_p_work (basic_block, struct expr *, |
536 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T))) | 556 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T))) |
537 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S))) | 557 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S))) |
538 | 558 |
539 /* Misc. utilities. */ | 559 /* Misc. utilities. */ |
540 | 560 |
541 /* Nonzero for each mode that supports (set (reg) (reg)). | 561 #define can_copy \ |
542 This is trivially true for integer and floating point values. | 562 (this_target_gcse->x_can_copy) |
543 It may or may not be true for condition codes. */ | 563 #define can_copy_init_p \ |
544 static char can_copy[(int) NUM_MACHINE_MODES]; | 564 (this_target_gcse->x_can_copy_init_p) |
545 | 565 |
546 /* Compute which modes support reg/reg copy operations. */ | 566 /* Compute which modes support reg/reg copy operations. */ |
547 | 567 |
548 static void | 568 static void |
549 compute_can_copy (void) | 569 compute_can_copy (void) |
576 /* Returns whether the mode supports reg/reg copy operations. */ | 596 /* Returns whether the mode supports reg/reg copy operations. */ |
577 | 597 |
578 bool | 598 bool |
579 can_copy_p (enum machine_mode mode) | 599 can_copy_p (enum machine_mode mode) |
580 { | 600 { |
581 static bool can_copy_init_p = false; | |
582 | |
583 if (! can_copy_init_p) | 601 if (! can_copy_init_p) |
584 { | 602 { |
585 compute_can_copy (); | 603 compute_can_copy (); |
586 can_copy_init_p = true; | 604 can_copy_init_p = true; |
587 } | 605 } |
752 | 770 |
753 /* See whether X, the source of a set, is something we want to consider for | 771 /* See whether X, the source of a set, is something we want to consider for |
754 GCSE. */ | 772 GCSE. */ |
755 | 773 |
756 static int | 774 static int |
757 want_to_gcse_p (rtx x) | 775 want_to_gcse_p (rtx x, int *max_distance_ptr) |
758 { | 776 { |
759 #ifdef STACK_REGS | 777 #ifdef STACK_REGS |
760 /* On register stack architectures, don't GCSE constants from the | 778 /* On register stack architectures, don't GCSE constants from the |
761 constant pool, as the benefits are often swamped by the overhead | 779 constant pool, as the benefits are often swamped by the overhead |
762 of shuffling the register stack between basic blocks. */ | 780 of shuffling the register stack between basic blocks. */ |
763 if (IS_STACK_MODE (GET_MODE (x))) | 781 if (IS_STACK_MODE (GET_MODE (x))) |
764 x = avoid_constant_pool_reference (x); | 782 x = avoid_constant_pool_reference (x); |
765 #endif | 783 #endif |
766 | 784 |
785 /* GCSE'ing constants: | |
786 | |
787 We do not specifically distinguish between constant and non-constant | |
788 expressions in PRE and Hoist. We use rtx_cost below to limit | |
789 the maximum distance simple expressions can travel. | |
790 | |
791 Nevertheless, constants are much easier to GCSE, and, hence, | |
792 it is easy to overdo the optimizations. Usually, excessive PRE and | |
793 Hoisting of constant leads to increased register pressure. | |
794 | |
795 RA can deal with this by rematerializing some of the constants. | 
796 Therefore, it is important that the back-end generates sets of constants | |
797 in a way that allows reload to rematerialize them under high register | 
798 pressure, i.e., a pseudo register with REG_EQUAL to constant | |
799 is set only once. Failing to do so will result in IRA/reload | |
800 spilling such constants under high register pressure instead of | |
801 rematerializing them. */ | |
802 | |
767 switch (GET_CODE (x)) | 803 switch (GET_CODE (x)) |
768 { | 804 { |
769 case REG: | 805 case REG: |
770 case SUBREG: | 806 case SUBREG: |
807 case CALL: | |
808 return 0; | |
809 | |
771 case CONST_INT: | 810 case CONST_INT: |
772 case CONST_DOUBLE: | 811 case CONST_DOUBLE: |
773 case CONST_FIXED: | 812 case CONST_FIXED: |
774 case CONST_VECTOR: | 813 case CONST_VECTOR: |
775 case CALL: | 814 if (!doing_code_hoisting_p) |
776 return 0; | 815 /* Do not PRE constants. */ |
816 return 0; | |
817 | |
818 /* FALLTHRU */ | |
777 | 819 |
778 default: | 820 default: |
821 if (doing_code_hoisting_p) | |
822 /* PRE doesn't implement max_distance restriction. */ | |
823 { | |
824 int cost; | |
825 int max_distance; | |
826 | |
827 gcc_assert (!optimize_function_for_speed_p (cfun) | |
828 && optimize_function_for_size_p (cfun)); | |
829 cost = rtx_cost (x, SET, 0); | |
830 | |
831 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST)) | |
832 { | |
833 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10; | |
834 if (max_distance == 0) | |
835 return 0; | |
836 | |
837 gcc_assert (max_distance > 0); | |
838 } | |
839 else | |
840 max_distance = 0; | |
841 | |
842 if (max_distance_ptr) | |
843 *max_distance_ptr = max_distance; | |
844 } | |
845 | |
779 return can_assign_to_reg_without_clobbers_p (x); | 846 return can_assign_to_reg_without_clobbers_p (x); |
780 } | 847 } |
781 } | 848 } |
782 | 849 |
783 /* Used internally by can_assign_to_reg_without_clobbers_p. */ | 850 /* Used internally by can_assign_to_reg_without_clobbers_p. */ |
1087 | 1154 |
1088 MODE is the mode of the value X is being stored into. | 1155 MODE is the mode of the value X is being stored into. |
1089 It is only used if X is a CONST_INT. | 1156 It is only used if X is a CONST_INT. |
1090 | 1157 |
1091 ANTIC_P is nonzero if X is an anticipatable expression. | 1158 ANTIC_P is nonzero if X is an anticipatable expression. |
1092 AVAIL_P is nonzero if X is an available expression. */ | 1159 AVAIL_P is nonzero if X is an available expression. |
1160 | |
1161 MAX_DISTANCE is the maximum distance in instructions this expression can | |
1162 be moved. */ | |
1093 | 1163 |
1094 static void | 1164 static void |
1095 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, | 1165 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, |
1096 int avail_p, struct hash_table_d *table) | 1166 int avail_p, int max_distance, struct hash_table_d *table) |
1097 { | 1167 { |
1098 int found, do_not_record_p; | 1168 int found, do_not_record_p; |
1099 unsigned int hash; | 1169 unsigned int hash; |
1100 struct expr *cur_expr, *last_expr = NULL; | 1170 struct expr *cur_expr, *last_expr = NULL; |
1101 struct occr *antic_occr, *avail_occr; | 1171 struct occr *antic_occr, *avail_occr; |
1134 cur_expr->expr = x; | 1204 cur_expr->expr = x; |
1135 cur_expr->bitmap_index = table->n_elems++; | 1205 cur_expr->bitmap_index = table->n_elems++; |
1136 cur_expr->next_same_hash = NULL; | 1206 cur_expr->next_same_hash = NULL; |
1137 cur_expr->antic_occr = NULL; | 1207 cur_expr->antic_occr = NULL; |
1138 cur_expr->avail_occr = NULL; | 1208 cur_expr->avail_occr = NULL; |
1139 } | 1209 gcc_assert (max_distance >= 0); |
1210 cur_expr->max_distance = max_distance; | |
1211 } | |
1212 else | |
1213 gcc_assert (cur_expr->max_distance == max_distance); | |
1140 | 1214 |
1141 /* Now record the occurrence(s). */ | 1215 /* Now record the occurrence(s). */ |
1142 if (antic_p) | 1216 if (antic_p) |
1143 { | 1217 { |
1144 antic_occr = cur_expr->antic_occr; | 1218 antic_occr = cur_expr->antic_occr; |
1235 cur_expr->expr = copy_rtx (x); | 1309 cur_expr->expr = copy_rtx (x); |
1236 cur_expr->bitmap_index = table->n_elems++; | 1310 cur_expr->bitmap_index = table->n_elems++; |
1237 cur_expr->next_same_hash = NULL; | 1311 cur_expr->next_same_hash = NULL; |
1238 cur_expr->antic_occr = NULL; | 1312 cur_expr->antic_occr = NULL; |
1239 cur_expr->avail_occr = NULL; | 1313 cur_expr->avail_occr = NULL; |
1314 /* Not used for set_p tables. */ | |
1315 cur_expr->max_distance = 0; | |
1240 } | 1316 } |
1241 | 1317 |
1242 /* Now record the occurrence. */ | 1318 /* Now record the occurrence. */ |
1243 cur_occr = cur_expr->avail_occr; | 1319 cur_occr = cur_expr->avail_occr; |
1244 | 1320 |
1304 | 1380 |
1305 else if (REG_P (dest)) | 1381 else if (REG_P (dest)) |
1306 { | 1382 { |
1307 unsigned int regno = REGNO (dest); | 1383 unsigned int regno = REGNO (dest); |
1308 rtx tmp; | 1384 rtx tmp; |
1385 int max_distance = 0; | |
1309 | 1386 |
1310 /* See if a REG_EQUAL note shows this equivalent to a simpler expression. | 1387 /* See if a REG_EQUAL note shows this equivalent to a simpler expression. |
1311 | 1388 |
1312 This allows us to do a single GCSE pass and still eliminate | 1389 This allows us to do a single GCSE pass and still eliminate |
1313 redundant constants, addresses or other expressions that are | 1390 redundant constants, addresses or other expressions that are |
1326 if (note != 0 | 1403 if (note != 0 |
1327 && REG_NOTE_KIND (note) == REG_EQUAL | 1404 && REG_NOTE_KIND (note) == REG_EQUAL |
1328 && !REG_P (src) | 1405 && !REG_P (src) |
1329 && (table->set_p | 1406 && (table->set_p |
1330 ? gcse_constant_p (XEXP (note, 0)) | 1407 ? gcse_constant_p (XEXP (note, 0)) |
1331 : want_to_gcse_p (XEXP (note, 0)))) | 1408 : want_to_gcse_p (XEXP (note, 0), NULL))) |
1332 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src); | 1409 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src); |
1333 | 1410 |
1334 /* Only record sets of pseudo-regs in the hash table. */ | 1411 /* Only record sets of pseudo-regs in the hash table. */ |
1335 if (! table->set_p | 1412 if (! table->set_p |
1336 && regno >= FIRST_PSEUDO_REGISTER | 1413 && regno >= FIRST_PSEUDO_REGISTER |
1341 /* ??? We can now easily create new EH landing pads at the | 1418 /* ??? We can now easily create new EH landing pads at the |
1342 gimple level, for splitting edges; there's no reason we | 1419 gimple level, for splitting edges; there's no reason we |
1343 can't do the same thing at the rtl level. */ | 1420 can't do the same thing at the rtl level. */ |
1344 && !can_throw_internal (insn) | 1421 && !can_throw_internal (insn) |
1345 /* Is SET_SRC something we want to gcse? */ | 1422 /* Is SET_SRC something we want to gcse? */ |
1346 && want_to_gcse_p (src) | 1423 && want_to_gcse_p (src, &max_distance) |
1347 /* Don't CSE a nop. */ | 1424 /* Don't CSE a nop. */ |
1348 && ! set_noop_p (pat) | 1425 && ! set_noop_p (pat) |
1349 /* Don't GCSE if it has attached REG_EQUIV note. | 1426 /* Don't GCSE if it has attached REG_EQUIV note. |
1350 At this point this only function parameters should have | 1427 At this point this only function parameters should have |
1351 REG_EQUIV notes and if the argument slot is used somewhere | 1428 REG_EQUIV notes and if the argument slot is used somewhere |
1365 available if this is a branch, because we can't insert | 1442 available if this is a branch, because we can't insert |
1366 a set after the branch. */ | 1443 a set after the branch. */ |
1367 int avail_p = (oprs_available_p (src, insn) | 1444 int avail_p = (oprs_available_p (src, insn) |
1368 && ! JUMP_P (insn)); | 1445 && ! JUMP_P (insn)); |
1369 | 1446 |
1370 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table); | 1447 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, |
1448 max_distance, table); | |
1371 } | 1449 } |
1372 | 1450 |
1373 /* Record sets for constant/copy propagation. */ | 1451 /* Record sets for constant/copy propagation. */ |
1374 else if (table->set_p | 1452 else if (table->set_p |
1375 && regno >= FIRST_PSEUDO_REGISTER | 1453 && regno >= FIRST_PSEUDO_REGISTER |
1380 || gcse_constant_p (src)) | 1458 || gcse_constant_p (src)) |
1381 /* A copy is not available if its src or dest is subsequently | 1459 /* A copy is not available if its src or dest is subsequently |
1382 modified. Here we want to search from INSN+1 on, but | 1460 modified. Here we want to search from INSN+1 on, but |
1383 oprs_available_p searches from INSN on. */ | 1461 oprs_available_p searches from INSN on. */ |
1384 && (insn == BB_END (BLOCK_FOR_INSN (insn)) | 1462 && (insn == BB_END (BLOCK_FOR_INSN (insn)) |
1385 || (tmp = next_nonnote_insn (insn)) == NULL_RTX | 1463 || (tmp = next_nonnote_nondebug_insn (insn)) == NULL_RTX |
1386 || BLOCK_FOR_INSN (tmp) != BLOCK_FOR_INSN (insn) | 1464 || BLOCK_FOR_INSN (tmp) != BLOCK_FOR_INSN (insn) |
1387 || oprs_available_p (pat, tmp))) | 1465 || oprs_available_p (pat, tmp))) |
1388 insert_set_in_table (pat, insn, table); | 1466 insert_set_in_table (pat, insn, table); |
1389 } | 1467 } |
1390 /* In case of store we want to consider the memory value as available in | 1468 /* In case of store we want to consider the memory value as available in |
1391 the REG stored in that memory. This makes it possible to remove | 1469 the REG stored in that memory. This makes it possible to remove |
1392 redundant loads due to stores to the same location. */ | 1470 redundant loads due to stores to the same location. */ |
1393 else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) | 1471 else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) |
1394 { | 1472 { |
1395 unsigned int regno = REGNO (src); | 1473 unsigned int regno = REGNO (src); |
1474 int max_distance = 0; | |
1396 | 1475 |
1397 /* Do not do this for constant/copy propagation. */ | 1476 /* Do not do this for constant/copy propagation. */ |
1398 if (! table->set_p | 1477 if (! table->set_p |
1399 /* Only record sets of pseudo-regs in the hash table. */ | 1478 /* Only record sets of pseudo-regs in the hash table. */ |
1400 && regno >= FIRST_PSEUDO_REGISTER | 1479 && regno >= FIRST_PSEUDO_REGISTER |
1402 && can_copy_p (GET_MODE (src)) | 1481 && can_copy_p (GET_MODE (src)) |
1403 /* GCSE commonly inserts instruction after the insn. We can't | 1482 /* GCSE commonly inserts instruction after the insn. We can't |
1404 do that easily for EH edges so disable GCSE on these for now. */ | 1483 do that easily for EH edges so disable GCSE on these for now. */ |
1405 && !can_throw_internal (insn) | 1484 && !can_throw_internal (insn) |
1406 /* Is SET_DEST something we want to gcse? */ | 1485 /* Is SET_DEST something we want to gcse? */ |
1407 && want_to_gcse_p (dest) | 1486 && want_to_gcse_p (dest, &max_distance) |
1408 /* Don't CSE a nop. */ | 1487 /* Don't CSE a nop. */ |
1409 && ! set_noop_p (pat) | 1488 && ! set_noop_p (pat) |
1410 /* Don't GCSE if it has attached REG_EQUIV note. | 1489 /* Don't GCSE if it has attached REG_EQUIV note. |
1411 At this point this only function parameters should have | 1490 At this point this only function parameters should have |
1412 REG_EQUIV notes and if the argument slot is used somewhere | 1491 REG_EQUIV notes and if the argument slot is used somewhere |
1424 int avail_p = oprs_available_p (dest, insn) | 1503 int avail_p = oprs_available_p (dest, insn) |
1425 && ! JUMP_P (insn); | 1504 && ! JUMP_P (insn); |
1426 | 1505 |
1427 /* Record the memory expression (DEST) in the hash table. */ | 1506 /* Record the memory expression (DEST) in the hash table. */ |
1428 insert_expr_in_table (dest, GET_MODE (dest), insn, | 1507 insert_expr_in_table (dest, GET_MODE (dest), insn, |
1429 antic_p, avail_p, table); | 1508 antic_p, avail_p, max_distance, table); |
1430 } | 1509 } |
1431 } | 1510 } |
1432 } | 1511 } |
1433 | 1512 |
1434 static void | 1513 static void |
1510 | 1589 |
1511 for (i = 0; i < (int) table->n_elems; i++) | 1590 for (i = 0; i < (int) table->n_elems; i++) |
1512 if (flat_table[i] != 0) | 1591 if (flat_table[i] != 0) |
1513 { | 1592 { |
1514 expr = flat_table[i]; | 1593 expr = flat_table[i]; |
1515 fprintf (file, "Index %d (hash value %d)\n ", | 1594 fprintf (file, "Index %d (hash value %d; max distance %d)\n ", |
1516 expr->bitmap_index, hash_val[i]); | 1595 expr->bitmap_index, hash_val[i], expr->max_distance); |
1517 print_rtl (file, expr->expr); | 1596 print_rtl (file, expr->expr); |
1518 fprintf (file, "\n"); | 1597 fprintf (file, "\n"); |
1519 } | 1598 } |
1520 | 1599 |
1521 fprintf (file, "\n"); | 1600 fprintf (file, "\n"); |
1667 | 1746 |
1668 /* First pass over the instructions records information used to | 1747 /* First pass over the instructions records information used to |
1669 determine when registers and memory are first and last set. */ | 1748 determine when registers and memory are first and last set. */ |
1670 FOR_BB_INSNS (current_bb, insn) | 1749 FOR_BB_INSNS (current_bb, insn) |
1671 { | 1750 { |
1672 if (! INSN_P (insn)) | 1751 if (!NONDEBUG_INSN_P (insn)) |
1673 continue; | 1752 continue; |
1674 | 1753 |
1675 if (CALL_P (insn)) | 1754 if (CALL_P (insn)) |
1676 { | 1755 { |
1677 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) | 1756 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
1690 hash_scan_set (implicit_sets[current_bb->index], | 1769 hash_scan_set (implicit_sets[current_bb->index], |
1691 BB_HEAD (current_bb), table); | 1770 BB_HEAD (current_bb), table); |
1692 | 1771 |
1693 /* The next pass builds the hash table. */ | 1772 /* The next pass builds the hash table. */ |
1694 FOR_BB_INSNS (current_bb, insn) | 1773 FOR_BB_INSNS (current_bb, insn) |
1695 if (INSN_P (insn)) | 1774 if (NONDEBUG_INSN_P (insn)) |
1696 hash_scan_insn (insn, table); | 1775 hash_scan_insn (insn, table); |
1697 } | 1776 } |
1698 | 1777 |
1699 free (reg_avail_info); | 1778 free (reg_avail_info); |
1700 reg_avail_info = NULL; | 1779 reg_avail_info = NULL; |
2270 | 2349 |
2271 if (!rtx_equal_p (src, SET_SRC (set)) | 2350 if (!rtx_equal_p (src, SET_SRC (set)) |
2272 && validate_change (insn, &SET_SRC (set), src, 0)) | 2351 && validate_change (insn, &SET_SRC (set), src, 0)) |
2273 success = 1; | 2352 success = 1; |
2274 | 2353 |
2275 /* If we've failed to do replacement, have a single SET, don't already | 2354 /* If we've failed to perform the replacement, have a single SET to |
2276 have a note, and have no special SET, add a REG_EQUAL note to not | 2355 a REG destination and don't yet have a note, add a REG_EQUAL note |
2277 lose information. */ | 2356 to not lose information. */ |
2278 if (!success && note == 0 && set != 0 | 2357 if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set))) |
2279 && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT | |
2280 && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART) | |
2281 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); | 2358 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); |
2282 } | 2359 } |
2283 | 2360 |
2284 /* REG_EQUAL may get simplified into register. | 2361 /* REG_EQUAL may get simplified into register. |
2285 We don't allow that. Remove that note. This code ought | 2362 We don't allow that. Remove that note. This code ought |
2659 if (REG_P (x) | 2736 if (REG_P (x) |
2660 && (REGNO (x) >= FIRST_PSEUDO_REGISTER | 2737 && (REGNO (x) >= FIRST_PSEUDO_REGISTER |
2661 || (GET_CODE (PATTERN (insn)) != USE | 2738 || (GET_CODE (PATTERN (insn)) != USE |
2662 && asm_noperands (PATTERN (insn)) < 0))) | 2739 && asm_noperands (PATTERN (insn)) < 0))) |
2663 { | 2740 { |
2664 cselib_val *val = cselib_lookup (x, GET_MODE (x), 0); | 2741 cselib_val *val = cselib_lookup (x, GET_MODE (x), 0, VOIDmode); |
2665 struct elt_loc_list *l; | 2742 struct elt_loc_list *l; |
2666 | 2743 |
2667 if (!val) | 2744 if (!val) |
2668 return false; | 2745 return false; |
2669 for (l = val->locs; l; l = l->next) | 2746 for (l = val->locs; l; l = l->next) |
3165 | 3242 |
3166 /* Local properties of expressions. */ | 3243 /* Local properties of expressions. */ |
3167 /* Nonzero for expressions that are transparent in the block. */ | 3244 /* Nonzero for expressions that are transparent in the block. */ |
3168 static sbitmap *transp; | 3245 static sbitmap *transp; |
3169 | 3246 |
3170 /* Nonzero for expressions that are transparent at the end of the block. | |
3171 This is only zero for expressions killed by abnormal critical edge | |
3172 created by a calls. */ | |
3173 static sbitmap *transpout; | |
3174 | |
3175 /* Nonzero for expressions that are computed (available) in the block. */ | 3247 /* Nonzero for expressions that are computed (available) in the block. */ |
3176 static sbitmap *comp; | 3248 static sbitmap *comp; |
3177 | 3249 |
3178 /* Nonzero for expressions that are locally anticipatable in the block. */ | 3250 /* Nonzero for expressions that are locally anticipatable in the block. */ |
3179 static sbitmap *antloc; | 3251 static sbitmap *antloc; |
3233 | 3305 |
3234 transp = comp = NULL; | 3306 transp = comp = NULL; |
3235 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; | 3307 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; |
3236 } | 3308 } |
3237 | 3309 |
3310 /* Remove certain expressions from anticipatable and transparent | |
3311 sets of basic blocks that have incoming abnormal edge. | |
3312 For PRE remove potentially trapping expressions to avoid placing | |
3313 them on abnormal edges. For hoisting remove memory references that | |
3314 can be clobbered by calls. */ | |
3315 | |
3316 static void | |
3317 prune_expressions (bool pre_p) | |
3318 { | |
3319 sbitmap prune_exprs; | |
3320 unsigned int ui; | |
3321 basic_block bb; | |
3322 | |
3323 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems); | |
3324 sbitmap_zero (prune_exprs); | |
3325 for (ui = 0; ui < expr_hash_table.size; ui++) | |
3326 { | |
3327 struct expr *e; | |
3328 for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash) | |
3329 { | |
3330 /* Note potentially trapping expressions. */ | |
3331 if (may_trap_p (e->expr)) | |
3332 { | |
3333 SET_BIT (prune_exprs, e->bitmap_index); | |
3334 continue; | |
3335 } | |
3336 | |
3337 if (!pre_p && MEM_P (e->expr)) | |
3338 /* Note memory references that can be clobbered by a call. | |
3339 We do not split abnormal edges in hoisting, so would | |
3340 a memory reference get hoisted along an abnormal edge, | |
3341 it would be placed /before/ the call. Therefore, only | |
3342 constant memory references can be hoisted along abnormal | |
3343 edges. */ | |
3344 { | |
3345 if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF | |
3346 && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0))) | |
3347 continue; | |
3348 | |
3349 if (MEM_READONLY_P (e->expr) | |
3350 && !MEM_VOLATILE_P (e->expr) | |
3351 && MEM_NOTRAP_P (e->expr)) | |
3352 /* Constant memory reference, e.g., a PIC address. */ | |
3353 continue; | |
3354 | |
3355 /* ??? Optimally, we would use interprocedural alias | |
3356 analysis to determine if this mem is actually killed | |
3357 by this call. */ | |
3358 | |
3359 SET_BIT (prune_exprs, e->bitmap_index); | |
3360 } | |
3361 } | |
3362 } | |
3363 | |
3364 FOR_EACH_BB (bb) | |
3365 { | |
3366 edge e; | |
3367 edge_iterator ei; | |
3368 | |
3369 /* If the current block is the destination of an abnormal edge, we | |
3370 kill all trapping (for PRE) and memory (for hoist) expressions | |
3371 because we won't be able to properly place the instruction on | |
3372 the edge. So make them neither anticipatable nor transparent. | |
3373 This is fairly conservative. | |
3374 | |
3375 ??? For hoisting it may be necessary to check for set-and-jump | |
3376 instructions here, not just for abnormal edges. The general problem | |
3377 is that when an expression cannot not be placed right at the end of | |
3378 a basic block we should account for any side-effects of a subsequent | |
3379 jump instructions that could clobber the expression. It would | |
3380 be best to implement this check along the lines of | |
3381 hoist_expr_reaches_here_p where the target block is already known | |
3382 and, hence, there's no need to conservatively prune expressions on | |
3383 "intermediate" set-and-jump instructions. */ | |
3384 FOR_EACH_EDGE (e, ei, bb->preds) | |
3385 if ((e->flags & EDGE_ABNORMAL) | |
3386 && (pre_p || CALL_P (BB_END (e->src)))) | |
3387 { | |
3388 sbitmap_difference (antloc[bb->index], | |
3389 antloc[bb->index], prune_exprs); | |
3390 sbitmap_difference (transp[bb->index], | |
3391 transp[bb->index], prune_exprs); | |
3392 break; | |
3393 } | |
3394 } | |
3395 | |
3396 sbitmap_free (prune_exprs); | |
3397 } | |
3398 | |
3399 /* It may be necessary to insert a large number of insns on edges to | |
3400 make the existing occurrences of expressions fully redundant. This | |
3401 routine examines the set of insertions and deletions and if the ratio | |
3402 of insertions to deletions is too high for a particular expression, then | |
3403 the expression is removed from the insertion/deletion sets. | |
3404 | |
3405 N_ELEMS is the number of elements in the hash table. */ | |
3406 | |
3407 static void | |
3408 prune_insertions_deletions (int n_elems) | |
3409 { | |
3410 sbitmap_iterator sbi; | |
3411 sbitmap prune_exprs; | |
3412 | |
3413 /* We always use I to iterate over blocks/edges and J to iterate over | |
3414 expressions. */ | |
3415 unsigned int i, j; | |
3416 | |
3417 /* Counts for the number of times an expression needs to be inserted and | |
3418 number of times an expression can be removed as a result. */ | |
3419 int *insertions = GCNEWVEC (int, n_elems); | |
3420 int *deletions = GCNEWVEC (int, n_elems); | |
3421 | |
3422 /* Set of expressions which require too many insertions relative to | |
3423 the number of deletions achieved. We will prune these out of the | |
3424 insertion/deletion sets. */ | |
3425 prune_exprs = sbitmap_alloc (n_elems); | |
3426 sbitmap_zero (prune_exprs); | |
3427 | |
3428 /* Iterate over the edges counting the number of times each expression | |
3429 needs to be inserted. */ | |
3430 for (i = 0; i < (unsigned) n_edges; i++) | |
3431 { | |
3432 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi) | |
3433 insertions[j]++; | |
3434 } | |
3435 | |
3436 /* Similarly for deletions, but those occur in blocks rather than on | |
3437 edges. */ | |
3438 for (i = 0; i < (unsigned) last_basic_block; i++) | |
3439 { | |
3440 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi) | |
3441 deletions[j]++; | |
3442 } | |
3443 | |
3444 /* Now that we have accurate counts, iterate over the elements in the | |
3445 hash table and see if any need too many insertions relative to the | |
3446 number of evaluations that can be removed. If so, mark them in | |
3447 PRUNE_EXPRS. */ | |
3448 for (j = 0; j < (unsigned) n_elems; j++) | |
3449 if (deletions[j] | |
3450 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO) | |
3451 SET_BIT (prune_exprs, j); | |
3452 | |
3453 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */ | |
3454 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi) | |
3455 { | |
3456 for (i = 0; i < (unsigned) n_edges; i++) | |
3457 RESET_BIT (pre_insert_map[i], j); | |
3458 | |
3459 for (i = 0; i < (unsigned) last_basic_block; i++) | |
3460 RESET_BIT (pre_delete_map[i], j); | |
3461 } | |
3462 | |
3463 sbitmap_free (prune_exprs); | |
3464 free (insertions); | |
3465 free (deletions); | |
3466 } | |
3467 | |
3238 /* Top level routine to do the dataflow analysis needed by PRE. */ | 3468 /* Top level routine to do the dataflow analysis needed by PRE. */ |
3239 | 3469 |
3240 static void | 3470 static void |
3241 compute_pre_data (void) | 3471 compute_pre_data (void) |
3242 { | 3472 { |
3243 sbitmap trapping_expr; | |
3244 basic_block bb; | 3473 basic_block bb; |
3245 unsigned int ui; | |
3246 | 3474 |
3247 compute_local_properties (transp, comp, antloc, &expr_hash_table); | 3475 compute_local_properties (transp, comp, antloc, &expr_hash_table); |
3476 prune_expressions (true); | |
3248 sbitmap_vector_zero (ae_kill, last_basic_block); | 3477 sbitmap_vector_zero (ae_kill, last_basic_block); |
3249 | |
3250 /* Collect expressions which might trap. */ | |
3251 trapping_expr = sbitmap_alloc (expr_hash_table.n_elems); | |
3252 sbitmap_zero (trapping_expr); | |
3253 for (ui = 0; ui < expr_hash_table.size; ui++) | |
3254 { | |
3255 struct expr *e; | |
3256 for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash) | |
3257 if (may_trap_p (e->expr)) | |
3258 SET_BIT (trapping_expr, e->bitmap_index); | |
3259 } | |
3260 | 3478 |
3261 /* Compute ae_kill for each basic block using: | 3479 /* Compute ae_kill for each basic block using: |
3262 | 3480 |
3263 ~(TRANSP | COMP) | 3481 ~(TRANSP | COMP) |
3264 */ | 3482 */ |
3265 | 3483 |
3266 FOR_EACH_BB (bb) | 3484 FOR_EACH_BB (bb) |
3267 { | 3485 { |
3268 edge e; | |
3269 edge_iterator ei; | |
3270 | |
3271 /* If the current block is the destination of an abnormal edge, we | |
3272 kill all trapping expressions because we won't be able to properly | |
3273 place the instruction on the edge. So make them neither | |
3274 anticipatable nor transparent. This is fairly conservative. */ | |
3275 FOR_EACH_EDGE (e, ei, bb->preds) | |
3276 if (e->flags & EDGE_ABNORMAL) | |
3277 { | |
3278 sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr); | |
3279 sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr); | |
3280 break; | |
3281 } | |
3282 | |
3283 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]); | 3486 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]); |
3284 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]); | 3487 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]); |
3285 } | 3488 } |
3286 | 3489 |
3287 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc, | 3490 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc, |
3288 ae_kill, &pre_insert_map, &pre_delete_map); | 3491 ae_kill, &pre_insert_map, &pre_delete_map); |
3289 sbitmap_vector_free (antloc); | 3492 sbitmap_vector_free (antloc); |
3290 antloc = NULL; | 3493 antloc = NULL; |
3291 sbitmap_vector_free (ae_kill); | 3494 sbitmap_vector_free (ae_kill); |
3292 ae_kill = NULL; | 3495 ae_kill = NULL; |
3293 sbitmap_free (trapping_expr); | 3496 |
3497 prune_insertions_deletions (expr_hash_table.n_elems); | |
3294 } | 3498 } |
3295 | 3499 |
3296 /* PRE utilities */ | 3500 /* PRE utilities */ |
3297 | 3501 |
3298 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach | 3502 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach |
3403 return pat; | 3607 return pat; |
3404 } | 3608 } |
3405 | 3609 |
3406 /* Add EXPR to the end of basic block BB. | 3610 /* Add EXPR to the end of basic block BB. |
3407 | 3611 |
3408 This is used by both the PRE and code hoisting. | 3612 This is used by both the PRE and code hoisting. */ |
3409 | |
3410 For PRE, we want to verify that the expr is either transparent | |
3411 or locally anticipatable in the target block. This check makes | |
3412 no sense for code hoisting. */ | |
3413 | 3613 |
3414 static void | 3614 static void |
3415 insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre) | 3615 insert_insn_end_basic_block (struct expr *expr, basic_block bb) |
3416 { | 3616 { |
3417 rtx insn = BB_END (bb); | 3617 rtx insn = BB_END (bb); |
3418 rtx new_insn; | 3618 rtx new_insn; |
3419 rtx reg = expr->reaching_reg; | 3619 rtx reg = expr->reaching_reg; |
3420 int regno = REGNO (reg); | 3620 int regno = REGNO (reg); |
3437 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))) | 3637 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))) |
3438 { | 3638 { |
3439 #ifdef HAVE_cc0 | 3639 #ifdef HAVE_cc0 |
3440 rtx note; | 3640 rtx note; |
3441 #endif | 3641 #endif |
3442 /* It should always be the case that we can put these instructions | |
3443 anywhere in the basic block with performing PRE optimizations. | |
3444 Check this. */ | |
3445 gcc_assert (!NONJUMP_INSN_P (insn) || !pre | |
3446 || TEST_BIT (antloc[bb->index], expr->bitmap_index) | |
3447 || TEST_BIT (transp[bb->index], expr->bitmap_index)); | |
3448 | 3642 |
3449 /* If this is a jump table, then we can't insert stuff here. Since | 3643 /* If this is a jump table, then we can't insert stuff here. Since |
3450 we know the previous real insn must be the tablejump, we insert | 3644 we know the previous real insn must be the tablejump, we insert |
3451 the new instruction just before the tablejump. */ | 3645 the new instruction just before the tablejump. */ |
3452 if (GET_CODE (PATTERN (insn)) == ADDR_VEC | 3646 if (GET_CODE (PATTERN (insn)) == ADDR_VEC |
3453 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) | 3647 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) |
3454 insn = prev_real_insn (insn); | 3648 insn = prev_active_insn (insn); |
3455 | 3649 |
3456 #ifdef HAVE_cc0 | 3650 #ifdef HAVE_cc0 |
3457 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts | 3651 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts |
3458 if cc0 isn't set. */ | 3652 if cc0 isn't set. */ |
3459 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX); | 3653 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX); |
3479 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)) | 3673 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)) |
3480 { | 3674 { |
3481 /* Keeping in mind targets with small register classes and parameters | 3675 /* Keeping in mind targets with small register classes and parameters |
3482 in registers, we search backward and place the instructions before | 3676 in registers, we search backward and place the instructions before |
3483 the first parameter is loaded. Do this for everyone for consistency | 3677 the first parameter is loaded. Do this for everyone for consistency |
3484 and a presumption that we'll get better code elsewhere as well. | 3678 and a presumption that we'll get better code elsewhere as well. */ |
3485 | |
3486 It should always be the case that we can put these instructions | |
3487 anywhere in the basic block with performing PRE optimizations. | |
3488 Check this. */ | |
3489 | |
3490 gcc_assert (!pre | |
3491 || TEST_BIT (antloc[bb->index], expr->bitmap_index) | |
3492 || TEST_BIT (transp[bb->index], expr->bitmap_index)); | |
3493 | 3679 |
3494 /* Since different machines initialize their parameter registers | 3680 /* Since different machines initialize their parameter registers |
3495 in different orders, assume nothing. Collect the set of all | 3681 in different orders, assume nothing. Collect the set of all |
3496 parameter registers. */ | 3682 parameter registers. */ |
3497 insn = find_first_parameter_load (insn, BB_HEAD (bb)); | 3683 insn = find_first_parameter_load (insn, BB_HEAD (bb)); |
3584 detailed in Morgans book P277 (sec 10.5) for | 3770 detailed in Morgans book P277 (sec 10.5) for |
3585 handling this situation. This one is easiest for | 3771 handling this situation. This one is easiest for |
3586 now. */ | 3772 now. */ |
3587 | 3773 |
3588 if (eg->flags & EDGE_ABNORMAL) | 3774 if (eg->flags & EDGE_ABNORMAL) |
3589 insert_insn_end_basic_block (index_map[j], bb, 0); | 3775 insert_insn_end_basic_block (index_map[j], bb); |
3590 else | 3776 else |
3591 { | 3777 { |
3592 insn = process_insert_insn (index_map[j]); | 3778 insn = process_insert_insn (index_map[j]); |
3593 insert_insn_on_edge (insn, eg); | 3779 insert_insn_on_edge (insn, eg); |
3594 } | 3780 } |
4043 for (j = XVECLEN (x, i) - 1; j >= 0; j--) | 4229 for (j = XVECLEN (x, i) - 1; j >= 0; j--) |
4044 add_label_notes (XVECEXP (x, i, j), insn); | 4230 add_label_notes (XVECEXP (x, i, j), insn); |
4045 } | 4231 } |
4046 } | 4232 } |
4047 | 4233 |
4048 /* Compute transparent outgoing information for each block. | |
4049 | |
4050 An expression is transparent to an edge unless it is killed by | |
4051 the edge itself. This can only happen with abnormal control flow, | |
4052 when the edge is traversed through a call. This happens with | |
4053 non-local labels and exceptions. | |
4054 | |
4055 This would not be necessary if we split the edge. While this is | |
4056 normally impossible for abnormal critical edges, with some effort | |
4057 it should be possible with exception handling, since we still have | |
4058 control over which handler should be invoked. But due to increased | |
4059 EH table sizes, this may not be worthwhile. */ | |
4060 | |
4061 static void | |
4062 compute_transpout (void) | |
4063 { | |
4064 basic_block bb; | |
4065 unsigned int i; | |
4066 struct expr *expr; | |
4067 | |
4068 sbitmap_vector_ones (transpout, last_basic_block); | |
4069 | |
4070 FOR_EACH_BB (bb) | |
4071 { | |
4072 /* Note that flow inserted a nop at the end of basic blocks that | |
4073 end in call instructions for reasons other than abnormal | |
4074 control flow. */ | |
4075 if (! CALL_P (BB_END (bb))) | |
4076 continue; | |
4077 | |
4078 for (i = 0; i < expr_hash_table.size; i++) | |
4079 for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash) | |
4080 if (MEM_P (expr->expr)) | |
4081 { | |
4082 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF | |
4083 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0))) | |
4084 continue; | |
4085 | |
4086 /* ??? Optimally, we would use interprocedural alias | |
4087 analysis to determine if this mem is actually killed | |
4088 by this call. */ | |
4089 RESET_BIT (transpout[bb->index], expr->bitmap_index); | |
4090 } | |
4091 } | |
4092 } | |
4093 | |
4094 /* Code Hoisting variables and subroutines. */ | 4234 /* Code Hoisting variables and subroutines. */ |
4095 | 4235 |
4096 /* Very busy expressions. */ | 4236 /* Very busy expressions. */ |
4097 static sbitmap *hoist_vbein; | 4237 static sbitmap *hoist_vbein; |
4098 static sbitmap *hoist_vbeout; | 4238 static sbitmap *hoist_vbeout; |
4099 | 4239 |
4100 /* Hoistable expressions. */ | |
4101 static sbitmap *hoist_exprs; | |
4102 | |
4103 /* ??? We could compute post dominators and run this algorithm in | 4240 /* ??? We could compute post dominators and run this algorithm in |
4104 reverse to perform tail merging, doing so would probably be | 4241 reverse to perform tail merging, doing so would probably be |
4105 more effective than the tail merging code in jump.c. | 4242 more effective than the tail merging code in jump.c. |
4106 | 4243 |
4107 It's unclear if tail merging could be run in parallel with | 4244 It's unclear if tail merging could be run in parallel with |
4116 transp = sbitmap_vector_alloc (n_blocks, n_exprs); | 4253 transp = sbitmap_vector_alloc (n_blocks, n_exprs); |
4117 comp = sbitmap_vector_alloc (n_blocks, n_exprs); | 4254 comp = sbitmap_vector_alloc (n_blocks, n_exprs); |
4118 | 4255 |
4119 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); | 4256 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); |
4120 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); | 4257 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); |
4121 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs); | |
4122 transpout = sbitmap_vector_alloc (n_blocks, n_exprs); | |
4123 } | 4258 } |
4124 | 4259 |
4125 /* Free vars used for code hoisting analysis. */ | 4260 /* Free vars used for code hoisting analysis. */ |
4126 | 4261 |
4127 static void | 4262 static void |
4131 sbitmap_vector_free (transp); | 4266 sbitmap_vector_free (transp); |
4132 sbitmap_vector_free (comp); | 4267 sbitmap_vector_free (comp); |
4133 | 4268 |
4134 sbitmap_vector_free (hoist_vbein); | 4269 sbitmap_vector_free (hoist_vbein); |
4135 sbitmap_vector_free (hoist_vbeout); | 4270 sbitmap_vector_free (hoist_vbeout); |
4136 sbitmap_vector_free (hoist_exprs); | |
4137 sbitmap_vector_free (transpout); | |
4138 | 4271 |
4139 free_dominance_info (CDI_DOMINATORS); | 4272 free_dominance_info (CDI_DOMINATORS); |
4140 } | 4273 } |
4141 | 4274 |
4142 /* Compute the very busy expressions at entry/exit from each block. | 4275 /* Compute the very busy expressions at entry/exit from each block. |
4163 /* We scan the blocks in the reverse order to speed up | 4296 /* We scan the blocks in the reverse order to speed up |
4164 the convergence. */ | 4297 the convergence. */ |
4165 FOR_EACH_BB_REVERSE (bb) | 4298 FOR_EACH_BB_REVERSE (bb) |
4166 { | 4299 { |
4167 if (bb->next_bb != EXIT_BLOCK_PTR) | 4300 if (bb->next_bb != EXIT_BLOCK_PTR) |
4168 sbitmap_intersection_of_succs (hoist_vbeout[bb->index], | 4301 { |
4169 hoist_vbein, bb->index); | 4302 sbitmap_intersection_of_succs (hoist_vbeout[bb->index], |
4303 hoist_vbein, bb->index); | |
4304 | |
4305 /* Include expressions in VBEout that are calculated | |
4306 in BB and available at its end. */ | |
4307 sbitmap_a_or_b (hoist_vbeout[bb->index], | |
4308 hoist_vbeout[bb->index], comp[bb->index]); | |
4309 } | |
4170 | 4310 |
4171 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], | 4311 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], |
4172 antloc[bb->index], | 4312 antloc[bb->index], |
4173 hoist_vbeout[bb->index], | 4313 hoist_vbeout[bb->index], |
4174 transp[bb->index]); | 4314 transp[bb->index]); |
4176 | 4316 |
4177 passes++; | 4317 passes++; |
4178 } | 4318 } |
4179 | 4319 |
4180 if (dump_file) | 4320 if (dump_file) |
4181 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); | 4321 { |
4322 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); | |
4323 | |
4324 FOR_EACH_BB (bb) | |
4325 { | |
4326 fprintf (dump_file, "vbein (%d): ", bb->index); | |
4327 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]); | |
4328 fprintf (dump_file, "vbeout(%d): ", bb->index); | |
4329 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]); | |
4330 } | |
4331 } | |
4182 } | 4332 } |
4183 | 4333 |
4184 /* Top level routine to do the dataflow analysis needed by code hoisting. */ | 4334 /* Top level routine to do the dataflow analysis needed by code hoisting. */ |
4185 | 4335 |
4186 static void | 4336 static void |
4187 compute_code_hoist_data (void) | 4337 compute_code_hoist_data (void) |
4188 { | 4338 { |
4189 compute_local_properties (transp, comp, antloc, &expr_hash_table); | 4339 compute_local_properties (transp, comp, antloc, &expr_hash_table); |
4190 compute_transpout (); | 4340 prune_expressions (false); |
4191 compute_code_hoist_vbeinout (); | 4341 compute_code_hoist_vbeinout (); |
4192 calculate_dominance_info (CDI_DOMINATORS); | 4342 calculate_dominance_info (CDI_DOMINATORS); |
4193 if (dump_file) | 4343 if (dump_file) |
4194 fprintf (dump_file, "\n"); | 4344 fprintf (dump_file, "\n"); |
4195 } | 4345 } |
4196 | 4346 |
4197 /* Determine if the expression identified by EXPR_INDEX would | 4347 /* Determine if the expression identified by EXPR_INDEX would |
4198 reach BB unimpared if it was placed at the end of EXPR_BB. | 4348 reach BB unimpared if it was placed at the end of EXPR_BB. |
4349 Stop the search if the expression would need to be moved more | |
4350 than DISTANCE instructions. | |
4199 | 4351 |
4200 It's unclear exactly what Muchnick meant by "unimpared". It seems | 4352 It's unclear exactly what Muchnick meant by "unimpared". It seems |
4201 to me that the expression must either be computed or transparent in | 4353 to me that the expression must either be computed or transparent in |
4202 *every* block in the path(s) from EXPR_BB to BB. Any other definition | 4354 *every* block in the path(s) from EXPR_BB to BB. Any other definition |
4203 would allow the expression to be hoisted out of loops, even if | 4355 would allow the expression to be hoisted out of loops, even if |
4206 Contrast this to reachability for PRE where an expression is | 4358 Contrast this to reachability for PRE where an expression is |
4207 considered reachable if *any* path reaches instead of *all* | 4359 considered reachable if *any* path reaches instead of *all* |
4208 paths. */ | 4360 paths. */ |
4209 | 4361 |
4210 static int | 4362 static int |
4211 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited) | 4363 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, |
4364 char *visited, int distance, int *bb_size) | |
4212 { | 4365 { |
4213 edge pred; | 4366 edge pred; |
4214 edge_iterator ei; | 4367 edge_iterator ei; |
4215 int visited_allocated_locally = 0; | 4368 int visited_allocated_locally = 0; |
4216 | 4369 |
4370 /* Terminate the search if distance, for which EXPR is allowed to move, | |
4371 is exhausted. */ | |
4372 if (distance > 0) | |
4373 { | |
4374 distance -= bb_size[bb->index]; | |
4375 | |
4376 if (distance <= 0) | |
4377 return 0; | |
4378 } | |
4379 else | |
4380 gcc_assert (distance == 0); | |
4217 | 4381 |
4218 if (visited == NULL) | 4382 if (visited == NULL) |
4219 { | 4383 { |
4220 visited_allocated_locally = 1; | 4384 visited_allocated_locally = 1; |
4221 visited = XCNEWVEC (char, last_basic_block); | 4385 visited = XCNEWVEC (char, last_basic_block); |
4230 else if (pred_bb == expr_bb) | 4394 else if (pred_bb == expr_bb) |
4231 continue; | 4395 continue; |
4232 else if (visited[pred_bb->index]) | 4396 else if (visited[pred_bb->index]) |
4233 continue; | 4397 continue; |
4234 | 4398 |
4235 /* Does this predecessor generate this expression? */ | |
4236 else if (TEST_BIT (comp[pred_bb->index], expr_index)) | |
4237 break; | |
4238 else if (! TEST_BIT (transp[pred_bb->index], expr_index)) | 4399 else if (! TEST_BIT (transp[pred_bb->index], expr_index)) |
4239 break; | 4400 break; |
4240 | 4401 |
4241 /* Not killed. */ | 4402 /* Not killed. */ |
4242 else | 4403 else |
4243 { | 4404 { |
4244 visited[pred_bb->index] = 1; | 4405 visited[pred_bb->index] = 1; |
4245 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, | 4406 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb, |
4246 pred_bb, visited)) | 4407 visited, distance, bb_size)) |
4247 break; | 4408 break; |
4248 } | 4409 } |
4249 } | 4410 } |
4250 if (visited_allocated_locally) | 4411 if (visited_allocated_locally) |
4251 free (visited); | 4412 free (visited); |
4252 | 4413 |
4253 return (pred == NULL); | 4414 return (pred == NULL); |
4254 } | 4415 } |
4255 | 4416 |
4417 /* Find occurence in BB. */ | |
4418 static struct occr * | |
4419 find_occr_in_bb (struct occr *occr, basic_block bb) | |
4420 { | |
4421 /* Find the right occurrence of this expression. */ | |
4422 while (occr && BLOCK_FOR_INSN (occr->insn) != bb) | |
4423 occr = occr->next; | |
4424 | |
4425 return occr; | |
4426 } | |
4427 | |
4256 /* Actually perform code hoisting. */ | 4428 /* Actually perform code hoisting. */ |
4257 | 4429 |
4258 static int | 4430 static int |
4259 hoist_code (void) | 4431 hoist_code (void) |
4260 { | 4432 { |
4261 basic_block bb, dominated; | 4433 basic_block bb, dominated; |
4434 VEC (basic_block, heap) *dom_tree_walk; | |
4435 unsigned int dom_tree_walk_index; | |
4262 VEC (basic_block, heap) *domby; | 4436 VEC (basic_block, heap) *domby; |
4263 unsigned int i,j; | 4437 unsigned int i,j; |
4264 struct expr **index_map; | 4438 struct expr **index_map; |
4265 struct expr *expr; | 4439 struct expr *expr; |
4440 int *to_bb_head; | |
4441 int *bb_size; | |
4266 int changed = 0; | 4442 int changed = 0; |
4267 | |
4268 sbitmap_vector_zero (hoist_exprs, last_basic_block); | |
4269 | 4443 |
4270 /* Compute a mapping from expression number (`bitmap_index') to | 4444 /* Compute a mapping from expression number (`bitmap_index') to |
4271 hash table entry. */ | 4445 hash table entry. */ |
4272 | 4446 |
4273 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems); | 4447 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems); |
4274 for (i = 0; i < expr_hash_table.size; i++) | 4448 for (i = 0; i < expr_hash_table.size; i++) |
4275 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash) | 4449 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash) |
4276 index_map[expr->bitmap_index] = expr; | 4450 index_map[expr->bitmap_index] = expr; |
4277 | 4451 |
4452 /* Calculate sizes of basic blocks and note how far | |
4453 each instruction is from the start of its block. We then use this | |
4454 data to restrict distance an expression can travel. */ | |
4455 | |
4456 to_bb_head = XCNEWVEC (int, get_max_uid ()); | |
4457 bb_size = XCNEWVEC (int, last_basic_block); | |
4458 | |
4459 FOR_EACH_BB (bb) | |
4460 { | |
4461 rtx insn; | |
4462 int to_head; | |
4463 | |
4464 to_head = 0; | |
4465 FOR_BB_INSNS (bb, insn) | |
4466 { | |
4467 /* Don't count debug instructions to avoid them affecting | |
4468 decision choices. */ | |
4469 if (NONDEBUG_INSN_P (insn)) | |
4470 to_bb_head[INSN_UID (insn)] = to_head++; | |
4471 } | |
4472 | |
4473 bb_size[bb->index] = to_head; | |
4474 } | |
4475 | |
4476 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1 | |
4477 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest | |
4478 == ENTRY_BLOCK_PTR->next_bb)); | |
4479 | |
4480 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS, | |
4481 ENTRY_BLOCK_PTR->next_bb); | |
4482 | |
4278 /* Walk over each basic block looking for potentially hoistable | 4483 /* Walk over each basic block looking for potentially hoistable |
4279 expressions, nothing gets hoisted from the entry block. */ | 4484 expressions, nothing gets hoisted from the entry block. */ |
4280 FOR_EACH_BB (bb) | 4485 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb) |
4281 { | 4486 { |
4282 int found = 0; | 4487 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH); |
4283 int insn_inserted_p; | 4488 |
4284 | 4489 if (VEC_length (basic_block, domby) == 0) |
4285 domby = get_dominated_by (CDI_DOMINATORS, bb); | 4490 continue; |
4491 | |
4286 /* Examine each expression that is very busy at the exit of this | 4492 /* Examine each expression that is very busy at the exit of this |
4287 block. These are the potentially hoistable expressions. */ | 4493 block. These are the potentially hoistable expressions. */ |
4288 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) | 4494 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) |
4289 { | 4495 { |
4290 int hoistable = 0; | 4496 if (TEST_BIT (hoist_vbeout[bb->index], i)) |
4291 | |
4292 if (TEST_BIT (hoist_vbeout[bb->index], i) | |
4293 && TEST_BIT (transpout[bb->index], i)) | |
4294 { | 4497 { |
4498 /* Current expression. */ | |
4499 struct expr *expr = index_map[i]; | |
4500 /* Number of occurences of EXPR that can be hoisted to BB. */ | |
4501 int hoistable = 0; | |
4502 /* Basic blocks that have occurences reachable from BB. */ | |
4503 bitmap_head _from_bbs, *from_bbs = &_from_bbs; | |
4504 /* Occurences reachable from BB. */ | |
4505 VEC (occr_t, heap) *occrs_to_hoist = NULL; | |
4506 /* We want to insert the expression into BB only once, so | |
4507 note when we've inserted it. */ | |
4508 int insn_inserted_p; | |
4509 occr_t occr; | |
4510 | |
4511 bitmap_initialize (from_bbs, 0); | |
4512 | |
4513 /* If an expression is computed in BB and is available at end of | |
4514 BB, hoist all occurences dominated by BB to BB. */ | |
4515 if (TEST_BIT (comp[bb->index], i)) | |
4516 { | |
4517 occr = find_occr_in_bb (expr->antic_occr, bb); | |
4518 | |
4519 if (occr) | |
4520 { | |
4521 /* An occurence might've been already deleted | |
4522 while processing a dominator of BB. */ | |
4523 if (occr->deleted_p) | |
4524 gcc_assert (MAX_HOIST_DEPTH > 1); | |
4525 else | |
4526 { | |
4527 gcc_assert (NONDEBUG_INSN_P (occr->insn)); | |
4528 hoistable++; | |
4529 } | |
4530 } | |
4531 else | |
4532 hoistable++; | |
4533 } | |
4534 | |
4295 /* We've found a potentially hoistable expression, now | 4535 /* We've found a potentially hoistable expression, now |
4296 we look at every block BB dominates to see if it | 4536 we look at every block BB dominates to see if it |
4297 computes the expression. */ | 4537 computes the expression. */ |
4298 for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) | 4538 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated) |
4299 { | 4539 { |
4540 int max_distance; | |
4541 | |
4300 /* Ignore self dominance. */ | 4542 /* Ignore self dominance. */ |
4301 if (bb == dominated) | 4543 if (bb == dominated) |
4302 continue; | 4544 continue; |
4303 /* We've found a dominated block, now see if it computes | 4545 /* We've found a dominated block, now see if it computes |
4304 the busy expression and whether or not moving that | 4546 the busy expression and whether or not moving that |
4305 expression to the "beginning" of that block is safe. */ | 4547 expression to the "beginning" of that block is safe. */ |
4306 if (!TEST_BIT (antloc[dominated->index], i)) | 4548 if (!TEST_BIT (antloc[dominated->index], i)) |
4307 continue; | 4549 continue; |
4308 | 4550 |
4551 occr = find_occr_in_bb (expr->antic_occr, dominated); | |
4552 gcc_assert (occr); | |
4553 | |
4554 /* An occurence might've been already deleted | |
4555 while processing a dominator of BB. */ | |
4556 if (occr->deleted_p) | |
4557 { | |
4558 gcc_assert (MAX_HOIST_DEPTH > 1); | |
4559 continue; | |
4560 } | |
4561 gcc_assert (NONDEBUG_INSN_P (occr->insn)); | |
4562 | |
4563 max_distance = expr->max_distance; | |
4564 if (max_distance > 0) | |
4565 /* Adjust MAX_DISTANCE to account for the fact that | |
4566 OCCR won't have to travel all of DOMINATED, but | |
4567 only part of it. */ | |
4568 max_distance += (bb_size[dominated->index] | |
4569 - to_bb_head[INSN_UID (occr->insn)]); | |
4570 | |
4309 /* Note if the expression would reach the dominated block | 4571 /* Note if the expression would reach the dominated block |
4310 unimpared if it was placed at the end of BB. | 4572 unimpared if it was placed at the end of BB. |
4311 | 4573 |
4312 Keep track of how many times this expression is hoistable | 4574 Keep track of how many times this expression is hoistable |
4313 from a dominated block into BB. */ | 4575 from a dominated block into BB. */ |
4314 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) | 4576 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL, |
4315 hoistable++; | 4577 max_distance, bb_size)) |
4578 { | |
4579 hoistable++; | |
4580 VEC_safe_push (occr_t, heap, | |
4581 occrs_to_hoist, occr); | |
4582 bitmap_set_bit (from_bbs, dominated->index); | |
4583 } | |
4316 } | 4584 } |
4317 | 4585 |
4318 /* If we found more than one hoistable occurrence of this | 4586 /* If we found more than one hoistable occurrence of this |
4319 expression, then note it in the bitmap of expressions to | 4587 expression, then note it in the vector of expressions to |
4320 hoist. It makes no sense to hoist things which are computed | 4588 hoist. It makes no sense to hoist things which are computed |
4321 in only one BB, and doing so tends to pessimize register | 4589 in only one BB, and doing so tends to pessimize register |
4322 allocation. One could increase this value to try harder | 4590 allocation. One could increase this value to try harder |
4323 to avoid any possible code expansion due to register | 4591 to avoid any possible code expansion due to register |
4324 allocation issues; however experiments have shown that | 4592 allocation issues; however experiments have shown that |
4325 the vast majority of hoistable expressions are only movable | 4593 the vast majority of hoistable expressions are only movable |
4326 from two successors, so raising this threshold is likely | 4594 from two successors, so raising this threshold is likely |
4327 to nullify any benefit we get from code hoisting. */ | 4595 to nullify any benefit we get from code hoisting. */ |
4328 if (hoistable > 1) | 4596 if (hoistable > 1 && dbg_cnt (hoist_insn)) |
4329 { | 4597 { |
4330 SET_BIT (hoist_exprs[bb->index], i); | 4598 /* If (hoistable != VEC_length), then there is |
4331 found = 1; | 4599 an occurence of EXPR in BB itself. Don't waste |
4332 } | 4600 time looking for LCA in this case. */ |
4333 } | 4601 if ((unsigned) hoistable |
4334 } | 4602 == VEC_length (occr_t, occrs_to_hoist)) |
4335 /* If we found nothing to hoist, then quit now. */ | |
4336 if (! found) | |
4337 { | |
4338 VEC_free (basic_block, heap, domby); | |
4339 continue; | |
4340 } | |
4341 | |
4342 /* Loop over all the hoistable expressions. */ | |
4343 for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++) | |
4344 { | |
4345 /* We want to insert the expression into BB only once, so | |
4346 note when we've inserted it. */ | |
4347 insn_inserted_p = 0; | |
4348 | |
4349 /* These tests should be the same as the tests above. */ | |
4350 if (TEST_BIT (hoist_exprs[bb->index], i)) | |
4351 { | |
4352 /* We've found a potentially hoistable expression, now | |
4353 we look at every block BB dominates to see if it | |
4354 computes the expression. */ | |
4355 for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) | |
4356 { | |
4357 /* Ignore self dominance. */ | |
4358 if (bb == dominated) | |
4359 continue; | |
4360 | |
4361 /* We've found a dominated block, now see if it computes | |
4362 the busy expression and whether or not moving that | |
4363 expression to the "beginning" of that block is safe. */ | |
4364 if (!TEST_BIT (antloc[dominated->index], i)) | |
4365 continue; | |
4366 | |
4367 /* The expression is computed in the dominated block and | |
4368 it would be safe to compute it at the start of the | |
4369 dominated block. Now we have to determine if the | |
4370 expression would reach the dominated block if it was | |
4371 placed at the end of BB. */ | |
4372 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) | |
4373 { | 4603 { |
4374 struct expr *expr = index_map[i]; | 4604 basic_block lca; |
4375 struct occr *occr = expr->antic_occr; | 4605 |
4376 rtx insn; | 4606 lca = nearest_common_dominator_for_set (CDI_DOMINATORS, |
4377 rtx set; | 4607 from_bbs); |
4378 | 4608 if (lca != bb) |
4379 /* Find the right occurrence of this expression. */ | 4609 /* Punt, it's better to hoist these occurences to |
4380 while (BLOCK_FOR_INSN (occr->insn) != dominated && occr) | 4610 LCA. */ |
4381 occr = occr->next; | 4611 VEC_free (occr_t, heap, occrs_to_hoist); |
4382 | |
4383 gcc_assert (occr); | |
4384 insn = occr->insn; | |
4385 set = single_set (insn); | |
4386 gcc_assert (set); | |
4387 | |
4388 /* Create a pseudo-reg to store the result of reaching | |
4389 expressions into. Get the mode for the new pseudo | |
4390 from the mode of the original destination pseudo. */ | |
4391 if (expr->reaching_reg == NULL) | |
4392 expr->reaching_reg | |
4393 = gen_reg_rtx_and_attrs (SET_DEST (set)); | |
4394 | |
4395 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn); | |
4396 delete_insn (insn); | |
4397 occr->deleted_p = 1; | |
4398 changed = 1; | |
4399 gcse_subst_count++; | |
4400 | |
4401 if (!insn_inserted_p) | |
4402 { | |
4403 insert_insn_end_basic_block (index_map[i], bb, 0); | |
4404 insn_inserted_p = 1; | |
4405 } | |
4406 } | 4612 } |
4407 } | 4613 } |
4614 else | |
4615 /* Punt, no point hoisting a single occurence. */ | |
4616 VEC_free (occr_t, heap, occrs_to_hoist); | |
4617 | |
4618 insn_inserted_p = 0; | |
4619 | |
4620 /* Walk through occurences of I'th expressions we want | |
4621 to hoist to BB and make the transformations. */ | |
4622 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr) | |
4623 { | |
4624 rtx insn; | |
4625 rtx set; | |
4626 | |
4627 gcc_assert (!occr->deleted_p); | |
4628 | |
4629 insn = occr->insn; | |
4630 set = single_set (insn); | |
4631 gcc_assert (set); | |
4632 | |
4633 /* Create a pseudo-reg to store the result of reaching | |
4634 expressions into. Get the mode for the new pseudo | |
4635 from the mode of the original destination pseudo. | |
4636 | |
4637 It is important to use new pseudos whenever we | |
4638 emit a set. This will allow reload to use | |
4639 rematerialization for such registers. */ | |
4640 if (!insn_inserted_p) | |
4641 expr->reaching_reg | |
4642 = gen_reg_rtx_and_attrs (SET_DEST (set)); | |
4643 | |
4644 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), | |
4645 insn); | |
4646 delete_insn (insn); | |
4647 occr->deleted_p = 1; | |
4648 changed = 1; | |
4649 gcse_subst_count++; | |
4650 | |
4651 if (!insn_inserted_p) | |
4652 { | |
4653 insert_insn_end_basic_block (expr, bb); | |
4654 insn_inserted_p = 1; | |
4655 } | |
4656 } | |
4657 | |
4658 VEC_free (occr_t, heap, occrs_to_hoist); | |
4659 bitmap_clear (from_bbs); | |
4408 } | 4660 } |
4409 } | 4661 } |
4410 VEC_free (basic_block, heap, domby); | 4662 VEC_free (basic_block, heap, domby); |
4411 } | 4663 } |
4412 | 4664 |
4665 VEC_free (basic_block, heap, dom_tree_walk); | |
4666 free (bb_size); | |
4667 free (to_bb_head); | |
4413 free (index_map); | 4668 free (index_map); |
4414 | 4669 |
4415 return changed; | 4670 return changed; |
4416 } | 4671 } |
4417 | 4672 |
4430 /* Return if there's nothing to do, or it is too expensive. */ | 4685 /* Return if there's nothing to do, or it is too expensive. */ |
4431 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1 | 4686 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1 |
4432 || is_too_expensive (_("GCSE disabled"))) | 4687 || is_too_expensive (_("GCSE disabled"))) |
4433 return 0; | 4688 return 0; |
4434 | 4689 |
4690 doing_code_hoisting_p = true; | |
4691 | |
4435 /* We need alias. */ | 4692 /* We need alias. */ |
4436 init_alias_analysis (); | 4693 init_alias_analysis (); |
4437 | 4694 |
4438 bytes_used = 0; | 4695 bytes_used = 0; |
4439 gcc_obstack_init (&gcse_obstack); | 4696 gcc_obstack_init (&gcse_obstack); |
4464 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ", | 4721 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ", |
4465 current_function_name (), n_basic_blocks, bytes_used); | 4722 current_function_name (), n_basic_blocks, bytes_used); |
4466 fprintf (dump_file, "%d substs, %d insns created\n", | 4723 fprintf (dump_file, "%d substs, %d insns created\n", |
4467 gcse_subst_count, gcse_create_count); | 4724 gcse_subst_count, gcse_create_count); |
4468 } | 4725 } |
4726 | |
4727 doing_code_hoisting_p = false; | |
4469 | 4728 |
4470 return changed; | 4729 return changed; |
4471 } | 4730 } |
4472 | 4731 |
4473 /* Here we provide the things required to do store motion towards | 4732 /* Here we provide the things required to do store motion towards |
4665 | 4924 |
4666 if (GET_MODE (x) == BLKmode) | 4925 if (GET_MODE (x) == BLKmode) |
4667 return 0; | 4926 return 0; |
4668 | 4927 |
4669 /* If we are handling exceptions, we must be careful with memory references | 4928 /* If we are handling exceptions, we must be careful with memory references |
4670 that may trap. If we are not, the behavior is undefined, so we may just | 4929 that may trap. If we are not, the behavior is undefined, so we may just |
4671 continue. */ | 4930 continue. */ |
4672 if (flag_non_call_exceptions && may_trap_p (x)) | 4931 if (cfun->can_throw_non_call_exceptions && may_trap_p (x)) |
4673 return 0; | 4932 return 0; |
4674 | 4933 |
4675 if (side_effects_p (x)) | 4934 if (side_effects_p (x)) |
4676 return 0; | 4935 return 0; |
4677 | 4936 |
5056 } | 5315 } |
5057 | 5316 |
5058 static unsigned int | 5317 static unsigned int |
5059 execute_rtl_cprop (void) | 5318 execute_rtl_cprop (void) |
5060 { | 5319 { |
5320 int changed; | |
5061 delete_unreachable_blocks (); | 5321 delete_unreachable_blocks (); |
5062 df_set_flags (DF_LR_RUN_DCE); | 5322 df_set_flags (DF_LR_RUN_DCE); |
5063 df_analyze (); | 5323 df_analyze (); |
5064 flag_rerun_cse_after_global_opts |= one_cprop_pass (); | 5324 changed = one_cprop_pass (); |
5325 flag_rerun_cse_after_global_opts |= changed; | |
5326 if (changed) | |
5327 cleanup_cfg (0); | |
5065 return 0; | 5328 return 0; |
5066 } | 5329 } |
5067 | 5330 |
5068 static bool | 5331 static bool |
5069 gate_rtl_pre (void) | 5332 gate_rtl_pre (void) |
5075 } | 5338 } |
5076 | 5339 |
5077 static unsigned int | 5340 static unsigned int |
5078 execute_rtl_pre (void) | 5341 execute_rtl_pre (void) |
5079 { | 5342 { |
5343 int changed; | |
5080 delete_unreachable_blocks (); | 5344 delete_unreachable_blocks (); |
5081 df_analyze (); | 5345 df_analyze (); |
5082 flag_rerun_cse_after_global_opts |= one_pre_gcse_pass (); | 5346 changed = one_pre_gcse_pass (); |
5347 flag_rerun_cse_after_global_opts |= changed; | |
5348 if (changed) | |
5349 cleanup_cfg (0); | |
5083 return 0; | 5350 return 0; |
5084 } | 5351 } |
5085 | 5352 |
5086 static bool | 5353 static bool |
5087 gate_rtl_hoist (void) | 5354 gate_rtl_hoist (void) |
5096 } | 5363 } |
5097 | 5364 |
5098 static unsigned int | 5365 static unsigned int |
5099 execute_rtl_hoist (void) | 5366 execute_rtl_hoist (void) |
5100 { | 5367 { |
5368 int changed; | |
5101 delete_unreachable_blocks (); | 5369 delete_unreachable_blocks (); |
5102 df_analyze (); | 5370 df_analyze (); |
5103 flag_rerun_cse_after_global_opts |= one_code_hoisting_pass (); | 5371 changed = one_code_hoisting_pass (); |
5372 flag_rerun_cse_after_global_opts |= changed; | |
5373 if (changed) | |
5374 cleanup_cfg (0); | |
5104 return 0; | 5375 return 0; |
5105 } | 5376 } |
5106 | 5377 |
5107 struct rtl_opt_pass pass_rtl_cprop = | 5378 struct rtl_opt_pass pass_rtl_cprop = |
5108 { | 5379 { |
5166 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */ | 5437 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */ |
5167 } | 5438 } |
5168 }; | 5439 }; |
5169 | 5440 |
5170 #include "gt-gcse.h" | 5441 #include "gt-gcse.h" |
5442 |