comparison gcc/gcse.c @ 67:f6334be47118

update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Mar 2011 17:18:12 +0900
parents b7f97abdc517
children 04ced10e8804
comparison
equal deleted inserted replaced
65:65488c3d617d 67:f6334be47118
1 /* Global common subexpression elimination/Partial redundancy elimination 1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler. 2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
4 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 4 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 5
6 This file is part of GCC. 6 This file is part of GCC.
7 7
8 GCC is free software; you can redistribute it and/or modify it under 8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free 9 the terms of the GNU General Public License as published by the Free
141 141
142 #include "config.h" 142 #include "config.h"
143 #include "system.h" 143 #include "system.h"
144 #include "coretypes.h" 144 #include "coretypes.h"
145 #include "tm.h" 145 #include "tm.h"
146 #include "diagnostic-core.h"
146 #include "toplev.h" 147 #include "toplev.h"
147 148
148 #include "rtl.h" 149 #include "rtl.h"
149 #include "tree.h" 150 #include "tree.h"
150 #include "tm_p.h" 151 #include "tm_p.h"
167 #include "tree-pass.h" 168 #include "tree-pass.h"
168 #include "hashtab.h" 169 #include "hashtab.h"
169 #include "df.h" 170 #include "df.h"
170 #include "dbgcnt.h" 171 #include "dbgcnt.h"
171 #include "target.h" 172 #include "target.h"
173 #include "gcse.h"
172 174
173 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations 175 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
174 are a superset of those done by classic GCSE. 176 are a superset of those done by classic GCSE.
175 177
176 We perform the following steps: 178 We perform the following steps:
259 doesn't have this problem as much as it computes the reaching defs of 261 doesn't have this problem as much as it computes the reaching defs of
260 each register in each block and thus can try to use an existing 262 each register in each block and thus can try to use an existing
261 register. */ 263 register. */
262 264
263 /* GCSE global vars. */ 265 /* GCSE global vars. */
266
267 struct target_gcse default_target_gcse;
268 #if SWITCHABLE_TARGET
269 struct target_gcse *this_target_gcse = &default_target_gcse;
270 #endif
264 271
265 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */ 272 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
266 int flag_rerun_cse_after_global_opts; 273 int flag_rerun_cse_after_global_opts;
267 274
268 /* An obstack for our working variables. */ 275 /* An obstack for our working variables. */
293 struct occr *avail_occr; 300 struct occr *avail_occr;
294 /* Non-null if the computation is PRE redundant. 301 /* Non-null if the computation is PRE redundant.
295 The value is the newly created pseudo-reg to record a copy of the 302 The value is the newly created pseudo-reg to record a copy of the
296 expression in all the places that reach the redundant copy. */ 303 expression in all the places that reach the redundant copy. */
297 rtx reaching_reg; 304 rtx reaching_reg;
305 /* Maximum distance in instructions this expression can travel.
306 We avoid moving simple expressions for more than a few instructions
307 to keep register pressure under control.
308 A value of "0" removes restrictions on how far the expression can
309 travel. */
310 int max_distance;
298 }; 311 };
299 312
300 /* Occurrence of an expression. 313 /* Occurrence of an expression.
301 There is one per basic block. If a pattern appears more than once the 314 There is one per basic block. If a pattern appears more than once the
302 last appearance is used [or first for anticipatable expressions]. */ 315 last appearance is used [or first for anticipatable expressions]. */
314 /* ??? This is mutually exclusive with deleted_p, so they could share 327 /* ??? This is mutually exclusive with deleted_p, so they could share
315 the same byte. */ 328 the same byte. */
316 char copied_p; 329 char copied_p;
317 }; 330 };
318 331
332 typedef struct occr *occr_t;
333 DEF_VEC_P (occr_t);
334 DEF_VEC_ALLOC_P (occr_t, heap);
335
319 /* Expression and copy propagation hash tables. 336 /* Expression and copy propagation hash tables.
320 Each hash table is an array of buckets. 337 Each hash table is an array of buckets.
321 ??? It is known that if it were an array of entries, structure elements 338 ??? It is known that if it were an array of entries, structure elements
322 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is 339 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
323 not clear whether in the final analysis a sufficient amount of memory would 340 not clear whether in the final analysis a sufficient amount of memory would
415 static int local_copy_prop_count; 432 static int local_copy_prop_count;
416 /* Number of global constants propagated. */ 433 /* Number of global constants propagated. */
417 static int global_const_prop_count; 434 static int global_const_prop_count;
418 /* Number of global copies propagated. */ 435 /* Number of global copies propagated. */
419 static int global_copy_prop_count; 436 static int global_copy_prop_count;
437
438 /* Doing code hoisting. */
439 static bool doing_code_hoisting_p = false;
420 440
421 /* For available exprs */ 441 /* For available exprs */
422 static sbitmap *ae_kill; 442 static sbitmap *ae_kill;
423 443
424 static void compute_can_copy (void); 444 static void compute_can_copy (void);
429 static void free_gcse_mem (void); 449 static void free_gcse_mem (void);
430 static void hash_scan_insn (rtx, struct hash_table_d *); 450 static void hash_scan_insn (rtx, struct hash_table_d *);
431 static void hash_scan_set (rtx, rtx, struct hash_table_d *); 451 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
432 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *); 452 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
433 static void hash_scan_call (rtx, rtx, struct hash_table_d *); 453 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
434 static int want_to_gcse_p (rtx); 454 static int want_to_gcse_p (rtx, int *);
435 static bool gcse_constant_p (const_rtx); 455 static bool gcse_constant_p (const_rtx);
436 static int oprs_unchanged_p (const_rtx, const_rtx, int); 456 static int oprs_unchanged_p (const_rtx, const_rtx, int);
437 static int oprs_anticipatable_p (const_rtx, const_rtx); 457 static int oprs_anticipatable_p (const_rtx, const_rtx);
438 static int oprs_available_p (const_rtx, const_rtx); 458 static int oprs_available_p (const_rtx, const_rtx);
439 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, 459 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
440 struct hash_table_d *); 460 struct hash_table_d *);
441 static void insert_set_in_table (rtx, rtx, struct hash_table_d *); 461 static void insert_set_in_table (rtx, rtx, struct hash_table_d *);
442 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int); 462 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
443 static unsigned int hash_set (int, int); 463 static unsigned int hash_set (int, int);
444 static int expr_equiv_p (const_rtx, const_rtx); 464 static int expr_equiv_p (const_rtx, const_rtx);
459 static void mark_clobber (rtx, rtx); 479 static void mark_clobber (rtx, rtx);
460 static void mark_oprs_set (rtx); 480 static void mark_oprs_set (rtx);
461 static void alloc_cprop_mem (int, int); 481 static void alloc_cprop_mem (int, int);
462 static void free_cprop_mem (void); 482 static void free_cprop_mem (void);
463 static void compute_transp (const_rtx, int, sbitmap *, int); 483 static void compute_transp (const_rtx, int, sbitmap *, int);
464 static void compute_transpout (void);
465 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, 484 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
466 struct hash_table_d *); 485 struct hash_table_d *);
467 static void compute_cprop_data (void); 486 static void compute_cprop_data (void);
468 static void find_used_regs (rtx *, void *); 487 static void find_used_regs (rtx *, void *);
469 static int try_replace_reg (rtx, rtx, rtx); 488 static int try_replace_reg (rtx, rtx, rtx);
483 static void alloc_pre_mem (int, int); 502 static void alloc_pre_mem (int, int);
484 static void free_pre_mem (void); 503 static void free_pre_mem (void);
485 static void compute_pre_data (void); 504 static void compute_pre_data (void);
486 static int pre_expr_reaches_here_p (basic_block, struct expr *, 505 static int pre_expr_reaches_here_p (basic_block, struct expr *,
487 basic_block); 506 basic_block);
488 static void insert_insn_end_basic_block (struct expr *, basic_block, int); 507 static void insert_insn_end_basic_block (struct expr *, basic_block);
489 static void pre_insert_copy_insn (struct expr *, rtx); 508 static void pre_insert_copy_insn (struct expr *, rtx);
490 static void pre_insert_copies (void); 509 static void pre_insert_copies (void);
491 static int pre_delete (void); 510 static int pre_delete (void);
492 static int pre_gcse (void); 511 static int pre_gcse (void);
493 static int one_pre_gcse_pass (void); 512 static int one_pre_gcse_pass (void);
494 static void add_label_notes (rtx, rtx); 513 static void add_label_notes (rtx, rtx);
495 static void alloc_code_hoist_mem (int, int); 514 static void alloc_code_hoist_mem (int, int);
496 static void free_code_hoist_mem (void); 515 static void free_code_hoist_mem (void);
497 static void compute_code_hoist_vbeinout (void); 516 static void compute_code_hoist_vbeinout (void);
498 static void compute_code_hoist_data (void); 517 static void compute_code_hoist_data (void);
499 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *); 518 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
519 int, int *);
500 static int hoist_code (void); 520 static int hoist_code (void);
501 static int one_code_hoisting_pass (void); 521 static int one_code_hoisting_pass (void);
502 static rtx process_insert_insn (struct expr *); 522 static rtx process_insert_insn (struct expr *);
503 static int pre_edge_insert (struct edge_list *, struct expr **); 523 static int pre_edge_insert (struct edge_list *, struct expr **);
504 static int pre_expr_reaches_here_p_work (basic_block, struct expr *, 524 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
536 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T))) 556 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
537 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S))) 557 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
538 558
539 /* Misc. utilities. */ 559 /* Misc. utilities. */
540 560
541 /* Nonzero for each mode that supports (set (reg) (reg)). 561 #define can_copy \
542 This is trivially true for integer and floating point values. 562 (this_target_gcse->x_can_copy)
543 It may or may not be true for condition codes. */ 563 #define can_copy_init_p \
544 static char can_copy[(int) NUM_MACHINE_MODES]; 564 (this_target_gcse->x_can_copy_init_p)
545 565
546 /* Compute which modes support reg/reg copy operations. */ 566 /* Compute which modes support reg/reg copy operations. */
547 567
548 static void 568 static void
549 compute_can_copy (void) 569 compute_can_copy (void)
576 /* Returns whether the mode supports reg/reg copy operations. */ 596 /* Returns whether the mode supports reg/reg copy operations. */
577 597
578 bool 598 bool
579 can_copy_p (enum machine_mode mode) 599 can_copy_p (enum machine_mode mode)
580 { 600 {
581 static bool can_copy_init_p = false;
582
583 if (! can_copy_init_p) 601 if (! can_copy_init_p)
584 { 602 {
585 compute_can_copy (); 603 compute_can_copy ();
586 can_copy_init_p = true; 604 can_copy_init_p = true;
587 } 605 }
752 770
753 /* See whether X, the source of a set, is something we want to consider for 771 /* See whether X, the source of a set, is something we want to consider for
754 GCSE. */ 772 GCSE. */
755 773
756 static int 774 static int
757 want_to_gcse_p (rtx x) 775 want_to_gcse_p (rtx x, int *max_distance_ptr)
758 { 776 {
759 #ifdef STACK_REGS 777 #ifdef STACK_REGS
760 /* On register stack architectures, don't GCSE constants from the 778 /* On register stack architectures, don't GCSE constants from the
761 constant pool, as the benefits are often swamped by the overhead 779 constant pool, as the benefits are often swamped by the overhead
762 of shuffling the register stack between basic blocks. */ 780 of shuffling the register stack between basic blocks. */
763 if (IS_STACK_MODE (GET_MODE (x))) 781 if (IS_STACK_MODE (GET_MODE (x)))
764 x = avoid_constant_pool_reference (x); 782 x = avoid_constant_pool_reference (x);
765 #endif 783 #endif
766 784
785 /* GCSE'ing constants:
786
787 We do not specifically distinguish between constant and non-constant
788 expressions in PRE and Hoist. We use rtx_cost below to limit
789 the maximum distance simple expressions can travel.
790
791 Nevertheless, constants are much easier to GCSE, and, hence,
792 it is easy to overdo the optimizations. Usually, excessive PRE and
793 Hoisting of constant leads to increased register pressure.
794
795 RA can deal with this by rematerialing some of the constants.
796 Therefore, it is important that the back-end generates sets of constants
797 in a way that allows reload rematerialize them under high register
798 pressure, i.e., a pseudo register with REG_EQUAL to constant
799 is set only once. Failing to do so will result in IRA/reload
800 spilling such constants under high register pressure instead of
801 rematerializing them. */
802
767 switch (GET_CODE (x)) 803 switch (GET_CODE (x))
768 { 804 {
769 case REG: 805 case REG:
770 case SUBREG: 806 case SUBREG:
807 case CALL:
808 return 0;
809
771 case CONST_INT: 810 case CONST_INT:
772 case CONST_DOUBLE: 811 case CONST_DOUBLE:
773 case CONST_FIXED: 812 case CONST_FIXED:
774 case CONST_VECTOR: 813 case CONST_VECTOR:
775 case CALL: 814 if (!doing_code_hoisting_p)
776 return 0; 815 /* Do not PRE constants. */
816 return 0;
817
818 /* FALLTHRU */
777 819
778 default: 820 default:
821 if (doing_code_hoisting_p)
822 /* PRE doesn't implement max_distance restriction. */
823 {
824 int cost;
825 int max_distance;
826
827 gcc_assert (!optimize_function_for_speed_p (cfun)
828 && optimize_function_for_size_p (cfun));
829 cost = rtx_cost (x, SET, 0);
830
831 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
832 {
833 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
834 if (max_distance == 0)
835 return 0;
836
837 gcc_assert (max_distance > 0);
838 }
839 else
840 max_distance = 0;
841
842 if (max_distance_ptr)
843 *max_distance_ptr = max_distance;
844 }
845
779 return can_assign_to_reg_without_clobbers_p (x); 846 return can_assign_to_reg_without_clobbers_p (x);
780 } 847 }
781 } 848 }
782 849
783 /* Used internally by can_assign_to_reg_without_clobbers_p. */ 850 /* Used internally by can_assign_to_reg_without_clobbers_p. */
1087 1154
1088 MODE is the mode of the value X is being stored into. 1155 MODE is the mode of the value X is being stored into.
1089 It is only used if X is a CONST_INT. 1156 It is only used if X is a CONST_INT.
1090 1157
1091 ANTIC_P is nonzero if X is an anticipatable expression. 1158 ANTIC_P is nonzero if X is an anticipatable expression.
1092 AVAIL_P is nonzero if X is an available expression. */ 1159 AVAIL_P is nonzero if X is an available expression.
1160
1161 MAX_DISTANCE is the maximum distance in instructions this expression can
1162 be moved. */
1093 1163
1094 static void 1164 static void
1095 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, 1165 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1096 int avail_p, struct hash_table_d *table) 1166 int avail_p, int max_distance, struct hash_table_d *table)
1097 { 1167 {
1098 int found, do_not_record_p; 1168 int found, do_not_record_p;
1099 unsigned int hash; 1169 unsigned int hash;
1100 struct expr *cur_expr, *last_expr = NULL; 1170 struct expr *cur_expr, *last_expr = NULL;
1101 struct occr *antic_occr, *avail_occr; 1171 struct occr *antic_occr, *avail_occr;
1134 cur_expr->expr = x; 1204 cur_expr->expr = x;
1135 cur_expr->bitmap_index = table->n_elems++; 1205 cur_expr->bitmap_index = table->n_elems++;
1136 cur_expr->next_same_hash = NULL; 1206 cur_expr->next_same_hash = NULL;
1137 cur_expr->antic_occr = NULL; 1207 cur_expr->antic_occr = NULL;
1138 cur_expr->avail_occr = NULL; 1208 cur_expr->avail_occr = NULL;
1139 } 1209 gcc_assert (max_distance >= 0);
1210 cur_expr->max_distance = max_distance;
1211 }
1212 else
1213 gcc_assert (cur_expr->max_distance == max_distance);
1140 1214
1141 /* Now record the occurrence(s). */ 1215 /* Now record the occurrence(s). */
1142 if (antic_p) 1216 if (antic_p)
1143 { 1217 {
1144 antic_occr = cur_expr->antic_occr; 1218 antic_occr = cur_expr->antic_occr;
1235 cur_expr->expr = copy_rtx (x); 1309 cur_expr->expr = copy_rtx (x);
1236 cur_expr->bitmap_index = table->n_elems++; 1310 cur_expr->bitmap_index = table->n_elems++;
1237 cur_expr->next_same_hash = NULL; 1311 cur_expr->next_same_hash = NULL;
1238 cur_expr->antic_occr = NULL; 1312 cur_expr->antic_occr = NULL;
1239 cur_expr->avail_occr = NULL; 1313 cur_expr->avail_occr = NULL;
1314 /* Not used for set_p tables. */
1315 cur_expr->max_distance = 0;
1240 } 1316 }
1241 1317
1242 /* Now record the occurrence. */ 1318 /* Now record the occurrence. */
1243 cur_occr = cur_expr->avail_occr; 1319 cur_occr = cur_expr->avail_occr;
1244 1320
1304 1380
1305 else if (REG_P (dest)) 1381 else if (REG_P (dest))
1306 { 1382 {
1307 unsigned int regno = REGNO (dest); 1383 unsigned int regno = REGNO (dest);
1308 rtx tmp; 1384 rtx tmp;
1385 int max_distance = 0;
1309 1386
1310 /* See if a REG_EQUAL note shows this equivalent to a simpler expression. 1387 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1311 1388
1312 This allows us to do a single GCSE pass and still eliminate 1389 This allows us to do a single GCSE pass and still eliminate
1313 redundant constants, addresses or other expressions that are 1390 redundant constants, addresses or other expressions that are
1326 if (note != 0 1403 if (note != 0
1327 && REG_NOTE_KIND (note) == REG_EQUAL 1404 && REG_NOTE_KIND (note) == REG_EQUAL
1328 && !REG_P (src) 1405 && !REG_P (src)
1329 && (table->set_p 1406 && (table->set_p
1330 ? gcse_constant_p (XEXP (note, 0)) 1407 ? gcse_constant_p (XEXP (note, 0))
1331 : want_to_gcse_p (XEXP (note, 0)))) 1408 : want_to_gcse_p (XEXP (note, 0), NULL)))
1332 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src); 1409 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
1333 1410
1334 /* Only record sets of pseudo-regs in the hash table. */ 1411 /* Only record sets of pseudo-regs in the hash table. */
1335 if (! table->set_p 1412 if (! table->set_p
1336 && regno >= FIRST_PSEUDO_REGISTER 1413 && regno >= FIRST_PSEUDO_REGISTER
1341 /* ??? We can now easily create new EH landing pads at the 1418 /* ??? We can now easily create new EH landing pads at the
1342 gimple level, for splitting edges; there's no reason we 1419 gimple level, for splitting edges; there's no reason we
1343 can't do the same thing at the rtl level. */ 1420 can't do the same thing at the rtl level. */
1344 && !can_throw_internal (insn) 1421 && !can_throw_internal (insn)
1345 /* Is SET_SRC something we want to gcse? */ 1422 /* Is SET_SRC something we want to gcse? */
1346 && want_to_gcse_p (src) 1423 && want_to_gcse_p (src, &max_distance)
1347 /* Don't CSE a nop. */ 1424 /* Don't CSE a nop. */
1348 && ! set_noop_p (pat) 1425 && ! set_noop_p (pat)
1349 /* Don't GCSE if it has attached REG_EQUIV note. 1426 /* Don't GCSE if it has attached REG_EQUIV note.
1350 At this point this only function parameters should have 1427 At this point this only function parameters should have
1351 REG_EQUIV notes and if the argument slot is used somewhere 1428 REG_EQUIV notes and if the argument slot is used somewhere
1365 available if this is a branch, because we can't insert 1442 available if this is a branch, because we can't insert
1366 a set after the branch. */ 1443 a set after the branch. */
1367 int avail_p = (oprs_available_p (src, insn) 1444 int avail_p = (oprs_available_p (src, insn)
1368 && ! JUMP_P (insn)); 1445 && ! JUMP_P (insn));
1369 1446
1370 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table); 1447 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1448 max_distance, table);
1371 } 1449 }
1372 1450
1373 /* Record sets for constant/copy propagation. */ 1451 /* Record sets for constant/copy propagation. */
1374 else if (table->set_p 1452 else if (table->set_p
1375 && regno >= FIRST_PSEUDO_REGISTER 1453 && regno >= FIRST_PSEUDO_REGISTER
1380 || gcse_constant_p (src)) 1458 || gcse_constant_p (src))
1381 /* A copy is not available if its src or dest is subsequently 1459 /* A copy is not available if its src or dest is subsequently
1382 modified. Here we want to search from INSN+1 on, but 1460 modified. Here we want to search from INSN+1 on, but
1383 oprs_available_p searches from INSN on. */ 1461 oprs_available_p searches from INSN on. */
1384 && (insn == BB_END (BLOCK_FOR_INSN (insn)) 1462 && (insn == BB_END (BLOCK_FOR_INSN (insn))
1385 || (tmp = next_nonnote_insn (insn)) == NULL_RTX 1463 || (tmp = next_nonnote_nondebug_insn (insn)) == NULL_RTX
1386 || BLOCK_FOR_INSN (tmp) != BLOCK_FOR_INSN (insn) 1464 || BLOCK_FOR_INSN (tmp) != BLOCK_FOR_INSN (insn)
1387 || oprs_available_p (pat, tmp))) 1465 || oprs_available_p (pat, tmp)))
1388 insert_set_in_table (pat, insn, table); 1466 insert_set_in_table (pat, insn, table);
1389 } 1467 }
1390 /* In case of store we want to consider the memory value as available in 1468 /* In case of store we want to consider the memory value as available in
1391 the REG stored in that memory. This makes it possible to remove 1469 the REG stored in that memory. This makes it possible to remove
1392 redundant loads from due to stores to the same location. */ 1470 redundant loads from due to stores to the same location. */
1393 else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) 1471 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1394 { 1472 {
1395 unsigned int regno = REGNO (src); 1473 unsigned int regno = REGNO (src);
1474 int max_distance = 0;
1396 1475
1397 /* Do not do this for constant/copy propagation. */ 1476 /* Do not do this for constant/copy propagation. */
1398 if (! table->set_p 1477 if (! table->set_p
1399 /* Only record sets of pseudo-regs in the hash table. */ 1478 /* Only record sets of pseudo-regs in the hash table. */
1400 && regno >= FIRST_PSEUDO_REGISTER 1479 && regno >= FIRST_PSEUDO_REGISTER
1402 && can_copy_p (GET_MODE (src)) 1481 && can_copy_p (GET_MODE (src))
1403 /* GCSE commonly inserts instruction after the insn. We can't 1482 /* GCSE commonly inserts instruction after the insn. We can't
1404 do that easily for EH edges so disable GCSE on these for now. */ 1483 do that easily for EH edges so disable GCSE on these for now. */
1405 && !can_throw_internal (insn) 1484 && !can_throw_internal (insn)
1406 /* Is SET_DEST something we want to gcse? */ 1485 /* Is SET_DEST something we want to gcse? */
1407 && want_to_gcse_p (dest) 1486 && want_to_gcse_p (dest, &max_distance)
1408 /* Don't CSE a nop. */ 1487 /* Don't CSE a nop. */
1409 && ! set_noop_p (pat) 1488 && ! set_noop_p (pat)
1410 /* Don't GCSE if it has attached REG_EQUIV note. 1489 /* Don't GCSE if it has attached REG_EQUIV note.
1411 At this point this only function parameters should have 1490 At this point this only function parameters should have
1412 REG_EQUIV notes and if the argument slot is used somewhere 1491 REG_EQUIV notes and if the argument slot is used somewhere
1424 int avail_p = oprs_available_p (dest, insn) 1503 int avail_p = oprs_available_p (dest, insn)
1425 && ! JUMP_P (insn); 1504 && ! JUMP_P (insn);
1426 1505
1427 /* Record the memory expression (DEST) in the hash table. */ 1506 /* Record the memory expression (DEST) in the hash table. */
1428 insert_expr_in_table (dest, GET_MODE (dest), insn, 1507 insert_expr_in_table (dest, GET_MODE (dest), insn,
1429 antic_p, avail_p, table); 1508 antic_p, avail_p, max_distance, table);
1430 } 1509 }
1431 } 1510 }
1432 } 1511 }
1433 1512
1434 static void 1513 static void
1510 1589
1511 for (i = 0; i < (int) table->n_elems; i++) 1590 for (i = 0; i < (int) table->n_elems; i++)
1512 if (flat_table[i] != 0) 1591 if (flat_table[i] != 0)
1513 { 1592 {
1514 expr = flat_table[i]; 1593 expr = flat_table[i];
1515 fprintf (file, "Index %d (hash value %d)\n ", 1594 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1516 expr->bitmap_index, hash_val[i]); 1595 expr->bitmap_index, hash_val[i], expr->max_distance);
1517 print_rtl (file, expr->expr); 1596 print_rtl (file, expr->expr);
1518 fprintf (file, "\n"); 1597 fprintf (file, "\n");
1519 } 1598 }
1520 1599
1521 fprintf (file, "\n"); 1600 fprintf (file, "\n");
1667 1746
1668 /* First pass over the instructions records information used to 1747 /* First pass over the instructions records information used to
1669 determine when registers and memory are first and last set. */ 1748 determine when registers and memory are first and last set. */
1670 FOR_BB_INSNS (current_bb, insn) 1749 FOR_BB_INSNS (current_bb, insn)
1671 { 1750 {
1672 if (! INSN_P (insn)) 1751 if (!NONDEBUG_INSN_P (insn))
1673 continue; 1752 continue;
1674 1753
1675 if (CALL_P (insn)) 1754 if (CALL_P (insn))
1676 { 1755 {
1677 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 1756 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1690 hash_scan_set (implicit_sets[current_bb->index], 1769 hash_scan_set (implicit_sets[current_bb->index],
1691 BB_HEAD (current_bb), table); 1770 BB_HEAD (current_bb), table);
1692 1771
1693 /* The next pass builds the hash table. */ 1772 /* The next pass builds the hash table. */
1694 FOR_BB_INSNS (current_bb, insn) 1773 FOR_BB_INSNS (current_bb, insn)
1695 if (INSN_P (insn)) 1774 if (NONDEBUG_INSN_P (insn))
1696 hash_scan_insn (insn, table); 1775 hash_scan_insn (insn, table);
1697 } 1776 }
1698 1777
1699 free (reg_avail_info); 1778 free (reg_avail_info);
1700 reg_avail_info = NULL; 1779 reg_avail_info = NULL;
2270 2349
2271 if (!rtx_equal_p (src, SET_SRC (set)) 2350 if (!rtx_equal_p (src, SET_SRC (set))
2272 && validate_change (insn, &SET_SRC (set), src, 0)) 2351 && validate_change (insn, &SET_SRC (set), src, 0))
2273 success = 1; 2352 success = 1;
2274 2353
2275 /* If we've failed to do replacement, have a single SET, don't already 2354 /* If we've failed perform the replacement, have a single SET to
2276 have a note, and have no special SET, add a REG_EQUAL note to not 2355 a REG destination and don't yet have a note, add a REG_EQUAL note
2277 lose information. */ 2356 to not lose information. */
2278 if (!success && note == 0 && set != 0 2357 if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set)))
2279 && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT
2280 && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART)
2281 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); 2358 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
2282 } 2359 }
2283 2360
2284 /* REG_EQUAL may get simplified into register. 2361 /* REG_EQUAL may get simplified into register.
2285 We don't allow that. Remove that note. This code ought 2362 We don't allow that. Remove that note. This code ought
2659 if (REG_P (x) 2736 if (REG_P (x)
2660 && (REGNO (x) >= FIRST_PSEUDO_REGISTER 2737 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
2661 || (GET_CODE (PATTERN (insn)) != USE 2738 || (GET_CODE (PATTERN (insn)) != USE
2662 && asm_noperands (PATTERN (insn)) < 0))) 2739 && asm_noperands (PATTERN (insn)) < 0)))
2663 { 2740 {
2664 cselib_val *val = cselib_lookup (x, GET_MODE (x), 0); 2741 cselib_val *val = cselib_lookup (x, GET_MODE (x), 0, VOIDmode);
2665 struct elt_loc_list *l; 2742 struct elt_loc_list *l;
2666 2743
2667 if (!val) 2744 if (!val)
2668 return false; 2745 return false;
2669 for (l = val->locs; l; l = l->next) 2746 for (l = val->locs; l; l = l->next)
3165 3242
3166 /* Local properties of expressions. */ 3243 /* Local properties of expressions. */
3167 /* Nonzero for expressions that are transparent in the block. */ 3244 /* Nonzero for expressions that are transparent in the block. */
3168 static sbitmap *transp; 3245 static sbitmap *transp;
3169 3246
3170 /* Nonzero for expressions that are transparent at the end of the block.
3171 This is only zero for expressions killed by abnormal critical edge
3172 created by a calls. */
3173 static sbitmap *transpout;
3174
3175 /* Nonzero for expressions that are computed (available) in the block. */ 3247 /* Nonzero for expressions that are computed (available) in the block. */
3176 static sbitmap *comp; 3248 static sbitmap *comp;
3177 3249
3178 /* Nonzero for expressions that are locally anticipatable in the block. */ 3250 /* Nonzero for expressions that are locally anticipatable in the block. */
3179 static sbitmap *antloc; 3251 static sbitmap *antloc;
3233 3305
3234 transp = comp = NULL; 3306 transp = comp = NULL;
3235 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; 3307 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
3236 } 3308 }
3237 3309
3310 /* Remove certain expressions from anticipatable and transparent
3311 sets of basic blocks that have incoming abnormal edge.
3312 For PRE remove potentially trapping expressions to avoid placing
3313 them on abnormal edges. For hoisting remove memory references that
3314 can be clobbered by calls. */
3315
3316 static void
3317 prune_expressions (bool pre_p)
3318 {
3319 sbitmap prune_exprs;
3320 unsigned int ui;
3321 basic_block bb;
3322
3323 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
3324 sbitmap_zero (prune_exprs);
3325 for (ui = 0; ui < expr_hash_table.size; ui++)
3326 {
3327 struct expr *e;
3328 for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
3329 {
3330 /* Note potentially trapping expressions. */
3331 if (may_trap_p (e->expr))
3332 {
3333 SET_BIT (prune_exprs, e->bitmap_index);
3334 continue;
3335 }
3336
3337 if (!pre_p && MEM_P (e->expr))
3338 /* Note memory references that can be clobbered by a call.
3339 We do not split abnormal edges in hoisting, so would
3340 a memory reference get hoisted along an abnormal edge,
3341 it would be placed /before/ the call. Therefore, only
3342 constant memory references can be hoisted along abnormal
3343 edges. */
3344 {
3345 if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF
3346 && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0)))
3347 continue;
3348
3349 if (MEM_READONLY_P (e->expr)
3350 && !MEM_VOLATILE_P (e->expr)
3351 && MEM_NOTRAP_P (e->expr))
3352 /* Constant memory reference, e.g., a PIC address. */
3353 continue;
3354
3355 /* ??? Optimally, we would use interprocedural alias
3356 analysis to determine if this mem is actually killed
3357 by this call. */
3358
3359 SET_BIT (prune_exprs, e->bitmap_index);
3360 }
3361 }
3362 }
3363
3364 FOR_EACH_BB (bb)
3365 {
3366 edge e;
3367 edge_iterator ei;
3368
3369 /* If the current block is the destination of an abnormal edge, we
3370 kill all trapping (for PRE) and memory (for hoist) expressions
3371 because we won't be able to properly place the instruction on
3372 the edge. So make them neither anticipatable nor transparent.
3373 This is fairly conservative.
3374
3375 ??? For hoisting it may be necessary to check for set-and-jump
3376 instructions here, not just for abnormal edges. The general problem
3377 is that when an expression cannot not be placed right at the end of
3378 a basic block we should account for any side-effects of a subsequent
3379 jump instructions that could clobber the expression. It would
3380 be best to implement this check along the lines of
3381 hoist_expr_reaches_here_p where the target block is already known
3382 and, hence, there's no need to conservatively prune expressions on
3383 "intermediate" set-and-jump instructions. */
3384 FOR_EACH_EDGE (e, ei, bb->preds)
3385 if ((e->flags & EDGE_ABNORMAL)
3386 && (pre_p || CALL_P (BB_END (e->src))))
3387 {
3388 sbitmap_difference (antloc[bb->index],
3389 antloc[bb->index], prune_exprs);
3390 sbitmap_difference (transp[bb->index],
3391 transp[bb->index], prune_exprs);
3392 break;
3393 }
3394 }
3395
3396 sbitmap_free (prune_exprs);
3397 }
3398
3399 /* It may be necessary to insert a large number of insns on edges to
3400 make the existing occurrences of expressions fully redundant. This
3401 routine examines the set of insertions and deletions and if the ratio
3402 of insertions to deletions is too high for a particular expression, then
3403 the expression is removed from the insertion/deletion sets.
3404
3405 N_ELEMS is the number of elements in the hash table. */
3406
3407 static void
3408 prune_insertions_deletions (int n_elems)
3409 {
3410 sbitmap_iterator sbi;
3411 sbitmap prune_exprs;
3412
3413 /* We always use I to iterate over blocks/edges and J to iterate over
3414 expressions. */
3415 unsigned int i, j;
3416
3417 /* Counts for the number of times an expression needs to be inserted and
3418 number of times an expression can be removed as a result. */
3419 int *insertions = GCNEWVEC (int, n_elems);
3420 int *deletions = GCNEWVEC (int, n_elems);
3421
3422 /* Set of expressions which require too many insertions relative to
3423 the number of deletions achieved. We will prune these out of the
3424 insertion/deletion sets. */
3425 prune_exprs = sbitmap_alloc (n_elems);
3426 sbitmap_zero (prune_exprs);
3427
3428 /* Iterate over the edges counting the number of times each expression
3429 needs to be inserted. */
3430 for (i = 0; i < (unsigned) n_edges; i++)
3431 {
3432 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
3433 insertions[j]++;
3434 }
3435
3436 /* Similarly for deletions, but those occur in blocks rather than on
3437 edges. */
3438 for (i = 0; i < (unsigned) last_basic_block; i++)
3439 {
3440 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
3441 deletions[j]++;
3442 }
3443
3444 /* Now that we have accurate counts, iterate over the elements in the
3445 hash table and see if any need too many insertions relative to the
3446 number of evaluations that can be removed. If so, mark them in
3447 PRUNE_EXPRS. */
3448 for (j = 0; j < (unsigned) n_elems; j++)
3449 if (deletions[j]
3450 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
3451 SET_BIT (prune_exprs, j);
3452
3453 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
3454 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
3455 {
3456 for (i = 0; i < (unsigned) n_edges; i++)
3457 RESET_BIT (pre_insert_map[i], j);
3458
3459 for (i = 0; i < (unsigned) last_basic_block; i++)
3460 RESET_BIT (pre_delete_map[i], j);
3461 }
3462
3463 sbitmap_free (prune_exprs);
3464 free (insertions);
3465 free (deletions);
3466 }
3467
3238 /* Top level routine to do the dataflow analysis needed by PRE. */ 3468 /* Top level routine to do the dataflow analysis needed by PRE. */
3239 3469
3240 static void 3470 static void
3241 compute_pre_data (void) 3471 compute_pre_data (void)
3242 { 3472 {
3243 sbitmap trapping_expr;
3244 basic_block bb; 3473 basic_block bb;
3245 unsigned int ui;
3246 3474
3247 compute_local_properties (transp, comp, antloc, &expr_hash_table); 3475 compute_local_properties (transp, comp, antloc, &expr_hash_table);
3476 prune_expressions (true);
3248 sbitmap_vector_zero (ae_kill, last_basic_block); 3477 sbitmap_vector_zero (ae_kill, last_basic_block);
3249
3250 /* Collect expressions which might trap. */
3251 trapping_expr = sbitmap_alloc (expr_hash_table.n_elems);
3252 sbitmap_zero (trapping_expr);
3253 for (ui = 0; ui < expr_hash_table.size; ui++)
3254 {
3255 struct expr *e;
3256 for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
3257 if (may_trap_p (e->expr))
3258 SET_BIT (trapping_expr, e->bitmap_index);
3259 }
3260 3478
3261 /* Compute ae_kill for each basic block using: 3479 /* Compute ae_kill for each basic block using:
3262 3480
3263 ~(TRANSP | COMP) 3481 ~(TRANSP | COMP)
3264 */ 3482 */
3265 3483
3266 FOR_EACH_BB (bb) 3484 FOR_EACH_BB (bb)
3267 { 3485 {
3268 edge e;
3269 edge_iterator ei;
3270
3271 /* If the current block is the destination of an abnormal edge, we
3272 kill all trapping expressions because we won't be able to properly
3273 place the instruction on the edge. So make them neither
3274 anticipatable nor transparent. This is fairly conservative. */
3275 FOR_EACH_EDGE (e, ei, bb->preds)
3276 if (e->flags & EDGE_ABNORMAL)
3277 {
3278 sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr);
3279 sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr);
3280 break;
3281 }
3282
3283 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]); 3486 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
3284 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]); 3487 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
3285 } 3488 }
3286 3489
3287 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc, 3490 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
3288 ae_kill, &pre_insert_map, &pre_delete_map); 3491 ae_kill, &pre_insert_map, &pre_delete_map);
3289 sbitmap_vector_free (antloc); 3492 sbitmap_vector_free (antloc);
3290 antloc = NULL; 3493 antloc = NULL;
3291 sbitmap_vector_free (ae_kill); 3494 sbitmap_vector_free (ae_kill);
3292 ae_kill = NULL; 3495 ae_kill = NULL;
3293 sbitmap_free (trapping_expr); 3496
3497 prune_insertions_deletions (expr_hash_table.n_elems);
3294 } 3498 }
3295 3499
3296 /* PRE utilities */ 3500 /* PRE utilities */
3297 3501
3298 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach 3502 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
3403 return pat; 3607 return pat;
3404 } 3608 }
3405 3609
3406 /* Add EXPR to the end of basic block BB. 3610 /* Add EXPR to the end of basic block BB.
3407 3611
3408 This is used by both the PRE and code hoisting. 3612 This is used by both the PRE and code hoisting. */
3409
3410 For PRE, we want to verify that the expr is either transparent
3411 or locally anticipatable in the target block. This check makes
3412 no sense for code hoisting. */
3413 3613
3414 static void 3614 static void
3415 insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre) 3615 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
3416 { 3616 {
3417 rtx insn = BB_END (bb); 3617 rtx insn = BB_END (bb);
3418 rtx new_insn; 3618 rtx new_insn;
3419 rtx reg = expr->reaching_reg; 3619 rtx reg = expr->reaching_reg;
3420 int regno = REGNO (reg); 3620 int regno = REGNO (reg);
3437 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))) 3637 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
3438 { 3638 {
3439 #ifdef HAVE_cc0 3639 #ifdef HAVE_cc0
3440 rtx note; 3640 rtx note;
3441 #endif 3641 #endif
3442 /* It should always be the case that we can put these instructions
3443 anywhere in the basic block with performing PRE optimizations.
3444 Check this. */
3445 gcc_assert (!NONJUMP_INSN_P (insn) || !pre
3446 || TEST_BIT (antloc[bb->index], expr->bitmap_index)
3447 || TEST_BIT (transp[bb->index], expr->bitmap_index));
3448 3642
3449 /* If this is a jump table, then we can't insert stuff here. Since 3643 /* If this is a jump table, then we can't insert stuff here. Since
3450 we know the previous real insn must be the tablejump, we insert 3644 we know the previous real insn must be the tablejump, we insert
3451 the new instruction just before the tablejump. */ 3645 the new instruction just before the tablejump. */
3452 if (GET_CODE (PATTERN (insn)) == ADDR_VEC 3646 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
3453 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) 3647 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3454 insn = prev_real_insn (insn); 3648 insn = prev_active_insn (insn);
3455 3649
3456 #ifdef HAVE_cc0 3650 #ifdef HAVE_cc0
3457 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts 3651 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
3458 if cc0 isn't set. */ 3652 if cc0 isn't set. */
3459 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX); 3653 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
3479 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)) 3673 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
3480 { 3674 {
3481 /* Keeping in mind targets with small register classes and parameters 3675 /* Keeping in mind targets with small register classes and parameters
3482 in registers, we search backward and place the instructions before 3676 in registers, we search backward and place the instructions before
3483 the first parameter is loaded. Do this for everyone for consistency 3677 the first parameter is loaded. Do this for everyone for consistency
3484 and a presumption that we'll get better code elsewhere as well. 3678 and a presumption that we'll get better code elsewhere as well. */
3485
3486 It should always be the case that we can put these instructions
3487 anywhere in the basic block with performing PRE optimizations.
3488 Check this. */
3489
3490 gcc_assert (!pre
3491 || TEST_BIT (antloc[bb->index], expr->bitmap_index)
3492 || TEST_BIT (transp[bb->index], expr->bitmap_index));
3493 3679
3494 /* Since different machines initialize their parameter registers 3680 /* Since different machines initialize their parameter registers
3495 in different orders, assume nothing. Collect the set of all 3681 in different orders, assume nothing. Collect the set of all
3496 parameter registers. */ 3682 parameter registers. */
3497 insn = find_first_parameter_load (insn, BB_HEAD (bb)); 3683 insn = find_first_parameter_load (insn, BB_HEAD (bb));
3584 detailed in Morgans book P277 (sec 10.5) for 3770 detailed in Morgans book P277 (sec 10.5) for
3585 handling this situation. This one is easiest for 3771 handling this situation. This one is easiest for
3586 now. */ 3772 now. */
3587 3773
3588 if (eg->flags & EDGE_ABNORMAL) 3774 if (eg->flags & EDGE_ABNORMAL)
3589 insert_insn_end_basic_block (index_map[j], bb, 0); 3775 insert_insn_end_basic_block (index_map[j], bb);
3590 else 3776 else
3591 { 3777 {
3592 insn = process_insert_insn (index_map[j]); 3778 insn = process_insert_insn (index_map[j]);
3593 insert_insn_on_edge (insn, eg); 3779 insert_insn_on_edge (insn, eg);
3594 } 3780 }
4043 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 4229 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4044 add_label_notes (XVECEXP (x, i, j), insn); 4230 add_label_notes (XVECEXP (x, i, j), insn);
4045 } 4231 }
4046 } 4232 }
4047 4233
4048 /* Compute transparent outgoing information for each block.
4049
4050 An expression is transparent to an edge unless it is killed by
4051 the edge itself. This can only happen with abnormal control flow,
4052 when the edge is traversed through a call. This happens with
4053 non-local labels and exceptions.
4054
4055 This would not be necessary if we split the edge. While this is
4056 normally impossible for abnormal critical edges, with some effort
4057 it should be possible with exception handling, since we still have
4058 control over which handler should be invoked. But due to increased
4059 EH table sizes, this may not be worthwhile. */
4060
4061 static void
4062 compute_transpout (void)
4063 {
4064 basic_block bb;
4065 unsigned int i;
4066 struct expr *expr;
4067
4068 sbitmap_vector_ones (transpout, last_basic_block);
4069
4070 FOR_EACH_BB (bb)
4071 {
4072 /* Note that flow inserted a nop at the end of basic blocks that
4073 end in call instructions for reasons other than abnormal
4074 control flow. */
4075 if (! CALL_P (BB_END (bb)))
4076 continue;
4077
4078 for (i = 0; i < expr_hash_table.size; i++)
4079 for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash)
4080 if (MEM_P (expr->expr))
4081 {
4082 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4083 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4084 continue;
4085
4086 /* ??? Optimally, we would use interprocedural alias
4087 analysis to determine if this mem is actually killed
4088 by this call. */
4089 RESET_BIT (transpout[bb->index], expr->bitmap_index);
4090 }
4091 }
4092 }
4093
4094 /* Code Hoisting variables and subroutines. */ 4234 /* Code Hoisting variables and subroutines. */
4095 4235
4096 /* Very busy expressions. */ 4236 /* Very busy expressions. */
4097 static sbitmap *hoist_vbein; 4237 static sbitmap *hoist_vbein;
4098 static sbitmap *hoist_vbeout; 4238 static sbitmap *hoist_vbeout;
4099 4239
4100 /* Hoistable expressions. */
4101 static sbitmap *hoist_exprs;
4102
4103 /* ??? We could compute post dominators and run this algorithm in 4240 /* ??? We could compute post dominators and run this algorithm in
4104 reverse to perform tail merging, doing so would probably be 4241 reverse to perform tail merging, doing so would probably be
4105 more effective than the tail merging code in jump.c. 4242 more effective than the tail merging code in jump.c.
4106 4243
4107 It's unclear if tail merging could be run in parallel with 4244 It's unclear if tail merging could be run in parallel with
4116 transp = sbitmap_vector_alloc (n_blocks, n_exprs); 4253 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4117 comp = sbitmap_vector_alloc (n_blocks, n_exprs); 4254 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4118 4255
4119 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); 4256 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
4120 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); 4257 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
4121 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
4122 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4123 } 4258 }
4124 4259
4125 /* Free vars used for code hoisting analysis. */ 4260 /* Free vars used for code hoisting analysis. */
4126 4261
4127 static void 4262 static void
4131 sbitmap_vector_free (transp); 4266 sbitmap_vector_free (transp);
4132 sbitmap_vector_free (comp); 4267 sbitmap_vector_free (comp);
4133 4268
4134 sbitmap_vector_free (hoist_vbein); 4269 sbitmap_vector_free (hoist_vbein);
4135 sbitmap_vector_free (hoist_vbeout); 4270 sbitmap_vector_free (hoist_vbeout);
4136 sbitmap_vector_free (hoist_exprs);
4137 sbitmap_vector_free (transpout);
4138 4271
4139 free_dominance_info (CDI_DOMINATORS); 4272 free_dominance_info (CDI_DOMINATORS);
4140 } 4273 }
4141 4274
4142 /* Compute the very busy expressions at entry/exit from each block. 4275 /* Compute the very busy expressions at entry/exit from each block.
4163 /* We scan the blocks in the reverse order to speed up 4296 /* We scan the blocks in the reverse order to speed up
4164 the convergence. */ 4297 the convergence. */
4165 FOR_EACH_BB_REVERSE (bb) 4298 FOR_EACH_BB_REVERSE (bb)
4166 { 4299 {
4167 if (bb->next_bb != EXIT_BLOCK_PTR) 4300 if (bb->next_bb != EXIT_BLOCK_PTR)
4168 sbitmap_intersection_of_succs (hoist_vbeout[bb->index], 4301 {
4169 hoist_vbein, bb->index); 4302 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
4303 hoist_vbein, bb->index);
4304
4305 /* Include expressions in VBEout that are calculated
4306 in BB and available at its end. */
4307 sbitmap_a_or_b (hoist_vbeout[bb->index],
4308 hoist_vbeout[bb->index], comp[bb->index]);
4309 }
4170 4310
4171 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], 4311 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
4172 antloc[bb->index], 4312 antloc[bb->index],
4173 hoist_vbeout[bb->index], 4313 hoist_vbeout[bb->index],
4174 transp[bb->index]); 4314 transp[bb->index]);
4176 4316
4177 passes++; 4317 passes++;
4178 } 4318 }
4179 4319
4180 if (dump_file) 4320 if (dump_file)
4181 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); 4321 {
4322 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
4323
4324 FOR_EACH_BB (bb)
4325 {
4326 fprintf (dump_file, "vbein (%d): ", bb->index);
4327 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
4328 fprintf (dump_file, "vbeout(%d): ", bb->index);
4329 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
4330 }
4331 }
4182 } 4332 }
4183 4333
4184 /* Top level routine to do the dataflow analysis needed by code hoisting. */ 4334 /* Top level routine to do the dataflow analysis needed by code hoisting. */
4185 4335
4186 static void 4336 static void
4187 compute_code_hoist_data (void) 4337 compute_code_hoist_data (void)
4188 { 4338 {
4189 compute_local_properties (transp, comp, antloc, &expr_hash_table); 4339 compute_local_properties (transp, comp, antloc, &expr_hash_table);
4190 compute_transpout (); 4340 prune_expressions (false);
4191 compute_code_hoist_vbeinout (); 4341 compute_code_hoist_vbeinout ();
4192 calculate_dominance_info (CDI_DOMINATORS); 4342 calculate_dominance_info (CDI_DOMINATORS);
4193 if (dump_file) 4343 if (dump_file)
4194 fprintf (dump_file, "\n"); 4344 fprintf (dump_file, "\n");
4195 } 4345 }
4196 4346
4197 /* Determine if the expression identified by EXPR_INDEX would 4347 /* Determine if the expression identified by EXPR_INDEX would
4198 reach BB unimpared if it was placed at the end of EXPR_BB. 4348 reach BB unimpared if it was placed at the end of EXPR_BB.
4349 Stop the search if the expression would need to be moved more
4350 than DISTANCE instructions.
4199 4351
4200 It's unclear exactly what Muchnick meant by "unimpared". It seems 4352 It's unclear exactly what Muchnick meant by "unimpared". It seems
4201 to me that the expression must either be computed or transparent in 4353 to me that the expression must either be computed or transparent in
4202 *every* block in the path(s) from EXPR_BB to BB. Any other definition 4354 *every* block in the path(s) from EXPR_BB to BB. Any other definition
4203 would allow the expression to be hoisted out of loops, even if 4355 would allow the expression to be hoisted out of loops, even if
4206 Contrast this to reachability for PRE where an expression is 4358 Contrast this to reachability for PRE where an expression is
4207 considered reachable if *any* path reaches instead of *all* 4359 considered reachable if *any* path reaches instead of *all*
4208 paths. */ 4360 paths. */
4209 4361
4210 static int 4362 static int
4211 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited) 4363 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
4364 char *visited, int distance, int *bb_size)
4212 { 4365 {
4213 edge pred; 4366 edge pred;
4214 edge_iterator ei; 4367 edge_iterator ei;
4215 int visited_allocated_locally = 0; 4368 int visited_allocated_locally = 0;
4216 4369
4370 /* Terminate the search if distance, for which EXPR is allowed to move,
4371 is exhausted. */
4372 if (distance > 0)
4373 {
4374 distance -= bb_size[bb->index];
4375
4376 if (distance <= 0)
4377 return 0;
4378 }
4379 else
4380 gcc_assert (distance == 0);
4217 4381
4218 if (visited == NULL) 4382 if (visited == NULL)
4219 { 4383 {
4220 visited_allocated_locally = 1; 4384 visited_allocated_locally = 1;
4221 visited = XCNEWVEC (char, last_basic_block); 4385 visited = XCNEWVEC (char, last_basic_block);
4230 else if (pred_bb == expr_bb) 4394 else if (pred_bb == expr_bb)
4231 continue; 4395 continue;
4232 else if (visited[pred_bb->index]) 4396 else if (visited[pred_bb->index])
4233 continue; 4397 continue;
4234 4398
4235 /* Does this predecessor generate this expression? */
4236 else if (TEST_BIT (comp[pred_bb->index], expr_index))
4237 break;
4238 else if (! TEST_BIT (transp[pred_bb->index], expr_index)) 4399 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
4239 break; 4400 break;
4240 4401
4241 /* Not killed. */ 4402 /* Not killed. */
4242 else 4403 else
4243 { 4404 {
4244 visited[pred_bb->index] = 1; 4405 visited[pred_bb->index] = 1;
4245 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, 4406 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
4246 pred_bb, visited)) 4407 visited, distance, bb_size))
4247 break; 4408 break;
4248 } 4409 }
4249 } 4410 }
4250 if (visited_allocated_locally) 4411 if (visited_allocated_locally)
4251 free (visited); 4412 free (visited);
4252 4413
4253 return (pred == NULL); 4414 return (pred == NULL);
4254 } 4415 }
4255 4416
4417 /* Find occurence in BB. */
4418 static struct occr *
4419 find_occr_in_bb (struct occr *occr, basic_block bb)
4420 {
4421 /* Find the right occurrence of this expression. */
4422 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
4423 occr = occr->next;
4424
4425 return occr;
4426 }
4427
4256 /* Actually perform code hoisting. */ 4428 /* Actually perform code hoisting. */
4257 4429
4258 static int 4430 static int
4259 hoist_code (void) 4431 hoist_code (void)
4260 { 4432 {
4261 basic_block bb, dominated; 4433 basic_block bb, dominated;
4434 VEC (basic_block, heap) *dom_tree_walk;
4435 unsigned int dom_tree_walk_index;
4262 VEC (basic_block, heap) *domby; 4436 VEC (basic_block, heap) *domby;
4263 unsigned int i,j; 4437 unsigned int i,j;
4264 struct expr **index_map; 4438 struct expr **index_map;
4265 struct expr *expr; 4439 struct expr *expr;
4440 int *to_bb_head;
4441 int *bb_size;
4266 int changed = 0; 4442 int changed = 0;
4267
4268 sbitmap_vector_zero (hoist_exprs, last_basic_block);
4269 4443
4270 /* Compute a mapping from expression number (`bitmap_index') to 4444 /* Compute a mapping from expression number (`bitmap_index') to
4271 hash table entry. */ 4445 hash table entry. */
4272 4446
4273 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems); 4447 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
4274 for (i = 0; i < expr_hash_table.size; i++) 4448 for (i = 0; i < expr_hash_table.size; i++)
4275 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash) 4449 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
4276 index_map[expr->bitmap_index] = expr; 4450 index_map[expr->bitmap_index] = expr;
4277 4451
4452 /* Calculate sizes of basic blocks and note how far
4453 each instruction is from the start of its block. We then use this
4454 data to restrict distance an expression can travel. */
4455
4456 to_bb_head = XCNEWVEC (int, get_max_uid ());
4457 bb_size = XCNEWVEC (int, last_basic_block);
4458
4459 FOR_EACH_BB (bb)
4460 {
4461 rtx insn;
4462 int to_head;
4463
4464 to_head = 0;
4465 FOR_BB_INSNS (bb, insn)
4466 {
4467 /* Don't count debug instructions to avoid them affecting
4468 decision choices. */
4469 if (NONDEBUG_INSN_P (insn))
4470 to_bb_head[INSN_UID (insn)] = to_head++;
4471 }
4472
4473 bb_size[bb->index] = to_head;
4474 }
4475
4476 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
4477 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
4478 == ENTRY_BLOCK_PTR->next_bb));
4479
4480 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
4481 ENTRY_BLOCK_PTR->next_bb);
4482
4278 /* Walk over each basic block looking for potentially hoistable 4483 /* Walk over each basic block looking for potentially hoistable
4279 expressions, nothing gets hoisted from the entry block. */ 4484 expressions, nothing gets hoisted from the entry block. */
4280 FOR_EACH_BB (bb) 4485 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
4281 { 4486 {
4282 int found = 0; 4487 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
4283 int insn_inserted_p; 4488
4284 4489 if (VEC_length (basic_block, domby) == 0)
4285 domby = get_dominated_by (CDI_DOMINATORS, bb); 4490 continue;
4491
4286 /* Examine each expression that is very busy at the exit of this 4492 /* Examine each expression that is very busy at the exit of this
4287 block. These are the potentially hoistable expressions. */ 4493 block. These are the potentially hoistable expressions. */
4288 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) 4494 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
4289 { 4495 {
4290 int hoistable = 0; 4496 if (TEST_BIT (hoist_vbeout[bb->index], i))
4291
4292 if (TEST_BIT (hoist_vbeout[bb->index], i)
4293 && TEST_BIT (transpout[bb->index], i))
4294 { 4497 {
4498 /* Current expression. */
4499 struct expr *expr = index_map[i];
4500 /* Number of occurences of EXPR that can be hoisted to BB. */
4501 int hoistable = 0;
4502 /* Basic blocks that have occurences reachable from BB. */
4503 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
4504 /* Occurences reachable from BB. */
4505 VEC (occr_t, heap) *occrs_to_hoist = NULL;
4506 /* We want to insert the expression into BB only once, so
4507 note when we've inserted it. */
4508 int insn_inserted_p;
4509 occr_t occr;
4510
4511 bitmap_initialize (from_bbs, 0);
4512
4513 /* If an expression is computed in BB and is available at end of
4514 BB, hoist all occurences dominated by BB to BB. */
4515 if (TEST_BIT (comp[bb->index], i))
4516 {
4517 occr = find_occr_in_bb (expr->antic_occr, bb);
4518
4519 if (occr)
4520 {
4521 /* An occurence might've been already deleted
4522 while processing a dominator of BB. */
4523 if (occr->deleted_p)
4524 gcc_assert (MAX_HOIST_DEPTH > 1);
4525 else
4526 {
4527 gcc_assert (NONDEBUG_INSN_P (occr->insn));
4528 hoistable++;
4529 }
4530 }
4531 else
4532 hoistable++;
4533 }
4534
4295 /* We've found a potentially hoistable expression, now 4535 /* We've found a potentially hoistable expression, now
4296 we look at every block BB dominates to see if it 4536 we look at every block BB dominates to see if it
4297 computes the expression. */ 4537 computes the expression. */
4298 for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) 4538 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
4299 { 4539 {
4540 int max_distance;
4541
4300 /* Ignore self dominance. */ 4542 /* Ignore self dominance. */
4301 if (bb == dominated) 4543 if (bb == dominated)
4302 continue; 4544 continue;
4303 /* We've found a dominated block, now see if it computes 4545 /* We've found a dominated block, now see if it computes
4304 the busy expression and whether or not moving that 4546 the busy expression and whether or not moving that
4305 expression to the "beginning" of that block is safe. */ 4547 expression to the "beginning" of that block is safe. */
4306 if (!TEST_BIT (antloc[dominated->index], i)) 4548 if (!TEST_BIT (antloc[dominated->index], i))
4307 continue; 4549 continue;
4308 4550
4551 occr = find_occr_in_bb (expr->antic_occr, dominated);
4552 gcc_assert (occr);
4553
4554 /* An occurence might've been already deleted
4555 while processing a dominator of BB. */
4556 if (occr->deleted_p)
4557 {
4558 gcc_assert (MAX_HOIST_DEPTH > 1);
4559 continue;
4560 }
4561 gcc_assert (NONDEBUG_INSN_P (occr->insn));
4562
4563 max_distance = expr->max_distance;
4564 if (max_distance > 0)
4565 /* Adjust MAX_DISTANCE to account for the fact that
4566 OCCR won't have to travel all of DOMINATED, but
4567 only part of it. */
4568 max_distance += (bb_size[dominated->index]
4569 - to_bb_head[INSN_UID (occr->insn)]);
4570
4309 /* Note if the expression would reach the dominated block 4571 /* Note if the expression would reach the dominated block
4310 unimpared if it was placed at the end of BB. 4572 unimpared if it was placed at the end of BB.
4311 4573
4312 Keep track of how many times this expression is hoistable 4574 Keep track of how many times this expression is hoistable
4313 from a dominated block into BB. */ 4575 from a dominated block into BB. */
4314 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) 4576 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
4315 hoistable++; 4577 max_distance, bb_size))
4578 {
4579 hoistable++;
4580 VEC_safe_push (occr_t, heap,
4581 occrs_to_hoist, occr);
4582 bitmap_set_bit (from_bbs, dominated->index);
4583 }
4316 } 4584 }
4317 4585
4318 /* If we found more than one hoistable occurrence of this 4586 /* If we found more than one hoistable occurrence of this
4319 expression, then note it in the bitmap of expressions to 4587 expression, then note it in the vector of expressions to
4320 hoist. It makes no sense to hoist things which are computed 4588 hoist. It makes no sense to hoist things which are computed
4321 in only one BB, and doing so tends to pessimize register 4589 in only one BB, and doing so tends to pessimize register
4322 allocation. One could increase this value to try harder 4590 allocation. One could increase this value to try harder
4323 to avoid any possible code expansion due to register 4591 to avoid any possible code expansion due to register
4324 allocation issues; however experiments have shown that 4592 allocation issues; however experiments have shown that
4325 the vast majority of hoistable expressions are only movable 4593 the vast majority of hoistable expressions are only movable
4326 from two successors, so raising this threshold is likely 4594 from two successors, so raising this threshold is likely
4327 to nullify any benefit we get from code hoisting. */ 4595 to nullify any benefit we get from code hoisting. */
4328 if (hoistable > 1) 4596 if (hoistable > 1 && dbg_cnt (hoist_insn))
4329 { 4597 {
4330 SET_BIT (hoist_exprs[bb->index], i); 4598 /* If (hoistable != VEC_length), then there is
4331 found = 1; 4599 an occurence of EXPR in BB itself. Don't waste
4332 } 4600 time looking for LCA in this case. */
4333 } 4601 if ((unsigned) hoistable
4334 } 4602 == VEC_length (occr_t, occrs_to_hoist))
4335 /* If we found nothing to hoist, then quit now. */
4336 if (! found)
4337 {
4338 VEC_free (basic_block, heap, domby);
4339 continue;
4340 }
4341
4342 /* Loop over all the hoistable expressions. */
4343 for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++)
4344 {
4345 /* We want to insert the expression into BB only once, so
4346 note when we've inserted it. */
4347 insn_inserted_p = 0;
4348
4349 /* These tests should be the same as the tests above. */
4350 if (TEST_BIT (hoist_exprs[bb->index], i))
4351 {
4352 /* We've found a potentially hoistable expression, now
4353 we look at every block BB dominates to see if it
4354 computes the expression. */
4355 for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4356 {
4357 /* Ignore self dominance. */
4358 if (bb == dominated)
4359 continue;
4360
4361 /* We've found a dominated block, now see if it computes
4362 the busy expression and whether or not moving that
4363 expression to the "beginning" of that block is safe. */
4364 if (!TEST_BIT (antloc[dominated->index], i))
4365 continue;
4366
4367 /* The expression is computed in the dominated block and
4368 it would be safe to compute it at the start of the
4369 dominated block. Now we have to determine if the
4370 expression would reach the dominated block if it was
4371 placed at the end of BB. */
4372 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
4373 { 4603 {
4374 struct expr *expr = index_map[i]; 4604 basic_block lca;
4375 struct occr *occr = expr->antic_occr; 4605
4376 rtx insn; 4606 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
4377 rtx set; 4607 from_bbs);
4378 4608 if (lca != bb)
4379 /* Find the right occurrence of this expression. */ 4609 /* Punt, it's better to hoist these occurences to
4380 while (BLOCK_FOR_INSN (occr->insn) != dominated && occr) 4610 LCA. */
4381 occr = occr->next; 4611 VEC_free (occr_t, heap, occrs_to_hoist);
4382
4383 gcc_assert (occr);
4384 insn = occr->insn;
4385 set = single_set (insn);
4386 gcc_assert (set);
4387
4388 /* Create a pseudo-reg to store the result of reaching
4389 expressions into. Get the mode for the new pseudo
4390 from the mode of the original destination pseudo. */
4391 if (expr->reaching_reg == NULL)
4392 expr->reaching_reg
4393 = gen_reg_rtx_and_attrs (SET_DEST (set));
4394
4395 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
4396 delete_insn (insn);
4397 occr->deleted_p = 1;
4398 changed = 1;
4399 gcse_subst_count++;
4400
4401 if (!insn_inserted_p)
4402 {
4403 insert_insn_end_basic_block (index_map[i], bb, 0);
4404 insn_inserted_p = 1;
4405 }
4406 } 4612 }
4407 } 4613 }
4614 else
4615 /* Punt, no point hoisting a single occurence. */
4616 VEC_free (occr_t, heap, occrs_to_hoist);
4617
4618 insn_inserted_p = 0;
4619
4620 /* Walk through occurences of I'th expressions we want
4621 to hoist to BB and make the transformations. */
4622 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
4623 {
4624 rtx insn;
4625 rtx set;
4626
4627 gcc_assert (!occr->deleted_p);
4628
4629 insn = occr->insn;
4630 set = single_set (insn);
4631 gcc_assert (set);
4632
4633 /* Create a pseudo-reg to store the result of reaching
4634 expressions into. Get the mode for the new pseudo
4635 from the mode of the original destination pseudo.
4636
4637 It is important to use new pseudos whenever we
4638 emit a set. This will allow reload to use
4639 rematerialization for such registers. */
4640 if (!insn_inserted_p)
4641 expr->reaching_reg
4642 = gen_reg_rtx_and_attrs (SET_DEST (set));
4643
4644 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set),
4645 insn);
4646 delete_insn (insn);
4647 occr->deleted_p = 1;
4648 changed = 1;
4649 gcse_subst_count++;
4650
4651 if (!insn_inserted_p)
4652 {
4653 insert_insn_end_basic_block (expr, bb);
4654 insn_inserted_p = 1;
4655 }
4656 }
4657
4658 VEC_free (occr_t, heap, occrs_to_hoist);
4659 bitmap_clear (from_bbs);
4408 } 4660 }
4409 } 4661 }
4410 VEC_free (basic_block, heap, domby); 4662 VEC_free (basic_block, heap, domby);
4411 } 4663 }
4412 4664
4665 VEC_free (basic_block, heap, dom_tree_walk);
4666 free (bb_size);
4667 free (to_bb_head);
4413 free (index_map); 4668 free (index_map);
4414 4669
4415 return changed; 4670 return changed;
4416 } 4671 }
4417 4672
4430 /* Return if there's nothing to do, or it is too expensive. */ 4685 /* Return if there's nothing to do, or it is too expensive. */
4431 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1 4686 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
4432 || is_too_expensive (_("GCSE disabled"))) 4687 || is_too_expensive (_("GCSE disabled")))
4433 return 0; 4688 return 0;
4434 4689
4690 doing_code_hoisting_p = true;
4691
4435 /* We need alias. */ 4692 /* We need alias. */
4436 init_alias_analysis (); 4693 init_alias_analysis ();
4437 4694
4438 bytes_used = 0; 4695 bytes_used = 0;
4439 gcc_obstack_init (&gcse_obstack); 4696 gcc_obstack_init (&gcse_obstack);
4464 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ", 4721 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
4465 current_function_name (), n_basic_blocks, bytes_used); 4722 current_function_name (), n_basic_blocks, bytes_used);
4466 fprintf (dump_file, "%d substs, %d insns created\n", 4723 fprintf (dump_file, "%d substs, %d insns created\n",
4467 gcse_subst_count, gcse_create_count); 4724 gcse_subst_count, gcse_create_count);
4468 } 4725 }
4726
4727 doing_code_hoisting_p = false;
4469 4728
4470 return changed; 4729 return changed;
4471 } 4730 }
4472 4731
4473 /* Here we provide the things required to do store motion towards 4732 /* Here we provide the things required to do store motion towards
4665 4924
4666 if (GET_MODE (x) == BLKmode) 4925 if (GET_MODE (x) == BLKmode)
4667 return 0; 4926 return 0;
4668 4927
4669 /* If we are handling exceptions, we must be careful with memory references 4928 /* If we are handling exceptions, we must be careful with memory references
4670 that may trap. If we are not, the behavior is undefined, so we may just 4929 that may trap. If we are not, the behavior is undefined, so we may just
4671 continue. */ 4930 continue. */
4672 if (flag_non_call_exceptions && may_trap_p (x)) 4931 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
4673 return 0; 4932 return 0;
4674 4933
4675 if (side_effects_p (x)) 4934 if (side_effects_p (x))
4676 return 0; 4935 return 0;
4677 4936
5056 } 5315 }
5057 5316
5058 static unsigned int 5317 static unsigned int
5059 execute_rtl_cprop (void) 5318 execute_rtl_cprop (void)
5060 { 5319 {
5320 int changed;
5061 delete_unreachable_blocks (); 5321 delete_unreachable_blocks ();
5062 df_set_flags (DF_LR_RUN_DCE); 5322 df_set_flags (DF_LR_RUN_DCE);
5063 df_analyze (); 5323 df_analyze ();
5064 flag_rerun_cse_after_global_opts |= one_cprop_pass (); 5324 changed = one_cprop_pass ();
5325 flag_rerun_cse_after_global_opts |= changed;
5326 if (changed)
5327 cleanup_cfg (0);
5065 return 0; 5328 return 0;
5066 } 5329 }
5067 5330
5068 static bool 5331 static bool
5069 gate_rtl_pre (void) 5332 gate_rtl_pre (void)
5075 } 5338 }
5076 5339
5077 static unsigned int 5340 static unsigned int
5078 execute_rtl_pre (void) 5341 execute_rtl_pre (void)
5079 { 5342 {
5343 int changed;
5080 delete_unreachable_blocks (); 5344 delete_unreachable_blocks ();
5081 df_analyze (); 5345 df_analyze ();
5082 flag_rerun_cse_after_global_opts |= one_pre_gcse_pass (); 5346 changed = one_pre_gcse_pass ();
5347 flag_rerun_cse_after_global_opts |= changed;
5348 if (changed)
5349 cleanup_cfg (0);
5083 return 0; 5350 return 0;
5084 } 5351 }
5085 5352
5086 static bool 5353 static bool
5087 gate_rtl_hoist (void) 5354 gate_rtl_hoist (void)
5096 } 5363 }
5097 5364
5098 static unsigned int 5365 static unsigned int
5099 execute_rtl_hoist (void) 5366 execute_rtl_hoist (void)
5100 { 5367 {
5368 int changed;
5101 delete_unreachable_blocks (); 5369 delete_unreachable_blocks ();
5102 df_analyze (); 5370 df_analyze ();
5103 flag_rerun_cse_after_global_opts |= one_code_hoisting_pass (); 5371 changed = one_code_hoisting_pass ();
5372 flag_rerun_cse_after_global_opts |= changed;
5373 if (changed)
5374 cleanup_cfg (0);
5104 return 0; 5375 return 0;
5105 } 5376 }
5106 5377
5107 struct rtl_opt_pass pass_rtl_cprop = 5378 struct rtl_opt_pass pass_rtl_cprop =
5108 { 5379 {
5166 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */ 5437 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
5167 } 5438 }
5168 }; 5439 };
5169 5440
5170 #include "gt-gcse.h" 5441 #include "gt-gcse.h"
5442