comparison gcc/tree-predcom.c @ 55:77e2b8dfacca gcc-4.4.5

update it from 4.4.3 to 4.5.0
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Fri, 12 Feb 2010 23:39:51 +0900
parents a06113de4d67
children b7f97abdc517
comparison
equal deleted inserted replaced
52:c156f1bd5cd9 55:77e2b8dfacca
1 /* Predictive commoning. 1 /* Predictive commoning.
2 Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc. 2 Copyright (C) 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
3 3
4 This file is part of GCC. 4 This file is part of GCC.
5 5
6 GCC is free software; you can redistribute it and/or modify it 6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the 7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any 8 Free Software Foundation; either version 3, or (at your option) any
9 later version. 9 later version.
10 10
11 GCC is distributed in the hope that it will be useful, but WITHOUT 11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details. 14 for more details.
15 15
16 You should have received a copy of the GNU General Public License 16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see 17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */ 18 <http://www.gnu.org/licenses/>. */
19 19
20 /* This file implements the predictive commoning optimization. Predictive 20 /* This file implements the predictive commoning optimization. Predictive
27 incorporate data dependence analysis to detect the opportunities, perform 27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately, 28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account. 29 and if needed, we could also take register pressure into account.
30 30
31 Let us demonstrate what is done on an example: 31 Let us demonstrate what is done on an example:
32 32
33 for (i = 0; i < 100; i++) 33 for (i = 0; i < 100; i++)
34 { 34 {
35 a[i+2] = a[i] + a[i+1]; 35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i; 36 b[10] = b[10] + i;
37 c[i] = c[99 - i]; 37 c[i] = c[99 - i];
61 chains whose head is the write whose values are used by the reads in 61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently, 62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains 63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling 64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch. 65 factor in order to get rid of the copies on the loop latch.
66 66
67 In our example, we get the following chains (the chain for c is invalid). 67 In our example, we get the following chains (the chain for c is invalid).
68 68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2} 69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0} 70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1} 71 d[i + 1]{read,+0}, d[i]{write,+1}
74 together with the distance of this reuse. I.e. we take the last 74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references 75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove 76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the 77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the 78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance 79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with 80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the 81 step 0. In our example (the numbers indicate the distance of the
82 reuse), 82 reuse),
83 83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*) 84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
130 (N + 1) for each root reference (N for references for that we avoided 130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F 131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are 132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can 133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated. 134 be coalesced and the copies can be eliminated.
135 135
136 TODO -- copy propagation and other optimizations may change the live 136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced; 137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure. 138 this may increase the register pressure.
139 139
140 In our case, F = 2 and the (main loop of the) result is 140 In our case, F = 2 and the (main loop of the) result is
204 204
205 /* The maximum number of iterations between the considered memory 205 /* The maximum number of iterations between the considered memory
206 references. */ 206 references. */
207 207
208 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8) 208 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
209 209
210 /* Data references (or phi nodes that carry data reference values across 210 /* Data references (or phi nodes that carry data reference values across
211 loop iterations). */ 211 loop iterations). */
212 212
213 typedef struct dref 213 typedef struct dref_d
214 { 214 {
215 /* The reference itself. */ 215 /* The reference itself. */
216 struct data_reference *ref; 216 struct data_reference *ref;
217 217
218 /* The statement in that the reference appears. */ 218 /* The statement in that the reference appears. */
702 struct data_reference *dr, *dra, *drb; 702 struct data_reference *dr, *dra, *drb;
703 struct data_dependence_relation *ddr; 703 struct data_dependence_relation *ddr;
704 struct component *comp_list = NULL, *comp; 704 struct component *comp_list = NULL, *comp;
705 dref dataref; 705 dref dataref;
706 basic_block last_always_executed = last_always_executed_block (loop); 706 basic_block last_always_executed = last_always_executed_block (loop);
707 707
708 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 708 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
709 { 709 {
710 if (!DR_REF (dr)) 710 if (!DR_REF (dr))
711 { 711 {
712 /* A fake reference for call or asm_expr that may clobber memory; 712 /* A fake reference for call or asm_expr that may clobber memory;
752 /* If both A and B are reads, we may ignore unsuitable dependences. */ 752 /* If both A and B are reads, we may ignore unsuitable dependences. */
753 if (DR_IS_READ (dra) && DR_IS_READ (drb) 753 if (DR_IS_READ (dra) && DR_IS_READ (drb)
754 && (ia == bad || ib == bad 754 && (ia == bad || ib == bad
755 || !determine_offset (dra, drb, &dummy_off))) 755 || !determine_offset (dra, drb, &dummy_off)))
756 continue; 756 continue;
757 757
758 merge_comps (comp_father, comp_size, ia, ib); 758 merge_comps (comp_father, comp_size, ia, ib);
759 } 759 }
760 760
761 comps = XCNEWVEC (struct component *, n); 761 comps = XCNEWVEC (struct component *, n);
762 bad = component_of (comp_father, n); 762 bad = component_of (comp_father, n);
773 comp = XCNEW (struct component); 773 comp = XCNEW (struct component);
774 comp->refs = VEC_alloc (dref, heap, comp_size[ca]); 774 comp->refs = VEC_alloc (dref, heap, comp_size[ca]);
775 comps[ca] = comp; 775 comps[ca] = comp;
776 } 776 }
777 777
778 dataref = XCNEW (struct dref); 778 dataref = XCNEW (struct dref_d);
779 dataref->ref = dr; 779 dataref->ref = dr;
780 dataref->stmt = DR_STMT (dr); 780 dataref->stmt = DR_STMT (dr);
781 dataref->offset = double_int_zero; 781 dataref->offset = double_int_zero;
782 dataref->distance = 0; 782 dataref->distance = 0;
783 783
806 } 806 }
807 807
808 /* Returns true if the component COMP satisfies the conditions 808 /* Returns true if the component COMP satisfies the conditions
809 described in 2) at the beginning of this file. LOOP is the current 809 described in 2) at the beginning of this file. LOOP is the current
810 loop. */ 810 loop. */
811 811
812 static bool 812 static bool
813 suitable_component_p (struct loop *loop, struct component *comp) 813 suitable_component_p (struct loop *loop, struct component *comp)
814 { 814 {
815 unsigned i; 815 unsigned i;
816 dref a, first; 816 dref a, first;
857 if (has_write && comp->comp_step == RS_ANY) 857 if (has_write && comp->comp_step == RS_ANY)
858 return false; 858 return false;
859 859
860 return true; 860 return true;
861 } 861 }
862 862
863 /* Check the conditions on references inside each of components COMPS, 863 /* Check the conditions on references inside each of components COMPS,
864 and remove the unsuitable components from the list. The new list 864 and remove the unsuitable components from the list. The new list
865 of components is returned. The conditions are described in 2) at 865 of components is returned. The conditions are described in 2) at
866 the beginning of this file. LOOP is the current loop. */ 866 the beginning of this file. LOOP is the current loop. */
867 867
1124 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */ 1124 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1125 1125
1126 static void 1126 static void
1127 insert_looparound_copy (chain_p chain, dref ref, gimple phi) 1127 insert_looparound_copy (chain_p chain, dref ref, gimple phi)
1128 { 1128 {
1129 dref nw = XCNEW (struct dref), aref; 1129 dref nw = XCNEW (struct dref_d), aref;
1130 unsigned i; 1130 unsigned i;
1131 1131
1132 nw->stmt = phi; 1132 nw->stmt = phi;
1133 nw->distance = ref->distance + 1; 1133 nw->distance = ref->distance + 1;
1134 nw->always_accessed = 1; 1134 nw->always_accessed = 1;
1253 /* Turn the phi node into GIMPLE_ASSIGN. */ 1253 /* Turn the phi node into GIMPLE_ASSIGN. */
1254 new_stmt = gimple_build_assign (val, new_tree); 1254 new_stmt = gimple_build_assign (val, new_tree);
1255 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT); 1255 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1256 return; 1256 return;
1257 } 1257 }
1258 1258
1259 /* Since the reference is of gimple_reg type, it should only 1259 /* Since the reference is of gimple_reg type, it should only
1260 appear as lhs or rhs of modify statement. */ 1260 appear as lhs or rhs of modify statement. */
1261 gcc_assert (is_gimple_assign (stmt)); 1261 gcc_assert (is_gimple_assign (stmt));
1262 1262
1263 bsi = gsi_for_stmt (stmt); 1263 bsi = gsi_for_stmt (stmt);
1273 } 1273 }
1274 1274
1275 if (in_lhs) 1275 if (in_lhs)
1276 { 1276 {
1277 /* We have statement 1277 /* We have statement
1278 1278
1279 OLD = VAL 1279 OLD = VAL
1280 1280
1281 If OLD is a memory reference, then VAL is gimple_val, and we transform 1281 If OLD is a memory reference, then VAL is gimple_val, and we transform
1282 this to 1282 this to
1283 1283
1284 OLD = VAL 1284 OLD = VAL
1285 NEW = VAL 1285 NEW = VAL
1286 1286
1287 Otherwise, we are replacing a combination chain, 1287 Otherwise, we are replacing a combination chain,
1288 VAL is the expression that performs the combination, and OLD is an 1288 VAL is the expression that performs the combination, and OLD is an
1289 SSA name. In this case, we transform the assignment to 1289 SSA name. In this case, we transform the assignment to
1290 1290
1291 OLD = VAL 1291 OLD = VAL
1292 NEW = OLD 1292 NEW = OLD
1421 /* Marks all virtual operands of statement STMT for renaming. */ 1421 /* Marks all virtual operands of statement STMT for renaming. */
1422 1422
1423 void 1423 void
1424 mark_virtual_ops_for_renaming (gimple stmt) 1424 mark_virtual_ops_for_renaming (gimple stmt)
1425 { 1425 {
1426 ssa_op_iter iter;
1427 tree var; 1426 tree var;
1428 1427
1429 if (gimple_code (stmt) == GIMPLE_PHI) 1428 if (gimple_code (stmt) == GIMPLE_PHI)
1430 { 1429 {
1431 var = PHI_RESULT (stmt); 1430 var = PHI_RESULT (stmt);
1437 mark_sym_for_renaming (var); 1436 mark_sym_for_renaming (var);
1438 return; 1437 return;
1439 } 1438 }
1440 1439
1441 update_stmt (stmt); 1440 update_stmt (stmt);
1442 1441 if (gimple_vuse (stmt))
1443 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_VIRTUALS) 1442 mark_sym_for_renaming (gimple_vop (cfun));
1444 {
1445 if (TREE_CODE (var) == SSA_NAME)
1446 var = SSA_NAME_VAR (var);
1447 mark_sym_for_renaming (var);
1448 }
1449 }
1450
1451 /* Calls mark_virtual_ops_for_renaming for all members of LIST. */
1452
1453 static void
1454 mark_virtual_ops_for_renaming_list (gimple_seq list)
1455 {
1456 gimple_stmt_iterator gsi;
1457
1458 for (gsi = gsi_start (list); !gsi_end_p (gsi); gsi_next (&gsi))
1459 mark_virtual_ops_for_renaming (gsi_stmt (gsi));
1460 } 1443 }
1461 1444
1462 /* Returns a new temporary variable used for the I-th variable carrying 1445 /* Returns a new temporary variable used for the I-th variable carrying
1463 value of REF. The variable's uid is marked in TMP_VARS. */ 1446 value of REF. The variable's uid is marked in TMP_VARS. */
1464 1447
1511 var = predcom_tmp_var (ref, i, tmp_vars); 1494 var = predcom_tmp_var (ref, i, tmp_vars);
1512 VEC_quick_push (tree, chain->vars, var); 1495 VEC_quick_push (tree, chain->vars, var);
1513 } 1496 }
1514 if (reuse_first) 1497 if (reuse_first)
1515 VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0)); 1498 VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0));
1516 1499
1517 for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++) 1500 for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
1518 VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL)); 1501 VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL));
1519 1502
1520 for (i = 0; i < n; i++) 1503 for (i = 0; i < n; i++)
1521 { 1504 {
1523 next = VEC_index (tree, chain->vars, i + 1); 1506 next = VEC_index (tree, chain->vars, i + 1);
1524 init = get_init_expr (chain, i); 1507 init = get_init_expr (chain, i);
1525 1508
1526 init = force_gimple_operand (init, &stmts, true, NULL_TREE); 1509 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1527 if (stmts) 1510 if (stmts)
1528 { 1511 gsi_insert_seq_on_edge_immediate (entry, stmts);
1529 mark_virtual_ops_for_renaming_list (stmts);
1530 gsi_insert_seq_on_edge_immediate (entry, stmts);
1531 }
1532 1512
1533 phi = create_phi_node (var, loop->header); 1513 phi = create_phi_node (var, loop->header);
1534 SSA_NAME_DEF_STMT (var) = phi; 1514 SSA_NAME_DEF_STMT (var) = phi;
1535 add_phi_arg (phi, init, entry); 1515 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1536 add_phi_arg (phi, next, latch); 1516 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1537 } 1517 }
1538 } 1518 }
1539 1519
1540 /* Create the variables and initialization statement for root of chain 1520 /* Create the variables and initialization statement for root of chain
1541 CHAIN. Uids of the newly created temporary variables are marked 1521 CHAIN. Uids of the newly created temporary variables are marked
1579 *vars = VEC_alloc (tree, heap, written ? 2 : 1); 1559 *vars = VEC_alloc (tree, heap, written ? 2 : 1);
1580 var = predcom_tmp_var (ref, 0, tmp_vars); 1560 var = predcom_tmp_var (ref, 0, tmp_vars);
1581 VEC_quick_push (tree, *vars, var); 1561 VEC_quick_push (tree, *vars, var);
1582 if (written) 1562 if (written)
1583 VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0)); 1563 VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0));
1584 1564
1585 for (i = 0; VEC_iterate (tree, *vars, i, var); i++) 1565 for (i = 0; VEC_iterate (tree, *vars, i, var); i++)
1586 VEC_replace (tree, *vars, i, make_ssa_name (var, NULL)); 1566 VEC_replace (tree, *vars, i, make_ssa_name (var, NULL));
1587 1567
1588 var = VEC_index (tree, *vars, 0); 1568 var = VEC_index (tree, *vars, 0);
1589 1569
1590 init = force_gimple_operand (init, &stmts, written, NULL_TREE); 1570 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1591 if (stmts) 1571 if (stmts)
1592 { 1572 gsi_insert_seq_on_edge_immediate (entry, stmts);
1593 mark_virtual_ops_for_renaming_list (stmts);
1594 gsi_insert_seq_on_edge_immediate (entry, stmts);
1595 }
1596 1573
1597 if (written) 1574 if (written)
1598 { 1575 {
1599 next = VEC_index (tree, *vars, 1); 1576 next = VEC_index (tree, *vars, 1);
1600 phi = create_phi_node (var, loop->header); 1577 phi = create_phi_node (var, loop->header);
1601 SSA_NAME_DEF_STMT (var) = phi; 1578 SSA_NAME_DEF_STMT (var) = phi;
1602 add_phi_arg (phi, init, entry); 1579 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1603 add_phi_arg (phi, next, latch); 1580 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1604 } 1581 }
1605 else 1582 else
1606 { 1583 {
1607 gimple init_stmt = gimple_build_assign (var, init); 1584 gimple init_stmt = gimple_build_assign (var, init);
1608 mark_virtual_ops_for_renaming (init_stmt); 1585 mark_virtual_ops_for_renaming (init_stmt);
1625 gcc_assert (chain->type == CT_INVARIANT); 1602 gcc_assert (chain->type == CT_INVARIANT);
1626 gcc_assert (!chain->combined); 1603 gcc_assert (!chain->combined);
1627 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++) 1604 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1628 if (!DR_IS_READ (a->ref)) 1605 if (!DR_IS_READ (a->ref))
1629 n_writes++; 1606 n_writes++;
1630 1607
1631 /* If there are no reads in the loop, there is nothing to do. */ 1608 /* If there are no reads in the loop, there is nothing to do. */
1632 if (n_writes == VEC_length (dref, chain->refs)) 1609 if (n_writes == VEC_length (dref, chain->refs))
1633 return; 1610 return;
1634 1611
1635 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0, 1612 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1651 VEC_replace (tree, vars, 0, var); 1628 VEC_replace (tree, vars, 0, var);
1652 } 1629 }
1653 else 1630 else
1654 ridx = 1; 1631 ridx = 1;
1655 } 1632 }
1656 1633
1657 replace_ref_with (a->stmt, VEC_index (tree, vars, ridx), 1634 replace_ref_with (a->stmt, VEC_index (tree, vars, ridx),
1658 !is_read, !is_read); 1635 !is_read, !is_read);
1659 } 1636 }
1660 1637
1661 VEC_free (tree, heap, vars); 1638 VEC_free (tree, heap, vars);
1721 } 1698 }
1722 1699
1723 while (1) 1700 while (1)
1724 { 1701 {
1725 gimple_stmt_iterator bsi; 1702 gimple_stmt_iterator bsi;
1726 1703
1727 bsi = gsi_for_stmt (stmt); 1704 bsi = gsi_for_stmt (stmt);
1728 1705
1729 name = gimple_assign_lhs (stmt); 1706 name = gimple_assign_lhs (stmt);
1730 gcc_assert (TREE_CODE (name) == SSA_NAME); 1707 gcc_assert (TREE_CODE (name) == SSA_NAME);
1731 1708
1825 if (chain->type == CT_INVARIANT) 1802 if (chain->type == CT_INVARIANT)
1826 execute_load_motion (loop, chain, tmp_vars); 1803 execute_load_motion (loop, chain, tmp_vars);
1827 else 1804 else
1828 execute_pred_commoning_chain (loop, chain, tmp_vars); 1805 execute_pred_commoning_chain (loop, chain, tmp_vars);
1829 } 1806 }
1830 1807
1831 update_ssa (TODO_update_ssa_only_virtuals); 1808 update_ssa (TODO_update_ssa_only_virtuals);
1832 } 1809 }
1833 1810
1834 /* For each reference in CHAINS, if its defining statement is 1811 /* For each reference in CHAINS, if its defining statement is
1835 phi node, record the ssa name that is defined by it. */ 1812 phi node, record the ssa name that is defined by it. */
1889 /* Restore phi nodes that were replaced by ssa names before 1866 /* Restore phi nodes that were replaced by ssa names before
1890 tree_transform_and_unroll_loop (see detailed description in 1867 tree_transform_and_unroll_loop (see detailed description in
1891 tree_predictive_commoning_loop). */ 1868 tree_predictive_commoning_loop). */
1892 replace_names_by_phis (dta->chains); 1869 replace_names_by_phis (dta->chains);
1893 execute_pred_commoning (loop, dta->chains, dta->tmp_vars); 1870 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1894 }
1895
1896 /* Returns true if we can and should unroll LOOP FACTOR times. Number
1897 of iterations of the loop is returned in NITER. */
1898
1899 static bool
1900 should_unroll_loop_p (struct loop *loop, unsigned factor,
1901 struct tree_niter_desc *niter)
1902 {
1903 edge exit;
1904
1905 if (factor == 1)
1906 return false;
1907
1908 /* Check whether unrolling is possible. We only want to unroll loops
1909 for that we are able to determine number of iterations. We also
1910 want to split the extra iterations of the loop from its end,
1911 therefore we require that the loop has precisely one
1912 exit. */
1913
1914 exit = single_dom_exit (loop);
1915 if (!exit)
1916 return false;
1917
1918 if (!number_of_iterations_exit (loop, exit, niter, false))
1919 return false;
1920
1921 /* And of course, we must be able to duplicate the loop. */
1922 if (!can_duplicate_loop_p (loop))
1923 return false;
1924
1925 /* The final loop should be small enough. */
1926 if (tree_num_loop_insns (loop, &eni_size_weights) * factor
1927 > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
1928 return false;
1929
1930 return true;
1931 } 1871 }
1932 1872
1933 /* Base NAME and all the names in the chain of phi nodes that use it 1873 /* Base NAME and all the names in the chain of phi nodes that use it
1934 on variable VAR. The phi nodes are recognized by being in the copies of 1874 on variable VAR. The phi nodes are recognized by being in the copies of
1935 the header of the LOOP. */ 1875 the header of the LOOP. */
2360 new_chain->length = ch1->length; 2300 new_chain->length = ch1->length;
2361 2301
2362 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1) 2302 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2363 && VEC_iterate (dref, ch2->refs, i, r2)); i++) 2303 && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2364 { 2304 {
2365 nw = XCNEW (struct dref); 2305 nw = XCNEW (struct dref_d);
2366 nw->stmt = stmt_combining_refs (r1, r2); 2306 nw->stmt = stmt_combining_refs (r1, r2);
2367 nw->distance = r1->distance; 2307 nw->distance = r1->distance;
2368 2308
2369 VEC_safe_push (dref, heap, new_chain->refs, nw); 2309 VEC_safe_push (dref, heap, new_chain->refs, nw);
2370 } 2310 }
2419 } 2359 }
2420 } 2360 }
2421 } 2361 }
2422 } 2362 }
2423 2363
2424 /* Sets alias information based on data reference DR for REF,
2425 if necessary. */
2426
2427 static void
2428 set_alias_info (tree ref, struct data_reference *dr)
2429 {
2430 tree var;
2431 tree tag = DR_SYMBOL_TAG (dr);
2432
2433 gcc_assert (tag != NULL_TREE);
2434
2435 ref = get_base_address (ref);
2436 if (!ref || !INDIRECT_REF_P (ref))
2437 return;
2438
2439 var = SSA_NAME_VAR (TREE_OPERAND (ref, 0));
2440 if (var_ann (var)->symbol_mem_tag)
2441 return;
2442
2443 if (!MTAG_P (tag))
2444 new_type_alias (var, tag, ref);
2445 else
2446 var_ann (var)->symbol_mem_tag = tag;
2447 }
2448
2449 /* Prepare initializers for CHAIN in LOOP. Returns false if this is 2364 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2450 impossible because one of these initializers may trap, true otherwise. */ 2365 impossible because one of these initializers may trap, true otherwise. */
2451 2366
2452 static bool 2367 static bool
2453 prepare_initializers_chain (struct loop *loop, chain_p chain) 2368 prepare_initializers_chain (struct loop *loop, chain_p chain)
2483 continue; 2398 continue;
2484 2399
2485 init = ref_at_iteration (loop, DR_REF (dr), (int) i - n); 2400 init = ref_at_iteration (loop, DR_REF (dr), (int) i - n);
2486 if (!init) 2401 if (!init)
2487 return false; 2402 return false;
2488 2403
2489 if (!chain->all_always_accessed && tree_could_trap_p (init)) 2404 if (!chain->all_always_accessed && tree_could_trap_p (init))
2490 return false; 2405 return false;
2491 2406
2492 init = force_gimple_operand (init, &stmts, false, NULL_TREE); 2407 init = force_gimple_operand (init, &stmts, false, NULL_TREE);
2493 if (stmts) 2408 if (stmts)
2494 { 2409 gsi_insert_seq_on_edge_immediate (entry, stmts);
2495 mark_virtual_ops_for_renaming_list (stmts);
2496 gsi_insert_seq_on_edge_immediate (entry, stmts);
2497 }
2498 set_alias_info (init, dr);
2499 2410
2500 VEC_replace (tree, chain->inits, i, init); 2411 VEC_replace (tree, chain->inits, i, init);
2501 } 2412 }
2502 2413
2503 return true; 2414 return true;
2594 2505
2595 /* Determine the unroll factor, and if the loop should be unrolled, ensure 2506 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2596 that its number of iterations is divisible by the factor. */ 2507 that its number of iterations is divisible by the factor. */
2597 unroll_factor = determine_unroll_factor (chains); 2508 unroll_factor = determine_unroll_factor (chains);
2598 scev_reset (); 2509 scev_reset ();
2599 unroll = should_unroll_loop_p (loop, unroll_factor, &desc); 2510 unroll = (unroll_factor > 1
2511 && can_unroll_loop_p (loop, unroll_factor, &desc));
2600 exit = single_dom_exit (loop); 2512 exit = single_dom_exit (loop);
2601 2513
2602 /* Execute the predictive commoning transformations, and possibly unroll the 2514 /* Execute the predictive commoning transformations, and possibly unroll the
2603 loop. */ 2515 loop. */
2604 if (unroll) 2516 if (unroll)
2608 if (dump_file && (dump_flags & TDF_DETAILS)) 2520 if (dump_file && (dump_flags & TDF_DETAILS))
2609 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor); 2521 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2610 2522
2611 dta.chains = chains; 2523 dta.chains = chains;
2612 dta.tmp_vars = tmp_vars; 2524 dta.tmp_vars = tmp_vars;
2613 2525
2614 update_ssa (TODO_update_ssa_only_virtuals); 2526 update_ssa (TODO_update_ssa_only_virtuals);
2615 2527
2616 /* Cfg manipulations performed in tree_transform_and_unroll_loop before 2528 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2617 execute_pred_commoning_cbck is called may cause phi nodes to be 2529 execute_pred_commoning_cbck is called may cause phi nodes to be
2618 reallocated, which is a problem since CHAINS may point to these 2530 reallocated, which is a problem since CHAINS may point to these