comparison gcc/loop-unroll.c @ 67:f6334be47118

update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Mar 2011 17:18:12 +0900
parents b7f97abdc517
children 04ced10e8804
comparison
equal deleted inserted replaced
65:65488c3d617d 67:f6334be47118
499 apply_opt_in_copies (opt_info, npeel, false, true); 499 apply_opt_in_copies (opt_info, npeel, false, true);
500 free_opt_info (opt_info); 500 free_opt_info (opt_info);
501 } 501 }
502 502
503 /* Remove the exit edges. */ 503 /* Remove the exit edges. */
504 for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++) 504 FOR_EACH_VEC_ELT (edge, remove_edges, i, ein)
505 remove_path (ein); 505 remove_path (ein);
506 VEC_free (edge, heap, remove_edges); 506 VEC_free (edge, heap, remove_edges);
507 } 507 }
508 508
509 ein = desc->in_edge; 509 ein = desc->in_edge;
787 desc->niter /= max_unroll + 1; 787 desc->niter /= max_unroll + 1;
788 desc->niter_max /= max_unroll + 1; 788 desc->niter_max /= max_unroll + 1;
789 desc->niter_expr = GEN_INT (desc->niter); 789 desc->niter_expr = GEN_INT (desc->niter);
790 790
791 /* Remove the edges. */ 791 /* Remove the edges. */
792 for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++) 792 FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
793 remove_path (e); 793 remove_path (e);
794 VEC_free (edge, heap, remove_edges); 794 VEC_free (edge, heap, remove_edges);
795 795
796 if (dump_file) 796 if (dump_file)
797 fprintf (dump_file, 797 fprintf (dump_file,
898 /* ??? We used to assume that INSNS can contain control flow insns, and 898 /* ??? We used to assume that INSNS can contain control flow insns, and
899 that we had to try to find sub basic blocks in BB to maintain a valid 899 that we had to try to find sub basic blocks in BB to maintain a valid
900 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB 900 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
901 and call break_superblocks when going out of cfglayout mode. But it 901 and call break_superblocks when going out of cfglayout mode. But it
902 turns out that this never happens; and that if it does ever happen, 902 turns out that this never happens; and that if it does ever happen,
903 the verify_flow_info call in loop_optimizer_finalize would fail. 903 the TODO_verify_flow at the end of the RTL loop passes would fail.
904 904
905 There are two reasons why we expected we could have control flow insns 905 There are two reasons why we expected we could have control flow insns
906 in INSNS. The first is when a comparison has to be done in parts, and 906 in INSNS. The first is when a comparison has to be done in parts, and
907 the second is when the number of iterations is computed for loops with 907 the second is when the number of iterations is computed for loops with
908 the number of iterations known at runtime. In both cases, test cases 908 the number of iterations known at runtime. In both cases, test cases
990 { 990 {
991 VEC (basic_block, heap) *ldom; 991 VEC (basic_block, heap) *ldom;
992 basic_block bb; 992 basic_block bb;
993 993
994 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); 994 ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
995 for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++) 995 FOR_EACH_VEC_ELT (basic_block, ldom, j, bb)
996 if (!flow_bb_inside_loop_p (loop, bb)) 996 if (!flow_bb_inside_loop_p (loop, bb))
997 VEC_safe_push (basic_block, heap, dom_bbs, bb); 997 VEC_safe_push (basic_block, heap, dom_bbs, bb);
998 998
999 VEC_free (basic_block, heap, ldom); 999 VEC_free (basic_block, heap, ldom);
1000 } 1000 }
1159 desc->in_edge = EDGE_SUCC (exit_block, 0); 1159 desc->in_edge = EDGE_SUCC (exit_block, 0);
1160 } 1160 }
1161 } 1161 }
1162 1162
1163 /* Remove the edges. */ 1163 /* Remove the edges. */
1164 for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++) 1164 FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
1165 remove_path (e); 1165 remove_path (e);
1166 VEC_free (edge, heap, remove_edges); 1166 VEC_free (edge, heap, remove_edges);
1167 1167
1168 /* We must be careful when updating the number of iterations due to 1168 /* We must be careful when updating the number of iterations due to
1169 preconditioning and the fact that the value must be valid at entry 1169 preconditioning and the fact that the value must be valid at entry
1614 */ 1614 */
1615 1615
1616 static struct var_to_expand * 1616 static struct var_to_expand *
1617 analyze_insn_to_expand_var (struct loop *loop, rtx insn) 1617 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1618 { 1618 {
1619 rtx set, dest, src, op1, op2, something; 1619 rtx set, dest, src;
1620 struct var_to_expand *ves; 1620 struct var_to_expand *ves;
1621 enum machine_mode mode1, mode2;
1622 unsigned accum_pos; 1621 unsigned accum_pos;
1622 enum rtx_code code;
1623 int debug_uses = 0; 1623 int debug_uses = 0;
1624 1624
1625 set = single_set (insn); 1625 set = single_set (insn);
1626 if (!set) 1626 if (!set)
1627 return NULL; 1627 return NULL;
1628 1628
1629 dest = SET_DEST (set); 1629 dest = SET_DEST (set);
1630 src = SET_SRC (set); 1630 src = SET_SRC (set);
1631 1631 code = GET_CODE (src);
1632 if (GET_CODE (src) != PLUS 1632
1633 && GET_CODE (src) != MINUS 1633 if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1634 && GET_CODE (src) != MULT)
1635 return NULL; 1634 return NULL;
1635
1636 if (FLOAT_MODE_P (GET_MODE (dest)))
1637 {
1638 if (!flag_associative_math)
1639 return NULL;
1640 /* In the case of FMA, we're also changing the rounding. */
1641 if (code == FMA && !flag_unsafe_math_optimizations)
1642 return NULL;
1643 }
1636 1644
1637 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn 1645 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
1638 in MD. But if there is no optab to generate the insn, we can not 1646 in MD. But if there is no optab to generate the insn, we can not
1639 perform the variable expansion. This can happen if an MD provides 1647 perform the variable expansion. This can happen if an MD provides
1640 an insn but not a named pattern to generate it, for example to avoid 1648 an insn but not a named pattern to generate it, for example to avoid
1641 producing code that needs additional mode switches like for x87/mmx. 1649 producing code that needs additional mode switches like for x87/mmx.
1642 1650
1643 So we check have_insn_for which looks for an optab for the operation 1651 So we check have_insn_for which looks for an optab for the operation
1644 in SRC. If it doesn't exist, we can't perform the expansion even 1652 in SRC. If it doesn't exist, we can't perform the expansion even
1645 though INSN is valid. */ 1653 though INSN is valid. */
1646 if (!have_insn_for (GET_CODE (src), GET_MODE (src))) 1654 if (!have_insn_for (code, GET_MODE (src)))
1647 return NULL; 1655 return NULL;
1648
1649 op1 = XEXP (src, 0);
1650 op2 = XEXP (src, 1);
1651 1656
1652 if (!REG_P (dest) 1657 if (!REG_P (dest)
1653 && !(GET_CODE (dest) == SUBREG 1658 && !(GET_CODE (dest) == SUBREG
1654 && REG_P (SUBREG_REG (dest)))) 1659 && REG_P (SUBREG_REG (dest))))
1655 return NULL; 1660 return NULL;
1656 1661
1657 if (rtx_equal_p (dest, op1)) 1662 /* Find the accumulator use within the operation. */
1663 if (code == FMA)
1664 {
1665 /* We only support accumulation via FMA in the ADD position. */
1666 if (!rtx_equal_p (dest, XEXP (src, 2)))
1667 return NULL;
1668 accum_pos = 2;
1669 }
1670 else if (rtx_equal_p (dest, XEXP (src, 0)))
1658 accum_pos = 0; 1671 accum_pos = 0;
1659 else if (rtx_equal_p (dest, op2)) 1672 else if (rtx_equal_p (dest, XEXP (src, 1)))
1660 accum_pos = 1; 1673 {
1674 /* The method of expansion that we are using; which includes the
1675 initialization of the expansions with zero and the summation of
1676 the expansions at the end of the computation will yield wrong
1677 results for (x = something - x) thus avoid using it in that case. */
1678 if (code == MINUS)
1679 return NULL;
1680 accum_pos = 1;
1681 }
1661 else 1682 else
1662 return NULL; 1683 return NULL;
1663 1684
1664 /* The method of expansion that we are using; which includes 1685 /* It must not otherwise be used. */
1665 the initialization of the expansions with zero and the summation of 1686 if (code == FMA)
1666 the expansions at the end of the computation will yield wrong results 1687 {
1667 for (x = something - x) thus avoid using it in that case. */ 1688 if (rtx_referenced_p (dest, XEXP (src, 0))
1668 if (accum_pos == 1 1689 || rtx_referenced_p (dest, XEXP (src, 1)))
1669 && GET_CODE (src) == MINUS) 1690 return NULL;
1670 return NULL; 1691 }
1671 1692 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1672 something = (accum_pos == 0) ? op2 : op1;
1673
1674 if (rtx_referenced_p (dest, something))
1675 return NULL; 1693 return NULL;
1676 1694
1695 /* It must be used in exactly one insn. */
1677 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses)) 1696 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1678 return NULL; 1697 return NULL;
1679 1698
1680 mode1 = GET_MODE (dest);
1681 mode2 = GET_MODE (something);
1682 if ((FLOAT_MODE_P (mode1)
1683 || FLOAT_MODE_P (mode2))
1684 && !flag_associative_math)
1685 return NULL;
1686
1687 if (dump_file) 1699 if (dump_file)
1688 { 1700 {
1689 fprintf (dump_file, 1701 fprintf (dump_file, "\n;; Expanding Accumulator ");
1690 "\n;; Expanding Accumulator "); 1702 print_rtl (dump_file, dest);
1691 print_rtl (dump_file, dest); 1703 fprintf (dump_file, "\n");
1692 fprintf (dump_file, "\n"); 1704 }
1693 }
1694 1705
1695 if (debug_uses) 1706 if (debug_uses)
1696 /* Instead of resetting the debug insns, we could replace each 1707 /* Instead of resetting the debug insns, we could replace each
1697 debug use in the loop with the sum or product of all expanded 1708 debug use in the loop with the sum or product of all expanded
1698 accumulators. Since we'll only know of all expansions at the 1709 accumulators. Since we'll only know of all expansions at the
2121 2132
2122 if (VEC_length (rtx, ve->var_expansions) == 0) 2133 if (VEC_length (rtx, ve->var_expansions) == 0)
2123 return; 2134 return;
2124 2135
2125 start_sequence (); 2136 start_sequence ();
2126 if (ve->op == PLUS || ve->op == MINUS) 2137 switch (ve->op)
2127 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2138 {
2128 { 2139 case FMA:
2129 if (honor_signed_zero_p) 2140 /* Note that we only accumulate FMA via the ADD operand. */
2130 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); 2141 case PLUS:
2131 else 2142 case MINUS:
2132 zero_init = CONST0_RTX (mode); 2143 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2133 2144 {
2134 emit_move_insn (var, zero_init); 2145 if (honor_signed_zero_p)
2135 } 2146 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
2136 else if (ve->op == MULT) 2147 else
2137 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2148 zero_init = CONST0_RTX (mode);
2138 { 2149 emit_move_insn (var, zero_init);
2139 zero_init = CONST1_RTX (GET_MODE (var)); 2150 }
2140 emit_move_insn (var, zero_init); 2151 break;
2141 } 2152
2153 case MULT:
2154 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2155 {
2156 zero_init = CONST1_RTX (GET_MODE (var));
2157 emit_move_insn (var, zero_init);
2158 }
2159 break;
2160
2161 default:
2162 gcc_unreachable ();
2163 }
2142 2164
2143 seq = get_insns (); 2165 seq = get_insns ();
2144 end_sequence (); 2166 end_sequence ();
2145 2167
2146 insn = BB_HEAD (place); 2168 insn = BB_HEAD (place);
2163 2185
2164 if (VEC_length (rtx, ve->var_expansions) == 0) 2186 if (VEC_length (rtx, ve->var_expansions) == 0)
2165 return; 2187 return;
2166 2188
2167 start_sequence (); 2189 start_sequence ();
2168 if (ve->op == PLUS || ve->op == MINUS) 2190 switch (ve->op)
2169 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2191 {
2170 { 2192 case FMA:
2171 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), 2193 /* Note that we only accumulate FMA via the ADD operand. */
2172 var, sum); 2194 case PLUS:
2173 } 2195 case MINUS:
2174 else if (ve->op == MULT) 2196 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2175 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2197 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
2176 { 2198 break;
2177 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), 2199
2178 var, sum); 2200 case MULT:
2179 } 2201 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2202 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
2203 break;
2204
2205 default:
2206 gcc_unreachable ();
2207 }
2180 2208
2181 expr = force_operand (sum, ve->reg); 2209 expr = force_operand (sum, ve->reg);
2182 if (expr != ve->reg) 2210 if (expr != ve->reg)
2183 emit_move_insn (ve->reg, expr); 2211 emit_move_insn (ve->reg, expr);
2184 seq = get_insns (); 2212 seq = get_insns ();