Mercurial > hg > CbC > CbC_gcc
comparison gcc/loop-unroll.c @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | b7f97abdc517 |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
65:65488c3d617d | 67:f6334be47118 |
---|---|
499 apply_opt_in_copies (opt_info, npeel, false, true); | 499 apply_opt_in_copies (opt_info, npeel, false, true); |
500 free_opt_info (opt_info); | 500 free_opt_info (opt_info); |
501 } | 501 } |
502 | 502 |
503 /* Remove the exit edges. */ | 503 /* Remove the exit edges. */ |
504 for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++) | 504 FOR_EACH_VEC_ELT (edge, remove_edges, i, ein) |
505 remove_path (ein); | 505 remove_path (ein); |
506 VEC_free (edge, heap, remove_edges); | 506 VEC_free (edge, heap, remove_edges); |
507 } | 507 } |
508 | 508 |
509 ein = desc->in_edge; | 509 ein = desc->in_edge; |
787 desc->niter /= max_unroll + 1; | 787 desc->niter /= max_unroll + 1; |
788 desc->niter_max /= max_unroll + 1; | 788 desc->niter_max /= max_unroll + 1; |
789 desc->niter_expr = GEN_INT (desc->niter); | 789 desc->niter_expr = GEN_INT (desc->niter); |
790 | 790 |
791 /* Remove the edges. */ | 791 /* Remove the edges. */ |
792 for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++) | 792 FOR_EACH_VEC_ELT (edge, remove_edges, i, e) |
793 remove_path (e); | 793 remove_path (e); |
794 VEC_free (edge, heap, remove_edges); | 794 VEC_free (edge, heap, remove_edges); |
795 | 795 |
796 if (dump_file) | 796 if (dump_file) |
797 fprintf (dump_file, | 797 fprintf (dump_file, |
898 /* ??? We used to assume that INSNS can contain control flow insns, and | 898 /* ??? We used to assume that INSNS can contain control flow insns, and |
899 that we had to try to find sub basic blocks in BB to maintain a valid | 899 that we had to try to find sub basic blocks in BB to maintain a valid |
900 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB | 900 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB |
901 and call break_superblocks when going out of cfglayout mode. But it | 901 and call break_superblocks when going out of cfglayout mode. But it |
902 turns out that this never happens; and that if it does ever happen, | 902 turns out that this never happens; and that if it does ever happen, |
903 the verify_flow_info call in loop_optimizer_finalize would fail. | 903 the TODO_verify_flow at the end of the RTL loop passes would fail. |
904 | 904 |
905 There are two reasons why we expected we could have control flow insns | 905 There are two reasons why we expected we could have control flow insns |
906 in INSNS. The first is when a comparison has to be done in parts, and | 906 in INSNS. The first is when a comparison has to be done in parts, and |
907 the second is when the number of iterations is computed for loops with | 907 the second is when the number of iterations is computed for loops with |
908 the number of iterations known at runtime. In both cases, test cases | 908 the number of iterations known at runtime. In both cases, test cases |
990 { | 990 { |
991 VEC (basic_block, heap) *ldom; | 991 VEC (basic_block, heap) *ldom; |
992 basic_block bb; | 992 basic_block bb; |
993 | 993 |
994 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); | 994 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); |
995 for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++) | 995 FOR_EACH_VEC_ELT (basic_block, ldom, j, bb) |
996 if (!flow_bb_inside_loop_p (loop, bb)) | 996 if (!flow_bb_inside_loop_p (loop, bb)) |
997 VEC_safe_push (basic_block, heap, dom_bbs, bb); | 997 VEC_safe_push (basic_block, heap, dom_bbs, bb); |
998 | 998 |
999 VEC_free (basic_block, heap, ldom); | 999 VEC_free (basic_block, heap, ldom); |
1000 } | 1000 } |
1159 desc->in_edge = EDGE_SUCC (exit_block, 0); | 1159 desc->in_edge = EDGE_SUCC (exit_block, 0); |
1160 } | 1160 } |
1161 } | 1161 } |
1162 | 1162 |
1163 /* Remove the edges. */ | 1163 /* Remove the edges. */ |
1164 for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++) | 1164 FOR_EACH_VEC_ELT (edge, remove_edges, i, e) |
1165 remove_path (e); | 1165 remove_path (e); |
1166 VEC_free (edge, heap, remove_edges); | 1166 VEC_free (edge, heap, remove_edges); |
1167 | 1167 |
1168 /* We must be careful when updating the number of iterations due to | 1168 /* We must be careful when updating the number of iterations due to |
1169 preconditioning and the fact that the value must be valid at entry | 1169 preconditioning and the fact that the value must be valid at entry |
1614 */ | 1614 */ |
1615 | 1615 |
1616 static struct var_to_expand * | 1616 static struct var_to_expand * |
1617 analyze_insn_to_expand_var (struct loop *loop, rtx insn) | 1617 analyze_insn_to_expand_var (struct loop *loop, rtx insn) |
1618 { | 1618 { |
1619 rtx set, dest, src, op1, op2, something; | 1619 rtx set, dest, src; |
1620 struct var_to_expand *ves; | 1620 struct var_to_expand *ves; |
1621 enum machine_mode mode1, mode2; | |
1622 unsigned accum_pos; | 1621 unsigned accum_pos; |
1622 enum rtx_code code; | |
1623 int debug_uses = 0; | 1623 int debug_uses = 0; |
1624 | 1624 |
1625 set = single_set (insn); | 1625 set = single_set (insn); |
1626 if (!set) | 1626 if (!set) |
1627 return NULL; | 1627 return NULL; |
1628 | 1628 |
1629 dest = SET_DEST (set); | 1629 dest = SET_DEST (set); |
1630 src = SET_SRC (set); | 1630 src = SET_SRC (set); |
1631 | 1631 code = GET_CODE (src); |
1632 if (GET_CODE (src) != PLUS | 1632 |
1633 && GET_CODE (src) != MINUS | 1633 if (code != PLUS && code != MINUS && code != MULT && code != FMA) |
1634 && GET_CODE (src) != MULT) | |
1635 return NULL; | 1634 return NULL; |
1635 | |
1636 if (FLOAT_MODE_P (GET_MODE (dest))) | |
1637 { | |
1638 if (!flag_associative_math) | |
1639 return NULL; | |
1640 /* In the case of FMA, we're also changing the rounding. */ | |
1641 if (code == FMA && !flag_unsafe_math_optimizations) | |
1642 return NULL; | |
1643 } | |
1636 | 1644 |
1637 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn | 1645 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn |
1638 in MD. But if there is no optab to generate the insn, we cannot | 1646 in MD. But if there is no optab to generate the insn, we cannot |
1639 perform the variable expansion. This can happen if an MD provides | 1647 perform the variable expansion. This can happen if an MD provides |
1640 an insn but not a named pattern to generate it, for example to avoid | 1648 an insn but not a named pattern to generate it, for example to avoid |
1641 producing code that needs additional mode switches like for x87/mmx. | 1649 producing code that needs additional mode switches like for x87/mmx. |
1642 | 1650 |
1643 So we check have_insn_for which looks for an optab for the operation | 1651 So we check have_insn_for which looks for an optab for the operation |
1644 in SRC. If it doesn't exist, we can't perform the expansion even | 1652 in SRC. If it doesn't exist, we can't perform the expansion even |
1645 though INSN is valid. */ | 1653 though INSN is valid. */ |
1646 if (!have_insn_for (GET_CODE (src), GET_MODE (src))) | 1654 if (!have_insn_for (code, GET_MODE (src))) |
1647 return NULL; | 1655 return NULL; |
1648 | |
1649 op1 = XEXP (src, 0); | |
1650 op2 = XEXP (src, 1); | |
1651 | 1656 |
1652 if (!REG_P (dest) | 1657 if (!REG_P (dest) |
1653 && !(GET_CODE (dest) == SUBREG | 1658 && !(GET_CODE (dest) == SUBREG |
1654 && REG_P (SUBREG_REG (dest)))) | 1659 && REG_P (SUBREG_REG (dest)))) |
1655 return NULL; | 1660 return NULL; |
1656 | 1661 |
1657 if (rtx_equal_p (dest, op1)) | 1662 /* Find the accumulator use within the operation. */ |
1663 if (code == FMA) | |
1664 { | |
1665 /* We only support accumulation via FMA in the ADD position. */ | |
1666 if (!rtx_equal_p (dest, XEXP (src, 2))) | |
1667 return NULL; | |
1668 accum_pos = 2; | |
1669 } | |
1670 else if (rtx_equal_p (dest, XEXP (src, 0))) | |
1658 accum_pos = 0; | 1671 accum_pos = 0; |
1659 else if (rtx_equal_p (dest, op2)) | 1672 else if (rtx_equal_p (dest, XEXP (src, 1))) |
1660 accum_pos = 1; | 1673 { |
1674 /* The method of expansion that we are using; which includes the | |
1675 initialization of the expansions with zero and the summation of | |
1676 the expansions at the end of the computation will yield wrong | |
1677 results for (x = something - x) thus avoid using it in that case. */ | |
1678 if (code == MINUS) | |
1679 return NULL; | |
1680 accum_pos = 1; | |
1681 } | |
1661 else | 1682 else |
1662 return NULL; | 1683 return NULL; |
1663 | 1684 |
1664 /* The method of expansion that we are using; which includes | 1685 /* It must not otherwise be used. */ |
1665 the initialization of the expansions with zero and the summation of | 1686 if (code == FMA) |
1666 the expansions at the end of the computation will yield wrong results | 1687 { |
1667 for (x = something - x) thus avoid using it in that case. */ | 1688 if (rtx_referenced_p (dest, XEXP (src, 0)) |
1668 if (accum_pos == 1 | 1689 || rtx_referenced_p (dest, XEXP (src, 1))) |
1669 && GET_CODE (src) == MINUS) | 1690 return NULL; |
1670 return NULL; | 1691 } |
1671 | 1692 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos))) |
1672 something = (accum_pos == 0) ? op2 : op1; | |
1673 | |
1674 if (rtx_referenced_p (dest, something)) | |
1675 return NULL; | 1693 return NULL; |
1676 | 1694 |
1695 /* It must be used in exactly one insn. */ | |
1677 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses)) | 1696 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses)) |
1678 return NULL; | 1697 return NULL; |
1679 | 1698 |
1680 mode1 = GET_MODE (dest); | |
1681 mode2 = GET_MODE (something); | |
1682 if ((FLOAT_MODE_P (mode1) | |
1683 || FLOAT_MODE_P (mode2)) | |
1684 && !flag_associative_math) | |
1685 return NULL; | |
1686 | |
1687 if (dump_file) | 1699 if (dump_file) |
1688 { | 1700 { |
1689 fprintf (dump_file, | 1701 fprintf (dump_file, "\n;; Expanding Accumulator "); |
1690 "\n;; Expanding Accumulator "); | 1702 print_rtl (dump_file, dest); |
1691 print_rtl (dump_file, dest); | 1703 fprintf (dump_file, "\n"); |
1692 fprintf (dump_file, "\n"); | 1704 } |
1693 } | |
1694 | 1705 |
1695 if (debug_uses) | 1706 if (debug_uses) |
1696 /* Instead of resetting the debug insns, we could replace each | 1707 /* Instead of resetting the debug insns, we could replace each |
1697 debug use in the loop with the sum or product of all expanded | 1708 debug use in the loop with the sum or product of all expanded |
1698 accumulators. Since we'll only know of all expansions at the | 1709 accumulators. Since we'll only know of all expansions at the |
2121 | 2132 |
2122 if (VEC_length (rtx, ve->var_expansions) == 0) | 2133 if (VEC_length (rtx, ve->var_expansions) == 0) |
2123 return; | 2134 return; |
2124 | 2135 |
2125 start_sequence (); | 2136 start_sequence (); |
2126 if (ve->op == PLUS || ve->op == MINUS) | 2137 switch (ve->op) |
2127 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | 2138 { |
2128 { | 2139 case FMA: |
2129 if (honor_signed_zero_p) | 2140 /* Note that we only accumulate FMA via the ADD operand. */ |
2130 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); | 2141 case PLUS: |
2131 else | 2142 case MINUS: |
2132 zero_init = CONST0_RTX (mode); | 2143 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) |
2133 | 2144 { |
2134 emit_move_insn (var, zero_init); | 2145 if (honor_signed_zero_p) |
2135 } | 2146 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); |
2136 else if (ve->op == MULT) | 2147 else |
2137 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | 2148 zero_init = CONST0_RTX (mode); |
2138 { | 2149 emit_move_insn (var, zero_init); |
2139 zero_init = CONST1_RTX (GET_MODE (var)); | 2150 } |
2140 emit_move_insn (var, zero_init); | 2151 break; |
2141 } | 2152 |
2153 case MULT: | |
2154 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) | |
2155 { | |
2156 zero_init = CONST1_RTX (GET_MODE (var)); | |
2157 emit_move_insn (var, zero_init); | |
2158 } | |
2159 break; | |
2160 | |
2161 default: | |
2162 gcc_unreachable (); | |
2163 } | |
2142 | 2164 |
2143 seq = get_insns (); | 2165 seq = get_insns (); |
2144 end_sequence (); | 2166 end_sequence (); |
2145 | 2167 |
2146 insn = BB_HEAD (place); | 2168 insn = BB_HEAD (place); |
2163 | 2185 |
2164 if (VEC_length (rtx, ve->var_expansions) == 0) | 2186 if (VEC_length (rtx, ve->var_expansions) == 0) |
2165 return; | 2187 return; |
2166 | 2188 |
2167 start_sequence (); | 2189 start_sequence (); |
2168 if (ve->op == PLUS || ve->op == MINUS) | 2190 switch (ve->op) |
2169 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | 2191 { |
2170 { | 2192 case FMA: |
2171 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), | 2193 /* Note that we only accumulate FMA via the ADD operand. */ |
2172 var, sum); | 2194 case PLUS: |
2173 } | 2195 case MINUS: |
2174 else if (ve->op == MULT) | 2196 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) |
2175 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | 2197 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum); |
2176 { | 2198 break; |
2177 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), | 2199 |
2178 var, sum); | 2200 case MULT: |
2179 } | 2201 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) |
2202 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum); | |
2203 break; | |
2204 | |
2205 default: | |
2206 gcc_unreachable (); | |
2207 } | |
2180 | 2208 |
2181 expr = force_operand (sum, ve->reg); | 2209 expr = force_operand (sum, ve->reg); |
2182 if (expr != ve->reg) | 2210 if (expr != ve->reg) |
2183 emit_move_insn (ve->reg, expr); | 2211 emit_move_insn (ve->reg, expr); |
2184 seq = get_insns (); | 2212 seq = get_insns (); |