111
|
1 ;; ARM ldrd/strd peephole optimizations.
|
|
2 ;;
|
|
3 ;; Copyright (C) 2013-2017 Free Software Foundation, Inc.
|
|
4 ;;
|
|
5 ;; Written by Greta Yorsh <greta.yorsh@arm.com>
|
|
6
|
|
7 ;; This file is part of GCC.
|
|
8 ;;
|
|
9 ;; GCC is free software; you can redistribute it and/or modify it
|
|
10 ;; under the terms of the GNU General Public License as published by
|
|
11 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
12 ;; any later version.
|
|
13 ;;
|
|
14 ;; GCC is distributed in the hope that it will be useful, but
|
|
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
17 ;; General Public License for more details.
|
|
18 ;;
|
|
19 ;; You should have received a copy of the GNU General Public License
|
|
20 ;; along with GCC; see the file COPYING3. If not see
|
|
21 ;; <http://www.gnu.org/licenses/>.
|
|
22
|
|
;; The following peephole optimizations identify consecutive memory
;; accesses, and try to rearrange the operands to enable generation of
;; ldrd/strd.
|
|
26
|
|
;; Two back-to-back SImode loads into general registers.  When
;; gen_operands_ldrd_strd accepts the operands, the pair is re-emitted
;; by hand in the form used for LDRD (the replacement template itself
;; is only a placeholder).
(define_peephole2 ; ldrd
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))]
  "TARGET_LDRD"
  [(const_int 0)]
{
  /* Nothing to do unless the helper accepts this operand pair.  */
  if (!gen_operands_ldrd_strd (operands, true, false, false))
    FAIL;

  if (TARGET_ARM)
    {
      /* ARM state requires consecutive destination registers for
         LDRD/STRD, so the two loads become a single DImode access:
           [(set (match_dup 0) (match_dup 2))]  */
      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
      operands[2] = adjust_address (operands[2], DImode, 0);
      emit_insn (gen_rtx_SET (operands[0], operands[2]));
      DONE;
    }

  if (TARGET_THUMB2)
    {
      /* Thumb-2 keeps both SImode loads and groups them:
           [(parallel [(set (match_dup 0) (match_dup 2))
                       (set (match_dup 1) (match_dup 3))])]  */
      rtx first_load = gen_rtx_SET (operands[0], operands[2]);
      rtx second_load = gen_rtx_SET (operands[1], operands[3]);
      rtvec load_pair = gen_rtvec (2, first_load, second_load);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, load_pair));
      DONE;
    }
})
|
|
58
|
|
;; Two back-to-back SImode stores from general registers.  When
;; gen_operands_ldrd_strd accepts the operands, the pair is re-emitted
;; by hand in the form used for STRD (the replacement template itself
;; is only a placeholder).
(define_peephole2 ; strd
  [(set (match_operand:SI 2 "memory_operand" "")
        (match_operand:SI 0 "arm_general_register_operand" ""))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_operand:SI 1 "arm_general_register_operand" ""))]
  "TARGET_LDRD"
  [(const_int 0)]
{
  /* Nothing to do unless the helper accepts this operand pair.  */
  if (!gen_operands_ldrd_strd (operands, false, false, false))
    FAIL;

  if (TARGET_ARM)
    {
      /* ARM state requires consecutive registers for LDRD/STRD, so the
         two stores become a single DImode access:
           [(set (match_dup 2) (match_dup 0))]  */
      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
      operands[2] = adjust_address (operands[2], DImode, 0);
      emit_insn (gen_rtx_SET (operands[2], operands[0]));
      DONE;
    }

  if (TARGET_THUMB2)
    {
      /* Thumb-2 keeps both SImode stores and groups them:
           [(parallel [(set (match_dup 2) (match_dup 0))
                       (set (match_dup 3) (match_dup 1))])]  */
      rtx first_store = gen_rtx_SET (operands[2], operands[0]);
      rtx second_store = gen_rtx_SET (operands[3], operands[1]);
      rtvec store_pair = gen_rtvec (2, first_store, second_store);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, store_pair));
      DONE;
    }
})
|
|
90
|
|
;; The following peepholes reorder registers to enable LDRD/STRD.
|
|
;; Stores of two constants where each constant is loaded into a
;; register immediately before it is stored (load, store, load, store).
;; On success both constant loads are emitted first, followed by the
;; combined store.
(define_peephole2 ; strd of constants
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 4 "const_int_operand" ""))
   (set (match_operand:SI 2 "memory_operand" "")
        (match_dup 0))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 5 "const_int_operand" ""))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_dup 1))]
  "TARGET_LDRD"
  [(const_int 0)]
{
  /* Nothing to do unless the helper accepts these operands.  */
  if (!gen_operands_ldrd_strd (operands, false, true, false))
    FAIL;

  if (TARGET_ARM)
    {
      /* The store is done as one DImode access from the register
         pair that starts at operand 0:
           [(set (match_dup 0) (match_dup 4))
            (set (match_dup 1) (match_dup 5))
            (set (match_dup 2) pair_reg)]  */
      rtx pair_reg = gen_rtx_REG (DImode, REGNO (operands[0]));
      operands[2] = adjust_address (operands[2], DImode, 0);

      emit_insn (gen_rtx_SET (operands[0], operands[4]));
      emit_insn (gen_rtx_SET (operands[1], operands[5]));
      emit_insn (gen_rtx_SET (operands[2], pair_reg));
      DONE;
    }

  if (TARGET_THUMB2)
    {
      /* Load both constants, then group the two SImode stores:
           [(set (match_dup 0) (match_dup 4))
            (set (match_dup 1) (match_dup 5))
            (parallel [(set (match_dup 2) (match_dup 0))
                       (set (match_dup 3) (match_dup 1))])]  */
      emit_insn (gen_rtx_SET (operands[0], operands[4]));
      emit_insn (gen_rtx_SET (operands[1], operands[5]));

      rtx first_store = gen_rtx_SET (operands[2], operands[0]);
      rtx second_store = gen_rtx_SET (operands[3], operands[1]);
      rtvec store_pair = gen_rtvec (2, first_store, second_store);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, store_pair));
      DONE;
    }
})
|
|
134
|
|
;; Stores of two constants where both constants are loaded into
;; registers before either store (load, load, store, store).  On
;; success the constant loads are re-emitted, followed by the combined
;; store.
(define_peephole2 ; strd of constants
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 4 "const_int_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 5 "const_int_operand" ""))
   (set (match_operand:SI 2 "memory_operand" "")
        (match_dup 0))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_dup 1))]
  "TARGET_LDRD"
  [(const_int 0)]
{
  /* Nothing to do unless the helper accepts these operands.  */
  if (!gen_operands_ldrd_strd (operands, false, true, false))
    FAIL;

  if (TARGET_ARM)
    {
      /* The store is done as one DImode access from the register
         pair that starts at operand 0:
           [(set (match_dup 0) (match_dup 4))
            (set (match_dup 1) (match_dup 5))
            (set (match_dup 2) pair_reg)]  */
      rtx pair_reg = gen_rtx_REG (DImode, REGNO (operands[0]));
      operands[2] = adjust_address (operands[2], DImode, 0);

      emit_insn (gen_rtx_SET (operands[0], operands[4]));
      emit_insn (gen_rtx_SET (operands[1], operands[5]));
      emit_insn (gen_rtx_SET (operands[2], pair_reg));
      DONE;
    }

  if (TARGET_THUMB2)
    {
      /* Load both constants, then group the two SImode stores:
           [(set (match_dup 0) (match_dup 4))
            (set (match_dup 1) (match_dup 5))
            (parallel [(set (match_dup 2) (match_dup 0))
                       (set (match_dup 3) (match_dup 1))])]  */
      emit_insn (gen_rtx_SET (operands[0], operands[4]));
      emit_insn (gen_rtx_SET (operands[1], operands[5]));

      rtx first_store = gen_rtx_SET (operands[2], operands[0]);
      rtx second_store = gen_rtx_SET (operands[3], operands[1]);
      rtvec store_pair = gen_rtvec (2, first_store, second_store);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, store_pair));
      DONE;
    }
})
|
|
177
|
|
;; The following two peephole optimizations are only relevant for ARM
;; mode where LDRD/STRD require consecutive registers.
|
|
180
|
|
;; Two SImode loads feeding a commutative binary operation.  The
;; condition requires that the two loaded registers are exactly the
;; operation's two inputs (in either order), and that each of them is
;; either reported dead by peep2_reg_dead_p (3, ...) or is the
;; operation's own destination.  The replacement is one DImode load
;; followed by the operation.
(define_peephole2 ; swap the destination registers of two loads
                  ; before a commutative operation.
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))
   (set (match_operand:SI 4 "arm_general_register_operand" "")
        (match_operator:SI 5 "commutative_binary_operator"
          [(match_operand 6 "arm_general_register_operand" "")
           (match_operand 7 "arm_general_register_operand" "")]))]
  "TARGET_LDRD && TARGET_ARM
   && ((rtx_equal_p (operands[0], operands[6])
        && rtx_equal_p (operands[1], operands[7]))
       || (rtx_equal_p (operands[0], operands[7])
           && rtx_equal_p (operands[1], operands[6])))
   && (peep2_reg_dead_p (3, operands[0])
       || rtx_equal_p (operands[0], operands[4]))
   && (peep2_reg_dead_p (3, operands[1])
       || rtx_equal_p (operands[1], operands[4]))"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
{
  /* Nothing to do unless the helper accepts these operands.  */
  if (!gen_operands_ldrd_strd (operands, true, false, true))
    FAIL;

  /* Widen the first load to DImode so that the replacement template's
     (set (match_dup 0) (match_dup 2)) is a single double-word load.  */
  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
  operands[2] = adjust_address (operands[2], DImode, 0);
}
)
|
|
210
|
|
;; Two SImode loads feeding a commutative binary operation that also
;; clobbers the condition-code register.  The condition requires that
;; the two loaded registers are exactly the operation's two inputs (in
;; either order), and that each of them is either reported dead by
;; peep2_reg_dead_p (3, ...) or is the operation's own destination.
;; The replacement is one DImode load followed by the same
;; operation-plus-clobber parallel.
(define_peephole2 ; swap the destination registers of two loads
                  ; before a commutative operation that sets the flags.
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))
   (parallel
     [(set (match_operand:SI 4 "arm_general_register_operand" "")
           (match_operator:SI 5 "commutative_binary_operator"
             [(match_operand 6 "arm_general_register_operand" "")
              (match_operand 7 "arm_general_register_operand" "")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_LDRD && TARGET_ARM
   && ((rtx_equal_p (operands[0], operands[6])
        && rtx_equal_p (operands[1], operands[7]))
       || (rtx_equal_p (operands[0], operands[7])
           && rtx_equal_p (operands[1], operands[6])))
   && (peep2_reg_dead_p (3, operands[0])
       || rtx_equal_p (operands[0], operands[4]))
   && (peep2_reg_dead_p (3, operands[1])
       || rtx_equal_p (operands[1], operands[4]))"
  [(set (match_dup 0) (match_dup 2))
   (parallel
     [(set (match_dup 4)
           (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
      (clobber (reg:CC CC_REGNUM))])]
{
  /* Nothing to do unless the helper accepts these operands.  */
  if (!gen_operands_ldrd_strd (operands, true, false, true))
    FAIL;

  /* Widen the first load to DImode so that the replacement template's
     (set (match_dup 0) (match_dup 2)) is a single double-word load.  */
  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
  operands[2] = adjust_address (operands[2], DImode, 0);
}
)
|
|
245
|
|
;; TODO: Handle LDRD/STRD with writeback:
;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
;; (b) Patterns may be followed by an update of the base address.
|