;; ARM Cortex-A8 NEON scheduling description.
;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
;; Contributed by CodeSourcery.

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

;; Automaton to which every CPU unit declared in this file is assigned.
(define_automaton "cortex_a8_neon")

;; Only one load, store, permute, MCR or MRC instruction can be issued
;; per cycle.
(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon")

;; Only one data-processing instruction can be issued per cycle.
(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon")

;; The VFPLite unit (non-pipelined).
(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon")

;; We need a special mutual exclusion (to be used in addition to
;; cortex_a8_neon_issue_dp) for the case when an instruction such as
;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to
;; E2 of the floating-point add pipeline. On the cycle previous to that
;; forward we must prevent issue of any instruction to the floating-point
;; add pipeline, but still allow issue of a data-processing instruction
;; to any of the other pipelines.
(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon")

;; Patterns of reservation.
;; We model the NEON issue units as running in parallel with the core ones.
;; We assume that multi-cycle NEON instructions get decomposed into
;; micro-ops as they are issued into the NEON pipeline, and not as they
;; are issued into the ARM pipeline. Dual issue may not occur except
;; upon the first and last cycles of a multi-cycle instruction, but it
;; is unclear whether two multi-cycle instructions can issue together (in
;; this model they cannot). It is also unclear whether a pair of
;; a multi-cycle and single-cycle instructions, that could potentially
;; issue together, only do so if (say) the single-cycle one precedes
;; the other.

;; Single-cycle data-processing issue: either cortex_a8_alu0 or
;; cortex_a8_alu1 (both declared outside this file) together with the
;; NEON data-processing issue unit.
(define_reservation "cortex_a8_neon_dp"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp")
;; Two-cycle data-processing issue: as cortex_a8_neon_dp on the first
;; cycle, then the data-processing issue unit alone on the second.
(define_reservation "cortex_a8_neon_dp_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
                     cortex_a8_neon_issue_dp")
;; Four-cycle data-processing issue.  The two middle cycles also reserve
;; the permute issue unit, so nothing can dual-issue with them; the first
;; and last cycles leave the permute issue unit free.
(define_reservation "cortex_a8_neon_dp_4"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp")

;; Single-cycle issue to the floating-point add pipeline: like
;; cortex_a8_neon_dp, but additionally reserving cortex_a8_neon_issue_fadd.
(define_reservation "cortex_a8_neon_fadd"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_fadd")
;; Two-cycle issue to the floating-point add pipeline: both the
;; data-processing and the fadd issue units are held on each cycle.
(define_reservation "cortex_a8_neon_fadd_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_fadd,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd")

;; Single-cycle permute issue: either cortex_a8_alu0 or cortex_a8_alu1
;; together with the permute issue unit.
(define_reservation "cortex_a8_neon_perm"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm")
;; Two-cycle permute issue: as cortex_a8_neon_perm on the first cycle,
;; then the permute issue unit alone on the second.
(define_reservation "cortex_a8_neon_perm_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
;; Three-cycle permute issue.  The middle cycle also reserves the
;; data-processing issue unit, so nothing can dual-issue with it.
(define_reservation "cortex_a8_neon_perm_3"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")

;; Single-cycle load/store issue: cortex_a8_issue_ls (declared outside
;; this file) together with the permute issue unit.
(define_reservation "cortex_a8_neon_ls"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm")
;; Two-cycle load/store issue: as cortex_a8_neon_ls on the first cycle,
;; then the permute issue unit alone on the second.
(define_reservation "cortex_a8_neon_ls_2"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
;; Three-cycle load/store issue.  The middle cycle also reserves the
;; data-processing issue unit.
(define_reservation "cortex_a8_neon_ls_3"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
;; Four-cycle load/store issue.  The two middle cycles also reserve the
;; data-processing issue unit.
(define_reservation "cortex_a8_neon_ls_4"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
;; Five-cycle load/store issue.  The three middle cycles also reserve the
;; data-processing issue unit.
(define_reservation "cortex_a8_neon_ls_5"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")

;; Multiply followed by a forward into the add pipeline: a normal
;; data-processing issue cycle, three cycles with no unit reserved, then
;; cortex_a8_neon_issue_fadd is held for one cycle.
(define_reservation "cortex_a8_neon_fmul_then_fadd"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
                     nothing*3,\
                     cortex_a8_neon_issue_fadd")
;; Two-cycle variant of cortex_a8_neon_fmul_then_fadd: two data-processing
;; issue cycles, two cycles with no unit reserved, then
;; cortex_a8_neon_issue_fadd is held for two cycles.
(define_reservation "cortex_a8_neon_fmul_then_fadd_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
                     cortex_a8_neon_issue_dp,\
                     nothing*2,\
                     cortex_a8_neon_issue_fadd,\
                     cortex_a8_neon_issue_fadd")

;; VFP instructions can only be single-issued into the NEON pipeline.
;; This is modelled by reserving both NEON issue units as well as the
;; VFPLite unit on the issue cycle.
(define_reservation "cortex_a8_vfp"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_perm+cortex_a8_vfplite")

;; VFP instructions.
;; The VFPLite unit that executes these isn't pipelined; we give the
;; worst-case latencies (and choose the double-precision ones where we
;; do not distinguish on precision). We assume RunFast mode is not
;; enabled and therefore do not model the possible VFP instruction
;; execution in the NEON floating point pipelines, nor additional
;; latencies for the processing of subnormals.
;;
;; TODO: RunFast mode could potentially be enabled when -ffast-math
;; is specified.

;; Latency 10; the VFPLite unit is held for the issue cycle plus 9 more.
(define_insn_reservation "cortex_a8_vfp_add_sub" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fconsts,fconstd,fadds,faddd"))
  "cortex_a8_vfp,cortex_a8_vfplite*9")

;; Latency 12; the VFPLite unit is held for the issue cycle plus 11 more.
(define_insn_reservation "cortex_a8_vfp_muls" 12
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmuls"))
  "cortex_a8_vfp,cortex_a8_vfplite*11")

;; Latency 17; the VFPLite unit is held for the issue cycle plus 16 more.
(define_insn_reservation "cortex_a8_vfp_muld" 17
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmuld"))
  "cortex_a8_vfp,cortex_a8_vfplite*16")

;; Latency 21; the VFPLite unit is held for the issue cycle plus 20 more.
(define_insn_reservation "cortex_a8_vfp_macs" 21
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmacs"))
  "cortex_a8_vfp,cortex_a8_vfplite*20")

;; Latency 26; the VFPLite unit is held for the issue cycle plus 25 more.
(define_insn_reservation "cortex_a8_vfp_macd" 26
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmacd"))
  "cortex_a8_vfp,cortex_a8_vfplite*25")

;; Latency 37; the VFPLite unit is held for the issue cycle plus 36 more.
(define_insn_reservation "cortex_a8_vfp_divs" 37
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fdivs"))
  "cortex_a8_vfp,cortex_a8_vfplite*36")

;; Latency 65; the VFPLite unit is held for the issue cycle plus 64 more.
(define_insn_reservation "cortex_a8_vfp_divd" 65
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fdivd"))
  "cortex_a8_vfp,cortex_a8_vfplite*64")

;; Comparisons can actually take 7 cycles sometimes instead of four,
;; but given all the other instructions lumped into type=ffarith that
;; take four cycles, we pick that latency.
;; NOTE(review): fconsts and fconstd are also listed by
;; cortex_a8_vfp_add_sub (latency 10) earlier in this file; confirm which
;; reservation is intended to apply to those two types.
(define_insn_reservation "cortex_a8_vfp_farith" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd"))
  "cortex_a8_vfp,cortex_a8_vfplite*3")

;; Latency 7; the VFPLite unit is held for the issue cycle plus 6 more.
(define_insn_reservation "cortex_a8_vfp_cvt" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "f_cvt"))
  "cortex_a8_vfp,cortex_a8_vfplite*6")

;; NEON -> core transfers.

;; Latency 20; issued as a single-cycle load/store-style operation.
(define_insn_reservation "neon_mrc" 20
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mrc"))
  "cortex_a8_neon_ls")

;; Latency 21; issued as a two-cycle load/store-style operation.
(define_insn_reservation "neon_mrrc" 21
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mrrc"))
  "cortex_a8_neon_ls_2")

;; The remainder of this file is auto-generated by neon-schedgen.

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N3.
(define_insn_reservation "neon_int_1" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_int_1"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)n operands at N2, and produce a result at N3.
(define_insn_reservation "neon_int_2" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_int_2"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3.
(define_insn_reservation "neon_int_3" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_int_3"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N4.
(define_insn_reservation "neon_int_4" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_int_4"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)n operands at N2, and produce a result at N4.
(define_insn_reservation "neon_int_5" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_int_5"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4.
(define_insn_reservation "neon_vqneg_vqabs" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vqneg_vqabs"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation produce a result at N3.
(define_insn_reservation "neon_vmov" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vmov"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6.
(define_insn_reservation "neon_vaba" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vaba"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
(define_insn_reservation "neon_vaba_qqq" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vaba_qqq"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)d operands at N3, and produce a result at N6.
(define_insn_reservation "neon_vsma" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vsma"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N6.
(define_insn_reservation "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N6 on cycle 2.
(define_insn_reservation "neon_mul_qqq_8_16_32_ddd_32" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2.
(define_insn_reservation "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N6.
(define_insn_reservation "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
(define_insn_reservation "neon_mla_qqq_8_16" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mla_qqq_8_16"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
(define_insn_reservation "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 4.
(define_insn_reservation "neon_mla_qqq_32_qqd_32_scalar" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar"))
  "cortex_a8_neon_dp_4")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6.
(define_insn_reservation "neon_mul_ddd_16_scalar_32_16_long_scalar" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4.
(define_insn_reservation "neon_mul_qqd_32_scalar" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mul_qqd_32_scalar"))
  "cortex_a8_neon_dp_4")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6.
(define_insn_reservation "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3.
(define_insn_reservation "neon_shift_1" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_shift_1"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4.
(define_insn_reservation "neon_shift_2" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_shift_2"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3 on cycle 2.
(define_insn_reservation "neon_shift_3" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_shift_3"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N1.
(define_insn_reservation "neon_vshl_ddd" 1
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vshl_ddd"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4 on cycle 2.
(define_insn_reservation "neon_vqshl_vrshl_vqrshl_qqq" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)d operands at N3, and produce a result at N6.
(define_insn_reservation "neon_vsra_vrsra" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vsra_vrsra"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N5.
(define_insn_reservation "neon_fp_vadd_ddd_vabs_dd" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd"))
  "cortex_a8_neon_fadd")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N5 on cycle 2.
(define_insn_reservation "neon_fp_vadd_qqq_vabs_qq" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq"))
  "cortex_a8_neon_fadd_2")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N5.
(define_insn_reservation "neon_fp_vsum" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vsum"))
  "cortex_a8_neon_fadd")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N5.
(define_insn_reservation "neon_fp_vmul_ddd" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vmul_ddd"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2.
(define_insn_reservation "neon_fp_vmul_qqd" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vmul_qqd"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N9.
(define_insn_reservation "neon_fp_vmla_ddd" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vmla_ddd"))
  "cortex_a8_neon_fmul_then_fadd")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N9 on cycle 2.
(define_insn_reservation "neon_fp_vmla_qqq" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vmla_qqq"))
  "cortex_a8_neon_fmul_then_fadd_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N9.
(define_insn_reservation "neon_fp_vmla_ddd_scalar" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar"))
  "cortex_a8_neon_fmul_then_fadd")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N9 on cycle 2.
(define_insn_reservation "neon_fp_vmla_qqq_scalar" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar"))
  "cortex_a8_neon_fmul_then_fadd_2")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N9.
(define_insn_reservation "neon_fp_vrecps_vrsqrts_ddd" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd"))
  "cortex_a8_neon_fmul_then_fadd")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N9 on cycle 2.
(define_insn_reservation "neon_fp_vrecps_vrsqrts_qqq" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq"))
  "cortex_a8_neon_fmul_then_fadd_2")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2.
(define_insn_reservation "neon_bp_simple" 2
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_bp_simple"))
  "cortex_a8_neon_perm")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 on cycle 2.
(define_insn_reservation "neon_bp_2cycle" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_bp_2cycle"))
  "cortex_a8_neon_perm_2")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 on cycle 3.
(define_insn_reservation "neon_bp_3cycle" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_bp_3cycle"))
  "cortex_a8_neon_perm_3")

;; Instructions using this reservation produce a result at N1.
(define_insn_reservation "neon_ldr" 1
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_ldr"))
  "cortex_a8_neon_ls")

;; Instructions using this reservation read their source operands at N1.
(define_insn_reservation "neon_str" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_str"))
  "cortex_a8_neon_ls")

;; Instructions using this reservation produce a result at N1 on cycle 2.
(define_insn_reservation "neon_vld1_1_2_regs" 2
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vld1_1_2_regs"))
  "cortex_a8_neon_ls_2")

;; Instructions using this reservation produce a result at N1 on cycle 3.
(define_insn_reservation "neon_vld1_3_4_regs" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vld1_3_4_regs"))
  "cortex_a8_neon_ls_3")

;; Instructions using this reservation produce a result at N2 on cycle 2.
(define_insn_reservation "neon_vld2_2_regs_vld1_vld2_all_lanes" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes"))
  "cortex_a8_neon_ls_2")

;; Instructions using this reservation produce a result at N2 on cycle 3.
(define_insn_reservation "neon_vld2_4_regs" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vld2_4_regs"))
  "cortex_a8_neon_ls_3")

;; Instructions using this reservation produce a result at N2 on cycle 4.
(define_insn_reservation "neon_vld3_vld4" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vld3_vld4"))
  "cortex_a8_neon_ls_4")

;; Instructions using this reservation read their source operands at N1.
(define_insn_reservation "neon_vst1_1_2_regs_vst2_2_regs" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs"))
  "cortex_a8_neon_ls_2")

;; Instructions using this reservation read their source operands at N1.
(define_insn_reservation "neon_vst1_3_4_regs" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vst1_3_4_regs"))
  "cortex_a8_neon_ls_3")

;; Instructions using this reservation read their source operands at N1.
(define_insn_reservation "neon_vst2_4_regs_vst3_vst4" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4"))
  "cortex_a8_neon_ls_4")

;; Instructions using this reservation read their source operands at N1.
(define_insn_reservation "neon_vst3_vst4" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vst3_vst4"))
  "cortex_a8_neon_ls_4")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 on cycle 3.
(define_insn_reservation "neon_vld1_vld2_lane" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vld1_vld2_lane"))
  "cortex_a8_neon_ls_3")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 on cycle 5.
(define_insn_reservation "neon_vld3_vld4_lane" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vld3_vld4_lane"))
  "cortex_a8_neon_ls_5")

;; Instructions using this reservation read their source operands at N1.
(define_insn_reservation "neon_vst1_vst2_lane" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vst1_vst2_lane"))
  "cortex_a8_neon_ls_2")

;; Instructions using this reservation read their source operands at N1.
(define_insn_reservation "neon_vst3_vst4_lane" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vst3_vst4_lane"))
  "cortex_a8_neon_ls_3")

;; Instructions using this reservation produce a result at N2 on cycle 2.
(define_insn_reservation "neon_vld3_vld4_all_lanes" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_vld3_vld4_all_lanes"))
  "cortex_a8_neon_ls_3")

;; Instructions using this reservation produce a result at N2.
(define_insn_reservation "neon_mcr" 2
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mcr"))
  "cortex_a8_neon_perm")

;; Instructions using this reservation produce a result at N2.
(define_insn_reservation "neon_mcr_2_mcrr" 2
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "neon_type" "neon_mcr_2_mcrr"))
  "cortex_a8_neon_perm_2")

;; Exceptions to the default latencies.

;; Latency from neon_mcr_2_mcrr to the consumers listed below is 1 rather
;; than its default of 2.
(define_bypass 1 "neon_mcr_2_mcrr"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_mcr to the consumers listed below is 1 rather than
;; its default of 2.
(define_bypass 1 "neon_mcr"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_vld3_vld4_all_lanes to the consumers listed below is
;; 2 rather than its default of 3.
(define_bypass 2 "neon_vld3_vld4_all_lanes"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_vld3_vld4_lane to the consumers listed below is 5
;; rather than its default of 6.
(define_bypass 5 "neon_vld3_vld4_lane"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_vld1_vld2_lane to the consumers listed below is 3
;; rather than its default of 4.
(define_bypass 3 "neon_vld1_vld2_lane"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_vld3_vld4 to the consumers listed below is 4 rather
;; than its default of 5.
(define_bypass 4 "neon_vld3_vld4"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_vld2_4_regs to the consumers listed below is 3 rather
;; than its default of 4.
(define_bypass 3 "neon_vld2_4_regs"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_vld2_2_regs_vld1_vld2_all_lanes to the consumers
;; listed below is 2 rather than its default of 3.
(define_bypass 2 "neon_vld2_2_regs_vld1_vld2_all_lanes"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_vld1_3_4_regs to the consumers listed below is 2
;; rather than its default of 3.
(define_bypass 2 "neon_vld1_3_4_regs"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Latency from neon_vld1_1_2_regs to the consumers listed below is 1
;; rather than its default of 2.
(define_bypass 1 "neon_vld1_1_2_regs"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")

;; Bypass: results of neon_ldr are ready for the consumer
;; reservations listed below after 0 cycles.
(define_bypass 0 "neon_ldr"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
752
|
|
;; Bypass: results of neon_bp_3cycle are ready for the consumer
;; reservations listed below after 3 cycles.
(define_bypass 3 "neon_bp_3cycle"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
766
|
|
;; Bypass: results of neon_bp_2cycle are ready for the consumer
;; reservations listed below after 2 cycles.
(define_bypass 2 "neon_bp_2cycle"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
780
|
|
;; Bypass: results of neon_bp_simple are ready for the consumer
;; reservations listed below after 1 cycle.
(define_bypass 1 "neon_bp_simple"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
794
|
|
;; Bypass: results of neon_fp_vrecps_vrsqrts_qqq are ready for the
;; consumer reservations listed below after 9 cycles.
(define_bypass 9 "neon_fp_vrecps_vrsqrts_qqq"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
808
|
|
;; Bypass: results of neon_fp_vrecps_vrsqrts_ddd are ready for the
;; consumer reservations listed below after 8 cycles.
(define_bypass 8 "neon_fp_vrecps_vrsqrts_ddd"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
822
|
|
;; Bypass: results of neon_fp_vmla_qqq_scalar are ready for the
;; consumer reservations listed below after 9 cycles.
(define_bypass 9 "neon_fp_vmla_qqq_scalar"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
836
|
|
;; Bypass: results of neon_fp_vmla_ddd_scalar are ready for the
;; consumer reservations listed below after 8 cycles.
(define_bypass 8 "neon_fp_vmla_ddd_scalar"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
850
|
|
;; Bypass: results of neon_fp_vmla_qqq are ready for the consumer
;; reservations listed below after 9 cycles.
(define_bypass 9 "neon_fp_vmla_qqq"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
864
|
|
;; Bypass: results of neon_fp_vmla_ddd are ready for the consumer
;; reservations listed below after 8 cycles.
(define_bypass 8 "neon_fp_vmla_ddd"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
878
|
|
;; Bypass: results of neon_fp_vmul_qqd are ready for the consumer
;; reservations listed below after 5 cycles.
(define_bypass 5 "neon_fp_vmul_qqd"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
892
|
|
;; Bypass: results of neon_fp_vmul_ddd are ready for the consumer
;; reservations listed below after 4 cycles.
(define_bypass 4 "neon_fp_vmul_ddd"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
906
|
|
;; Bypass: results of neon_fp_vsum are ready for the consumer
;; reservations listed below after 4 cycles.
(define_bypass 4 "neon_fp_vsum"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
920
|
|
;; Bypass: results of neon_fp_vadd_qqq_vabs_qq are ready for the
;; consumer reservations listed below after 5 cycles.
(define_bypass 5 "neon_fp_vadd_qqq_vabs_qq"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
934
|
|
;; Bypass: results of neon_fp_vadd_ddd_vabs_dd are ready for the
;; consumer reservations listed below after 4 cycles.
(define_bypass 4 "neon_fp_vadd_ddd_vabs_dd"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
948
|
|
;; Bypass: results of neon_vsra_vrsra are ready for the consumer
;; reservations listed below after 5 cycles.
(define_bypass 5 "neon_vsra_vrsra"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
962
|
|
;; Bypass: results of neon_vqshl_vrshl_vqrshl_qqq are ready for the
;; consumer reservations listed below after 4 cycles.
(define_bypass 4 "neon_vqshl_vrshl_vqrshl_qqq"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
976
|
|
;; Bypass: results of neon_vshl_ddd are ready for the consumer
;; reservations listed below after 0 cycles.
(define_bypass 0 "neon_vshl_ddd"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
990
|
|
;; Bypass: results of neon_shift_3 are ready for the consumer
;; reservations listed below after 3 cycles.
(define_bypass 3 "neon_shift_3"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1004
|
|
;; Bypass: results of neon_shift_2 are ready for the consumer
;; reservations listed below after 3 cycles.
(define_bypass 3 "neon_shift_2"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1018
|
|
;; Bypass: results of neon_shift_1 are ready for the consumer
;; reservations listed below after 2 cycles.
(define_bypass 2 "neon_shift_1"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1032
|
|
;; Bypass: results of neon_mla_ddd_16_scalar_qdd_32_16_long_scalar are
;; ready for the consumer reservations listed below after 5 cycles.
(define_bypass 5 "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1046
|
|
;; Bypass: results of neon_mul_qqd_32_scalar are ready for the
;; consumer reservations listed below after 8 cycles.
(define_bypass 8 "neon_mul_qqd_32_scalar"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1060
|
|
;; Bypass: results of neon_mul_ddd_16_scalar_32_16_long_scalar are
;; ready for the consumer reservations listed below after 5 cycles.
(define_bypass 5 "neon_mul_ddd_16_scalar_32_16_long_scalar"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1074
|
|
;; Bypass: results of neon_mla_qqq_32_qqd_32_scalar are ready for the
;; consumer reservations listed below after 8 cycles.
(define_bypass 8 "neon_mla_qqq_32_qqd_32_scalar"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1088
|
|
;; Bypass: results of
;; neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long
;; are ready for the consumer reservations listed below after 6 cycles.
(define_bypass 6 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1102
|
|
;; Bypass: results of neon_mla_qqq_8_16 are ready for the consumer
;; reservations listed below after 6 cycles.
(define_bypass 6 "neon_mla_qqq_8_16"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1116
|
|
;; Bypass: results of neon_mla_ddd_8_16_qdd_16_8_long_32_16_long are
;; ready for the consumer reservations listed below after 5 cycles.
(define_bypass 5 "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1130
|
|
;; Bypass: results of
;; neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar are
;; ready for the consumer reservations listed below after 6 cycles.
(define_bypass 6 "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1144
|
|
;; Bypass: results of neon_mul_qqq_8_16_32_ddd_32 are ready for the
;; consumer reservations listed below after 6 cycles.
(define_bypass 6 "neon_mul_qqq_8_16_32_ddd_32"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1158
|
|
;; Bypass: results of neon_mul_ddd_8_16_qdd_16_8_long_32_16_long are
;; ready for the consumer reservations listed below after 5 cycles.
(define_bypass 5 "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1172
|
|
;; Bypass: results of neon_vsma are ready for the consumer
;; reservations listed below after 5 cycles.
(define_bypass 5 "neon_vsma"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1186
|
|
;; Bypass: results of neon_vaba_qqq are ready for the consumer
;; reservations listed below after 6 cycles.
(define_bypass 6 "neon_vaba_qqq"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1200
|
|
;; Bypass: results of neon_vaba are ready for the consumer
;; reservations listed below after 5 cycles.
(define_bypass 5 "neon_vaba"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1214
|
|
;; Bypass: results of neon_vmov are ready for the consumer
;; reservations listed below after 2 cycles.
(define_bypass 2 "neon_vmov"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1228
|
|
;; Bypass: results of neon_vqneg_vqabs are ready for the consumer
;; reservations listed below after 3 cycles.
(define_bypass 3 "neon_vqneg_vqabs"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1242
|
|
;; Bypass: results of neon_int_5 are ready for the consumer
;; reservations listed below after 3 cycles.
(define_bypass 3 "neon_int_5"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1256
|
|
;; Bypass: results of neon_int_4 are ready for the consumer
;; reservations listed below after 3 cycles.
(define_bypass 3 "neon_int_4"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1270
|
|
;; Bypass: results of neon_int_3 are ready for the consumer
;; reservations listed below after 2 cycles.
(define_bypass 2 "neon_int_3"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1284
|
|
;; Bypass: results of neon_int_2 are ready for the consumer
;; reservations listed below after 2 cycles.
(define_bypass 2 "neon_int_2"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1298
|
|
;; Bypass: results of neon_int_1 are ready for the consumer
;; reservations listed below after 2 cycles.
(define_bypass 2 "neon_int_1"
               "neon_int_1,\
               neon_int_4,\
               neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mul_qqq_8_16_32_ddd_32,\
               neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               neon_mla_qqq_8_16,\
               neon_fp_vadd_ddd_vabs_dd,\
               neon_fp_vadd_qqq_vabs_qq,\
               neon_fp_vmla_ddd,\
               neon_fp_vmla_qqq,\
               neon_fp_vrecps_vrsqrts_ddd,\
               neon_fp_vrecps_vrsqrts_qqq")
|
|
1312
|