152
|
1 ;; Cavium ThunderX 3 CN11xx pipeline description
|
|
2 ;; Copyright (C) 2020 Free Software Foundation, Inc.
|
|
3 ;;
|
|
4 ;; Contributed by Marvell
|
|
5
|
|
6 ;; This file is part of GCC.
|
|
7
|
|
8 ;; GCC is free software; you can redistribute it and/or modify
|
|
9 ;; it under the terms of the GNU General Public License as published by
|
|
10 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
11 ;; any later version.
|
|
12
|
|
13 ;; GCC is distributed in the hope that it will be useful,
|
|
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16 ;; GNU General Public License for more details.
|
|
17
|
|
18 ;; You should have received a copy of the GNU General Public License
|
|
19 ;; along with GCC; see the file COPYING3. If not see
|
|
20 ;; <http://www.gnu.org/licenses/>.
|
|
21
|
|
;; Automata for this description.  Every cpu unit declared in this file is
;; attached to one of these four: the main one (i0..i3), the Advanced
;; SIMD/FP one (f0..f3), the load/store one (ls*, sd and the load-delay
;; pseudo-units) and a separate one for the multiplier pseudo-units.
(define_automaton "thunderx3t110, thunderx3t110_advsimd, thunderx3t110_ldst")
(define_automaton "thunderx3t110_mult")
|
|
24
|
|
;; Units i0..i3, tracked by the main "thunderx3t110" automaton.
(define_cpu_unit "thunderx3t110_i0" "thunderx3t110")
(define_cpu_unit "thunderx3t110_i1" "thunderx3t110")
(define_cpu_unit "thunderx3t110_i2" "thunderx3t110")
(define_cpu_unit "thunderx3t110_i3" "thunderx3t110")

;; Load/store pipes ls0 and ls1 plus the sd unit, tracked by the
;; "thunderx3t110_ldst" automaton.
(define_cpu_unit "thunderx3t110_ls0" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls1" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_sd" "thunderx3t110_ldst")
|
|
33
|
|
; Pseudo-units that model the multiply pipeline.  Same scheme as TX2: a
; multiply occupies I1 for four slots (one real slot plus these three).

(define_cpu_unit "thunderx3t110_i1m1" "thunderx3t110_mult")
(define_cpu_unit "thunderx3t110_i1m2" "thunderx3t110_mult")
(define_cpu_unit "thunderx3t110_i1m3" "thunderx3t110_mult")

; Pseudo-units that model the load delay, assuming the access hits in the
; dcache.  There are three delay stages behind each load/store pipe.

(define_cpu_unit "thunderx3t110_ls0d1" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls0d2" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls0d3" "thunderx3t110_ldst")

(define_cpu_unit "thunderx3t110_ls1d1" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls1d2" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls1d3" "thunderx3t110_ldst")
|
|
50
|
|
; The four FP units f0, f1, f2 and f3, tracked by the
; "thunderx3t110_advsimd" automaton.
(define_cpu_unit "thunderx3t110_f0" "thunderx3t110_advsimd")
(define_cpu_unit "thunderx3t110_f1" "thunderx3t110_advsimd")
(define_cpu_unit "thunderx3t110_f2" "thunderx3t110_advsimd")
(define_cpu_unit "thunderx3t110_f3" "thunderx3t110_advsimd")
|
|
56
|
|
;; Named alternatives over the real units.  In each string "|" separates
;; the units any one of which may be picked to satisfy the reservation.

; Integer-side groups.
(define_reservation "thunderx3t110_i23" "thunderx3t110_i2|thunderx3t110_i3")
(define_reservation "thunderx3t110_i01"
  "thunderx3t110_i0|thunderx3t110_i1")
(define_reservation "thunderx3t110_i012"
  "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2")
(define_reservation "thunderx3t110_i0123"
  "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2|thunderx3t110_i3")

; Either load/store pipe.
(define_reservation "thunderx3t110_ls01" "thunderx3t110_ls0|thunderx3t110_ls1")

; FP-side groups.
(define_reservation "thunderx3t110_f01" "thunderx3t110_f0|thunderx3t110_f1")
(define_reservation "thunderx3t110_f23" "thunderx3t110_f2|thunderx3t110_f3")
(define_reservation "thunderx3t110_f0123"
  "thunderx3t110_f0|thunderx3t110_f1|thunderx3t110_f2|thunderx3t110_f3")
|
|
69
|
|
; A load together with its delay, on pipe ls0 or ls1.  The pipe is taken in
; the first cycle and the three delay pseudo-units in the following cycles,
; so the total is always four.
(define_reservation "thunderx3t110_l0delay"
  "thunderx3t110_ls0,thunderx3t110_ls0d1,thunderx3t110_ls0d2,\
   thunderx3t110_ls0d3")
(define_reservation "thunderx3t110_l1delay"
  "thunderx3t110_ls1,thunderx3t110_ls1d1,thunderx3t110_ls1d2,\
   thunderx3t110_ls1d3")
; Either of the two delayed-load chains above.
(define_reservation "thunderx3t110_l01delay"
  "thunderx3t110_l0delay|thunderx3t110_l1delay")
|
|
;; Branch and call instructions.

; Calls, branches and traps: latency 1, issued to i2 or i3.
(define_insn_reservation "thunderx3t110_branch" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "call,branch,trap"))
  "thunderx3t110_i23")
|
|
86
|
|
;; Misc instructions.

; Speculation barrier ("block" type): latency 0, reserves no unit.
(define_insn_reservation "thunderx3t110_nothing" 0
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "block"))
  "nothing")

; System register reads ("mrs" type): latency 0, reserves i2.
(define_insn_reservation "thunderx3t110_mrs" 0
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "mrs"))
  "thunderx3t110_i2")
|
|
99
|
|
; Insns of type "multiple": latency 1.  The "+" operator reserves all of
; the listed units in the same cycle: i0, i1, i3, both load/store pipes,
; sd, the three multiplier pseudo-units, f0 and f1.
; NOTE(review): i2, f2 and f3 are not part of this reservation -- confirm
; that is intended.
(define_insn_reservation "thunderx3t110_multiple" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "multiple"))
  "thunderx3t110_i0+thunderx3t110_i1+thunderx3t110_i3+thunderx3t110_ls0+\
   thunderx3t110_ls1+thunderx3t110_sd+thunderx3t110_i1m1+thunderx3t110_i1m2+\
   thunderx3t110_i1m3+thunderx3t110_f0+thunderx3t110_f1")
|
|
106
|
|
;; Integer arithmetic/logic instructions.

; A plain register-to-register move is resolved by register renaming and
; produces no uops, hence latency 0 and no unit reserved.
(define_insn_reservation "thunderx3t110_regmove" 0
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "mov_reg"))
  "nothing")
|
|
115
|
|
; Basic ALU, logic, carry, select, address, immediate-move, shift,
; bitfield, reverse and extend types: latency 1 on any of i0..i3.
(define_insn_reservation "thunderx3t110_alu_basic" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\
                        adc_reg,adc_imm,adcs_reg,adcs_imm,\
                        logic_reg,logic_imm,logics_reg,logics_imm,\
                        csel,adr,mov_imm,shift_reg,shift_imm,bfm,\
                        bfx,rbit,rev,extend,rotate_imm"))
  "thunderx3t110_i0123")
|
|
124
|
|
; Open question: should latency 1|2 and throughput 1/4|2/4 be told apart?
; Is it really latency 1, throughput 1/2 on {i0,i1} versus latency 2,
; throughput 1/4 on {i0,i1,i2,i3}?
; NOTE(review): the two reservations below test exactly the same tune/type
; condition -- confirm which of them the generators actually select.

; Latency-2 variant, any of i0..i3.
(define_insn_reservation "thunderx3t110_alu_shift" 2
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "alu_shift_imm,alu_ext,\
                        alus_shift_imm,alus_ext,\
                        logic_shift_imm,logics_shift_imm"))
  "thunderx3t110_i0123")

; Latency-1 variant, i0 or i1 only.
(define_insn_reservation "thunderx3t110_alu_shift1" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "alu_shift_imm,alu_ext,\
                        alus_shift_imm,alus_ext,\
                        logic_shift_imm,logics_shift_imm"))
  "thunderx3t110_i01")
|
|
140
|
|
; Integer divide.  For now the optimistic latency (13) is used; the worst
; case is 23.  i1 is held for three consecutive cycles.
(define_insn_reservation "thunderx3t110_div" 13
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "sdiv,udiv"))
  "thunderx3t110_i1*3")
|
|
147
|
|
; Multiply-accumulate: latency 5.  One of i0..i3 first, then the three
; multiplier pseudo-units on successive cycles, then one of i0..i2.
(define_insn_reservation "thunderx3t110_madd" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "mla,smlal,umlal"))
  "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3,\
   thunderx3t110_i012")
|
|
153
|
|
; NOTE: the smull and umull types also cover the "high part" multiplies,
; and mul is an alias for MADD.
; smulh/umulh (4,1) ought to be separated from the other multiplies (5,1),
; but no insn type tells them apart, so the conservative (5,1) figure is
; used for all of them for now.
; smulh and umulh run on I1 only.
(define_insn_reservation "thunderx3t110_mul" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "mul,smull,umull"))
  "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3")
|
|
164
|
|
; Count-leading-zeros ("clz" type): latency 3, i1 only.
(define_insn_reservation "thunderx3t110_countbits" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "clz"))
  "thunderx3t110_i1")
|
|
169
|
|
;; Integer loads and stores.

; The load_4 type is attached to prefetch, to many move/str/dup variants
; and to sign extension.  Latency 4 on either load/store pipe.
(define_insn_reservation "thunderx3t110_load_basic" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "load_4"))
  "thunderx3t110_ls01")

; To do: model the use of I0/I1/I2 for the index forms only, and the 4|8
; latency of the second loaded register.
(define_insn_reservation "thunderx3t110_loadpair" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "load_8,load_16"))
  "thunderx3t110_i012,thunderx3t110_ls01")
|
|
184
|
|
; Single store: latency 1; an ls pipe, then the sd unit one cycle later.
(define_insn_reservation "thunderx3t110_store_basic" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "store_4"))
  "thunderx3t110_ls01,thunderx3t110_sd")

; To do: model the use of I0/I1/I2/I3 for the index forms and the
; differing throughputs.
(define_insn_reservation "thunderx3t110_storepair_basic" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "store_8,store_16"))
  "thunderx3t110_ls01,thunderx3t110_sd")
|
|
196
|
|
;; FP data processing instructions.

; ffarith and f_minmax types: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_fp_simple" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd"))
  "thunderx3t110_f0123")
|
|
203
|
|
; FP add/subtract: latency 3 or 4, throughput 1/2 or 1/4.
; NOTE(review): both reservations below use the same tune/type condition
; -- confirm which of them the generators actually select.

; Latency 3 on f2 or f3.
(define_insn_reservation "thunderx3t110_fp_addsub3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fadds,faddd"))
  "thunderx3t110_f23")
; Latency 4 on any FP unit.
(define_insn_reservation "thunderx3t110_fp_addsub4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fadds,faddd"))
  "thunderx3t110_f0123")
|
|
213
|
|
; FP compare and conditional compare: latency 4 on any FP unit.
(define_insn_reservation "thunderx3t110_fp_cmp" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
  "thunderx3t110_f0123")
|
|
218
|
|
; Divide/square root is split by precision:
;   F64: latency 23, throughput 23/4
;   F32 (FDIV): latency 16, throughput 16/4

; Single precision: the chosen FP unit is held for three cycles.
(define_insn_reservation "thunderx3t110_fp_divsqrt_s" 16
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fdivs,fsqrts"))
  "thunderx3t110_f0*3|thunderx3t110_f1*3|\
   thunderx3t110_f2*3|thunderx3t110_f3*3")

; Double precision: the chosen FP unit is held for five cycles.
(define_insn_reservation "thunderx3t110_fp_divsqrt_d" 23
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fdivd,fsqrtd"))
  "thunderx3t110_f0*5|thunderx3t110_f1*5|\
   thunderx3t110_f2*5|thunderx3t110_f3*5")
|
|
232
|
|
; FP multiply and multiply-accumulate: latency 5 on f0 or f1.
(define_insn_reservation "thunderx3t110_fp_mul_mac" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fmuls,fmuld,fmacs,fmacd"))
  "thunderx3t110_f01")

; FP round to integral: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_frint" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_rints,f_rintd"))
  "thunderx3t110_f0123")
|
|
242
|
|
; FP conditional select: mimic latency 3|4 with throughput 1/2|1/4.
; NOTE(review): both reservations below use the same tune/type condition
; -- confirm which of them the generators actually select.

; Latency 3 on f2 or f3.
(define_insn_reservation "thunderx3t110_fcsel3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fcsel"))
  "thunderx3t110_f23")

; Latency 4 on any FP unit.
(define_insn_reservation "thunderx3t110_fcsel4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fcsel"))
  "thunderx3t110_f0123")
|
|
253
|
|
;; FP miscellaneous instructions.

; FP/integer and FP/FP conversions: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_fp_cvt" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f"))
  "thunderx3t110_f0123")
|
|
260
|
|
; f_mrc really belongs with fp_mov_to_gen; it is kept here for legacy
; reasons, since code generation does not use it anyway.
; NOTE(review): both reservations below use the same tune/type condition
; -- confirm which of them the generators actually select.

; Latency 3 on f2 or f3.
(define_insn_reservation "thunderx3t110_fp_mov3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
  "thunderx3t110_f23")

; Latency 4 on any FP unit.
(define_insn_reservation "thunderx3t110_fp_mov" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
  "thunderx3t110_f0123")
|
|
273
|
|
; Insns of type f_mcr: latency 4 on any FP unit.
(define_insn_reservation "thunderx3t110_fp_mov_to_gen" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_mcr"))
  "thunderx3t110_f0123")
|
|
278
|
|
;; FP loads and stores.
; To do: model the use of I0/I1/I2 for the post/pre-index modes.

; FP load: latency 4 on either load/store pipe.
(define_insn_reservation "thunderx3t110_fp_load_basic" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_loads,f_loadd"))
  "thunderx3t110_ls01")

; FP store, modelled with throughput 1: an ls pipe, then the sd unit.
(define_insn_reservation "thunderx3t110_fp_store_basic" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_stores,f_stored"))
  "thunderx3t110_ls01,thunderx3t110_sd")
|
|
292
|
|
;; ASIMD integer instructions.

; Catch-all for vector integer arithmetic, min/max, multiply,
; multiply-accumulate and shift types: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_asimd_int" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_abd,neon_abd_q,\
                        neon_arith_acc,neon_arith_acc_q,\
                        neon_abs,neon_abs_q,\
                        neon_add,neon_add_q,\
                        neon_sub,neon_sub_q,\
                        neon_neg,neon_neg_q,\
                        neon_add_long,neon_add_widen,\
                        neon_add_halve,neon_add_halve_q,\
                        neon_sub_long,neon_sub_widen,\
                        neon_sub_halve,neon_sub_halve_q,\
                        neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
                        neon_qabs,neon_qabs_q,\
                        neon_qadd,neon_qadd_q,\
                        neon_qneg,neon_qneg_q,\
                        neon_qsub,neon_qsub_q,\
                        neon_minmax,neon_minmax_q,\
                        neon_reduc_minmax,neon_reduc_minmax_q,\
                        neon_mul_b,neon_mul_h,neon_mul_s,\
                        neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\
                        neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\
                        neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\
                        neon_mla_b,neon_mla_h,neon_mla_s,\
                        neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\
                        neon_mul_b_long,neon_mul_h_long,\
                        neon_mul_s_long,neon_mul_d_long,\
                        neon_sat_mul_b_long,neon_sat_mul_h_long,\
                        neon_sat_mul_s_long,\
                        neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
                        neon_sat_mla_b_long,neon_sat_mla_h_long,\
                        neon_sat_mla_s_long,\
                        neon_shift_acc,neon_shift_acc_q,\
                        neon_shift_imm,neon_shift_imm_q,\
                        neon_shift_reg,neon_shift_reg_q,\
                        neon_shift_imm_long,neon_shift_imm_narrow_q,\
                        neon_sat_shift_imm,neon_sat_shift_imm_q,\
                        neon_sat_shift_reg,neon_sat_shift_reg_q,\
                        neon_sat_shift_imm_narrow_q"))
  "thunderx3t110_f0123")
|
|
335
|
|
; The neon_reduc_add type is shared by addp and [su]adalp.
; Latency 5 on f0 or f1.
(define_insn_reservation "thunderx3t110_asimd_reduc_add" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_reduc_add,neon_reduc_add_q"))
  "thunderx3t110_f01")
|
|
341
|
|
; Vector compare and test: latency 5 on any FP unit.
; NOTE(review): neon_compare_zero is listed without a _q counterpart,
; unlike neon_compare and neon_tst -- confirm whether that is intended.
(define_insn_reservation "thunderx3t110_asimd_cmp" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\
                        neon_tst,neon_tst_q"))
  "thunderx3t110_f0123")
|
|
347
|
|
; The neon_logic type is used by ldr, str, mov, umov, fmov, mov; orn; bic;
; and, simd mov immediate; orr, simd mov immediate; eor; not (mvn).
; Documented timings:
;   latency 4, throughput 1/2, LS0/LS1: ldr
;   latency 1, throughput 1, LS0/LS1,SDI,I0/I1/I2: str
;   latency 3|4, throughput 1/2|1/4, F2/F3 F0/F1/F2/F3: fmov immed, orn,
;     bic, and, orr, eor, not (mvn)
;   latency 4, throughput 1/4, F0/F1/F2/F3: fmov register, fmov gen to vec
;   latency 5, throughput 1/4, F0/F1/F2/F3: fmov vec to gen, umov, fmov
; NOTE(review): both reservations below use the same tune/type condition
; -- confirm which of them the generators actually select.

; Latency 4 on f2 or f3.
(define_insn_reservation "thunderx3t110_asimd_logic4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_logic,neon_logic_q"))
  "thunderx3t110_f23")

; Latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_asimd_logic5" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_logic,neon_logic_q"))
  "thunderx3t110_f0123")
|
|
365
|
|
;; ASIMD floating-point instructions.

; To do: distinguish
;   latency 5, throughput 1/4: fabs, fmax, fmin, fneg
;   latency 4, throughput 1/4: fcmp
; For now everything here gets latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_asimd_fp_simple" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\
                        neon_fp_abs_s_q,neon_fp_abs_d_q,\
                        neon_fp_compare_s,neon_fp_compare_d,\
                        neon_fp_compare_s_q,neon_fp_compare_d_q,\
                        neon_fp_minmax_s,neon_fp_minmax_d,\
                        neon_fp_minmax_s_q,neon_fp_minmax_d_q,\
                        neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\
                        neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\
                        neon_fp_neg_s,neon_fp_neg_d,\
                        neon_fp_neg_s_q,neon_fp_neg_d_q"))
  "thunderx3t110_f0123")
|
|
383
|
|
; Two cases to tell apart: latency 3 with throughput 1/2, and latency 4
; with throughput 1/4.
; neon_fp_reduc_add_<stype><q> covers both faddp and the vector reduction
; add.  On TX3 faddp is 3|4 1/2|1/4 while the reduction is 5 1/4.
; NOTE(review): both reservations below use the same tune/type condition
; -- confirm which of them the generators actually select.

; Latency 3 on f2 or f3.
(define_insn_reservation "thunderx3t110_asimd_fp_arith3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
                        neon_fp_abd_s_q,neon_fp_abd_d_q,\
                        neon_fp_addsub_s,neon_fp_addsub_d,\
                        neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
                        neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
                        neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
  "thunderx3t110_f23")

; Latency 4 on any FP unit.
(define_insn_reservation "thunderx3t110_asimd_fp_arith4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
                        neon_fp_abd_s_q,neon_fp_abd_d_q,\
                        neon_fp_addsub_s,neon_fp_addsub_d,\
                        neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
                        neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
                        neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
  "thunderx3t110_f0123")
|
|
407
|
|
; Vector FP multiply and multiply-accumulate: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_asimd_fp_arith5" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_d,\
                        neon_fp_mul_s_q,neon_fp_mul_d_q,\
                        neon_fp_mul_s_scalar_q,neon_fp_mul_d_scalar_q,\
                        neon_fp_mla_s,neon_fp_mla_d,\
                        neon_fp_mla_s_q,neon_fp_mla_d_q"))
  "thunderx3t110_f0123")
|
|
416
|
|
; Which insns use which type:
;   neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q: fcvtl,fcvtl2,fcvtn,fcvtn2
;   neon_fp_to_int_s,neon_fp_to_int_d: fcvt{<frint_suffix><su>,z<su>}
;     where frint_suffix: zpmixan, su: su (plus other sign/unsign/extract...
;   neon_fp_to_int_s_q,neon_fp_to_int_d_q: fcvtz<su> other
; The int_to_fp* group is complicated:
;   neon_int_to_fp_s,neon_int_to_fp_d: <su_optab>cvtf
;   neon_int_to_fp_s_q,neon_int_to_fp_d_q
; Rounding matches a single define_insn, frint<frint_suffix>:
;   neon_fp_round_s,neon_fp_round_d,neon_fp_round_s_q,
;   neon_fp_round_d_q: frint<frint_suffix>
; Timings:
;   FCVT*,VCVTAU,[SU]CVTF: latency 5 throughput 1/4
;   FRINT*: latency 5 throughput 1/4
(define_insn_reservation "thunderx3t110_asimd_fp_conv" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\
                        neon_fp_to_int_s,neon_fp_to_int_d,\
                        neon_fp_to_int_s_q,neon_fp_to_int_d_q,\
                        neon_int_to_fp_s,neon_int_to_fp_d,\
                        neon_int_to_fp_s_q,neon_int_to_fp_d_q,\
                        neon_fp_round_s,neon_fp_round_d,\
                        neon_fp_round_s_q,neon_fp_round_d_q"))
  "thunderx3t110_f0123")
|
|
439
|
|
; To do: model that the pipeline stays occupied for the whole operation.
;   D/F32, Q/F32: 16/4
;   Q/F64: 23/4

; Single precision vector divide: latency 16.
(define_insn_reservation "thunderx3t110_asimd_fp_div_s" 16
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q"))
  "thunderx3t110_f0123")

; Double precision vector divide: latency 23.
(define_insn_reservation "thunderx3t110_asimd_fp_div_d" 23
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q"))
  "thunderx3t110_f0123")
|
|
451
|
|
;; ASIMD miscellaneous instructions.

; The miscellaneous types are divided by timing:
;   rbit,bsl,bsl_q,cls,cls_q,cnt,cnt_q,move,move_q: 3|4 1/2 | 1/4
;   from_gp,from_gp_q : 4 | 1/4
;   dup,dup_q,ext,ext_q,ins,ins_q,all recpe forms, rev,rev_q: 5 1/4
; permute,permute_q needs to depend on what
; aarch64_expand_vec_perm_const does on TX3.
; NOTE(review): every type in the latency-3 list is repeated in the
; latency-4 list -- confirm which reservation the generators select.

; Latency 3 on f2 or f3.
(define_insn_reservation "thunderx3t110_asimd_misc3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_rbit,\
                        neon_bsl,neon_bsl_q,\
                        neon_cls,neon_cls_q,\
                        neon_cnt,neon_cnt_q,\
                        neon_move,neon_move_q"))
  "thunderx3t110_f23")

; Latency 4 on any FP unit; this list additionally has the from_gp types.
(define_insn_reservation "thunderx3t110_asimd_misc4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_rbit,\
                        neon_bsl,neon_bsl_q,\
                        neon_cls,neon_cls_q,\
                        neon_cnt,neon_cnt_q,\
                        neon_from_gp,neon_from_gp_q,\
                        neon_move,neon_move_q"))
  "thunderx3t110_f0123")
|
|
478
|
|
; dup, ext, ins, move, reciprocal estimate (recpe/recpx), rev and permute
; types: latency 5 on any FP unit.
; The type list now starts on the same line as its opening quote, as every
; other list in this file does; previously the string began with a raw
; newline and no "\" continuation.  The set of types is unchanged.
; NOTE(review): neon_move/neon_move_q are also listed by the
; thunderx3t110_asimd_misc3 and thunderx3t110_asimd_misc4 reservations --
; confirm which reservation the generators select for them.
(define_insn_reservation "thunderx3t110_asimd_misc" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_dup,neon_dup_q,\
                        neon_ext,neon_ext_q,\
                        neon_ins,neon_ins_q,\
                        neon_move,neon_move_q,\
                        neon_fp_recpe_s,neon_fp_recpe_d,\
                        neon_fp_recpe_s_q,neon_fp_recpe_d_q,\
                        neon_fp_recpx_s,neon_fp_recpx_d,\
                        neon_fp_recpx_s_q,neon_fp_recpx_d_q,\
                        neon_rev,neon_rev_q,\
                        neon_permute,neon_permute_q"))
  "thunderx3t110_f0123")
|
|
493
|
|
; Reciprocal step, vector square root and reciprocal square-root
; estimate/step types: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_asimd_recip_step" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\
                        neon_fp_recps_d,neon_fp_recps_d_q,\
                        neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
                        neon_fp_sqrt_d,neon_fp_sqrt_d_q,\
                        neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
                        neon_fp_rsqrte_d, neon_fp_rsqrte_d_q,\
                        neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
                        neon_fp_rsqrts_d, neon_fp_rsqrts_d_q"))
  "thunderx3t110_f0123")
|
|
505
|
|
; Table lookups.  The latency is 5 per table register (5, 10, 15, 20);
; each form reserves any one FP unit.
(define_insn_reservation "thunderx3t110_asimd_lut1" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_tbl1,neon_tbl1_q"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_lut2" 10
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_tbl2,neon_tbl2_q"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_lut3" 15
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_tbl3,neon_tbl3_q"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_lut4" 20
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_tbl4,neon_tbl4_q"))
  "thunderx3t110_f0123")
|
|
525
|
|
; neon_to_gp types: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_asimd_elt_to_gr" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
  "thunderx3t110_f0123")
|
|
530
|
|
;; ASIMD load instructions.

; NOTE: These reservations try to get latency and throughput right, but
; the cycle in which each unit is allocated is not necessarily accurate
; (insns are split into uops, and those may be issued out-of-order).

; The LDP/LDNP imm-offset S/D/Q forms supply the first register with
; latency 4 and the second at 5 (Q form) or 8 (S/D form).  Can this be
; modeled?  These forms, as documented, do not use the I0/I1/I2 units (no
; I3), but the other LDP forms do.
(define_insn_reservation "thunderx3t110_asimd_load1_ldp" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_ldp,neon_ldp_q"))
  "thunderx3t110_i012,thunderx3t110_ls01")
|
|
546
|
|
; The following cases still need to be told apart:
;   latency 6 throughput 2: 4 reg D/Q
;   latency 5 throughput 3/2: 3 reg D/Q
;   latency 4 throughput 1: 2 reg D/Q
;   latency 4 throughput 1/2: 1 reg D/Q
; For now all of them get latency 4 on either load/store pipe.
(define_insn_reservation "thunderx3t110_asimd_load1" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
                        neon_load1_2reg,neon_load1_2reg_q,\
                        neon_load1_3reg,neon_load1_3reg_q,\
                        neon_load1_4reg,neon_load1_4reg_q"))
  "thunderx3t110_ls01")
|
|
558
|
|
; LD1 to a single lane: latency 5; a delayed load on ls0 or ls1, followed
; by any FP unit.
(define_insn_reservation "thunderx3t110_asimd_load1_onelane" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")

; LD1 to all lanes: same latency and reservation as the single-lane form.
(define_insn_reservation "thunderx3t110_asimd_load1_all" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")
|
|
568
|
|
; LD2, LD3 and LD4 (multi-register, one-lane and all-lanes forms) share
; one reservation: a delayed load on ls0 or ls1, followed by any FP unit.
; Only the latency differs: 5, 7 and 8 respectively.
(define_insn_reservation "thunderx3t110_asimd_load2" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\
                        neon_load2_one_lane,neon_load2_one_lane_q,\
                        neon_load2_all_lanes,neon_load2_all_lanes_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_load3" 7
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
                        neon_load3_one_lane,neon_load3_one_lane_q,\
                        neon_load3_all_lanes,neon_load3_all_lanes_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_load4" 8
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q,\
                        neon_load4_one_lane,neon_load4_one_lane_q,\
                        neon_load4_all_lanes,neon_load4_all_lanes_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")
|
|
589
|
|
;; ASIMD store instructions.

; The accuracy caveat given for the ASIMD load reservations applies here
; as well.

; Vector store pair.  Cases that still need to be told apart:
;   5 throughput: imm-offset S/D; imm-postindex S/D; imm-preindex S/D
;   2 throughput: imm-offset Q; imm-postindex Q; imm-preindex Q
; All index modes use I0/I1/I2.
(define_insn_reservation "thunderx3t110_asimd_store_stp" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_stp,neon_stp_q"))
  "thunderx3t110_ls01,thunderx3t110_sd")
|
|
602
|
|
; There are multiple forms of ST1.
; The following two groups, as documented, do not use the FP pipelines.
;   multiple, 1 reg, D-form ST1
;     tx2_ltp: x 1/2 LS0/LS1
;     tx3_ltp: x 1/2 LS0/LS1
;   multiple, 1 reg, Q-form ST1
;     tx2_ltp: x 1/2 LS0/LS1
;     tx3_ltp: x 1/2 LS0/LS1
;
;   one lane, B/H/S ST1
;     tx2_ltp: x 1/2 LS0/LS1,F0/F1
;     tx3_ltp: x 1/2 LS0/LS1,F0/F1/F2/F3
;   one lane, D ST1
;     tx2_ltp: x 1/2 LS0/LS1,F0/F1
;     tx3_ltp: x 1/2 LS0/LS1,F0/F1/F2/F3
;; The model for the st1 insn needs refinement for the different register
;; forms:
;   multiple, 2 reg, D-form ST1 x 1 LS0/LS1
;   multiple, 2 reg, Q-form ST1 x 1 LS0/LS1
;   multiple, 3 reg, D-form ST1 x 3/2 LS0/LS1
;   multiple, 3 reg, Q-form ST1 x 3/2 LS0/LS1
;   multiple, 4 reg, D-form ST1 x 2 LS0/LS1
;   multiple, 4 reg, Q-form ST1 x 2 LS0/LS1
;
; Multi-register ST1: latency 1 on either load/store pipe.  The 3- and
; 4-register Q-form types (neon_store1_3reg_q, neon_store1_4reg_q) are
; listed alongside their D-form counterparts, matching the table above;
; they were previously missing and so matched no reservation in this file.
(define_insn_reservation "thunderx3t110_asimd_store1" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,\
                        neon_store1_2reg,neon_store1_2reg_q,\
                        neon_store1_3reg,neon_store1_3reg_q,\
                        neon_store1_4reg,neon_store1_4reg_q"))
  "thunderx3t110_ls01")
|
|
631
|
|
; ST1 from a single lane: latency 1; an ls pipe, then any FP unit.
(define_insn_reservation "thunderx3t110_asimd_store1_onelane" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q"))
  "thunderx3t110_ls01,thunderx3t110_f0123")

; ST2.  To do: distinguish throughput 1 (D/Q-form B/H/S, Q-form D) from
; throughput 1/2 (one lane B/H/S/D).
(define_insn_reservation "thunderx3t110_asimd_store2" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q,\
                        neon_store2_one_lane,neon_store2_one_lane_q"))
  "thunderx3t110_ls01,thunderx3t110_f0123")

; ST3.  To do: distinguish throughput 3 (D/Q-form B/H/S, Q-form D) from
; throughput 1 (one lane B/H/S/D).
(define_insn_reservation "thunderx3t110_asimd_store3" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\
                        neon_store3_one_lane,neon_store3_one_lane_q"))
  "thunderx3t110_ls01,thunderx3t110_f0123")

; ST4.  To do: distinguish throughput 4 (D/Q-form B/H/S, Q-form D) from
; throughput 1 (one lane B/H/S/D)?  The latter is not in the documentation.
(define_insn_reservation "thunderx3t110_asimd_store4" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q,\
                        neon_store4_one_lane,neon_store4_one_lane_q"))
  "thunderx3t110_ls01,thunderx3t110_f0123")
|
|
660
|
|
;; Crypto extensions.

; AES encrypt and mix-columns types: latency 4 on any FP unit.
(define_insn_reservation "thunderx3t110_aes" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "crypto_aese,crypto_aesmc"))
  "thunderx3t110_f0123")

; SHA1 and SHA256 types: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_sha" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\
                        crypto_sha256_fast,crypto_sha256_slow"))
  "thunderx3t110_f0123")

;; CRC extension.

; CRC: latency 3, i1 only.
(define_insn_reservation "thunderx3t110_crc" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "crc"))
  "thunderx3t110_i1")

;; PMULL extension.

; Polynomial multiply long: latency 5 on any FP unit.
(define_insn_reservation "thunderx3t110_pmull" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "crypto_pmull"))
  "thunderx3t110_f0123")
|