111
|
1 ;; Cavium ThunderX pipeline description
|
|
2 ;; Copyright (C) 2014-2017 Free Software Foundation, Inc.
|
|
3 ;;
|
|
4 ;; Written by Andrew Pinski <apinski@cavium.com>
|
|
5
|
|
6 ;; This file is part of GCC.
|
|
7
|
|
8 ;; GCC is free software; you can redistribute it and/or modify
|
|
9 ;; it under the terms of the GNU General Public License as published by
|
|
10 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
11 ;; any later version.
|
|
12
|
|
13 ;; GCC is distributed in the hope that it will be useful,
|
|
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16 ;; GNU General Public License for more details.
|
|
17
|
|
18 ;; You should have received a copy of the GNU General Public License
|
|
19 ;; along with GCC; see the file COPYING3. If not see
|
|
20 ;; <http://www.gnu.org/licenses/>.
|
|
21
|
|
22
|
|
23 ;; Thunder is a dual-issue processor that can issue all instructions on
|
|
24 ;; pipe0 and a subset on pipe1.
|
|
25
|
|
26
|
|
;; Automata used by the ThunderX model.  The CPU units declared in this
;; file are spread over them: the two issue pipes in thunderx_main, and the
;; multiply, divide and SIMD units each in an automaton of their own.
(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")
|
|
28
|
|
;; The two issue pipes share one automaton.
(define_cpu_unit "thunderx_pipe0" "thunderx_main")
(define_cpu_unit "thunderx_pipe1" "thunderx_main")

;; Functional units that can stay busy after the issue cycle; each one
;; lives in its own automaton.
(define_cpu_unit "thunderx_mult" "thunderx_mult")
(define_cpu_unit "thunderx_divide" "thunderx_divide")
(define_cpu_unit "thunderx_simd" "thunderx_simd")
|
|
34
|
|
;; Simple ALU, logical, move, address-generation and extend operations:
;; latency 1, issued on either pipe.
(define_insn_reservation "thunderx_add" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
  "thunderx_pipe0 | thunderx_pipe1")
|
|
39
|
|
;; Shifts, rotates, bitfield operations and bit/byte reversal: latency 1,
;; issued on either pipe.
;; The "extend" type is intentionally not listed here: thunderx_add already
;; matches it with the same latency and reservation, and the reservation
;; picked for an insn is undefined when the conditions of two
;; define_insn_reservation constructs are both true for it.
(define_insn_reservation "thunderx_shift" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "bfm,bfx,rotate_imm,shift_imm,shift_reg,rbit,rev"))
  "thunderx_pipe0 | thunderx_pipe1")
|
|
44
|
|
45
|
|
;; Arithmetic and logical instructions with an extra shift or extend take
;; two cycles.
;; FIXME: This needs more attributes on aarch64 than are currently there;
;; the model is conservative for now.  It is not exact, because the
;; two-cycle latency only applies to !(LSL && shift by 0/1/2/3) and to
;; !(zero extend).

(define_insn_reservation "thunderx_arith_shift" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
  "thunderx_pipe0 | thunderx_pipe1")
|
|
56
|
|
;; Conditional select: latency 2, issued on either pipe.
(define_insn_reservation "thunderx_csel" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "csel"))
  "thunderx_pipe0 | thunderx_pipe1")
|
|
61
|
|
;; Multiply, multiply-accumulate and count-leading-zeros can only issue on
;; pipe 1.  They take the multiply unit together with pipe 1 in the issue
;; cycle and have a latency of 4.

(define_insn_reservation "thunderx_mul" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
  "thunderx_pipe1 + thunderx_mult")
|
|
68
|
|
;; crcb, crch and crcw take 4 cycles and can only issue on pipe 1.
;; The condition tests only the "crc" type, so every insn of that type
;; gets this reservation regardless of operand width.

(define_insn_reservation "thunderx_crc32" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crc"))
  "thunderx_pipe1 + thunderx_mult")
|
|
75
|
|
76 ;; crcx is 5 cycles and only happen on pipe 1
|
|
77 ;(define_insn_reservation "thunderx_crc64" 5
|
|
78 ; (and (eq_attr "tune" "thunderx")
|
|
79 ; (eq_attr "type" "crc")
|
|
80 ; (eq_attr "mode" "DI"))
|
|
81 ; "thunderx_pipe1 + thunderx_mult")
|
|
82
|
|
;; Integer divide: latency 22.  Issues on pipe 1 together with the divide
;; unit, then keeps the divide unit busy for 21 further cycles.  The
;; condition tests only the udiv/sdiv types, so all integer divides use
;; this reservation.
(define_insn_reservation "thunderx_div32" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "udiv,sdiv"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")
|
|
87
|
|
88 ;(define_insn_reservation "thunderx_div64" 38
|
|
89 ; (and (eq_attr "tune" "thunderx")
|
|
90 ; (eq_attr "type" "udiv,sdiv")
|
|
91 ; (eq_attr "mode" "DI"))
|
|
92 ; "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")
|
|
93
|
|
;; Single-register stores take one cycle and issue on pipe 0 only.
(define_insn_reservation "thunderx_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_4"))
  "thunderx_pipe0")
|
|
99
|
|
;; Store pairs are single issued: they reserve both pipes for the issue
;; cycle.
(define_insn_reservation "thunderx_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_8,store_16"))
  "thunderx_pipe0 + thunderx_pipe1")
|
|
105
|
|
106 ;; Prefetch are single issued
|
|
107 ;(define_insn_reservation "thunderx_prefetch" 1
|
|
108 ; (and (eq_attr "tune" "thunderx")
|
|
109 ; (eq_attr "type" "prefetch"))
|
|
110 ; "thunderx_pipe0 + thunderx_pipe1")
|
|
111
|
|
;; Loads (and load pairs) that hit L1 take 3 cycles and issue on pipe 0.
(define_insn_reservation "thunderx_load" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "load_4, load_8, load_16"))
  "thunderx_pipe0")
|
|
117
|
|
;; Branches, traps and calls: latency 1, issued on pipe 1 only.
(define_insn_reservation "thunderx_brj" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "branch,trap,call"))
  "thunderx_pipe1")
|
|
122
|
|
123 ;; FPU
|
|
124
|
|
;; Scalar FP add (single and double): latency 4, pipe 1.
(define_insn_reservation "thunderx_fadd" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "faddd,fadds"))
  "thunderx_pipe1")
|
|
129
|
|
;; FP constant materialisation: latency 1, pipe 1.
(define_insn_reservation "thunderx_fconst" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fconsts,fconstd"))
  "thunderx_pipe1")
|
|
134
|
|
;; Moves between FP registers take 2 cycles; scalar min/max is modelled
;; the same way.  Pipe 1 only.
(define_insn_reservation "thunderx_fmov" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
  "thunderx_pipe1")
|
|
140
|
|
;; FP ABS and NEG take 1 cycle on pipe 1.
(define_insn_reservation "thunderx_fabs" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "ffariths,ffarithd"))
  "thunderx_pipe1")
|
|
146
|
|
;; FP conditional select: latency 3, pipe 1.
(define_insn_reservation "thunderx_fcsel" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcsel"))
  "thunderx_pipe1")
|
|
151
|
|
;; Insns of type f_mrc / f_mcr: latency 2, pipe 1.
(define_insn_reservation "thunderx_fmovgpr" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_mrc, f_mcr"))
  "thunderx_pipe1")
|
|
156
|
|
;; FP compare and conditional compare: latency 3, pipe 1.
(define_insn_reservation "thunderx_fcmp" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
  "thunderx_pipe1")
|
|
161
|
|
;; Scalar FP multiply and multiply-accumulate: latency 6, pipe 1.
(define_insn_reservation "thunderx_fmul" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
  "thunderx_pipe1")
|
|
166
|
|
;; Single-precision FP divide: latency 12.  Issues on pipe 1 together with
;; the divide unit, which then stays reserved for 8 more cycles.
(define_insn_reservation "thunderx_fdivs" 12
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivs"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")
|
|
171
|
|
;; Double-precision FP divide: latency 22.  Issues on pipe 1 together with
;; the divide unit, which then stays reserved for 18 more cycles.
(define_insn_reservation "thunderx_fdivd" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")
|
|
176
|
|
;; Single-precision FP square root: latency 17.  Issues on pipe 1 together
;; with the divide unit, which then stays reserved for 13 more cycles.
(define_insn_reservation "thunderx_fsqrts" 17
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrts"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
|
|
181
|
|
;; Double-precision FP square root: latency 31.  Issues on pipe 1 together
;; with the divide unit, which then stays reserved for 27 more cycles.
(define_insn_reservation "thunderx_fsqrtd" 31
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrtd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*27")
|
|
186
|
|
;; Rounding and conversions that stay inside the FP unit take 4 cycles.
(define_insn_reservation "thunderx_frint" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvt,f_rints,f_rintd"))
  "thunderx_pipe1")
|
|
192
|
|
;; Float <-> integer conversions that also move between the integer and FP
;; register files take 6 cycles.
(define_insn_reservation "thunderx_f_cvt" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvtf2i,f_cvti2f"))
  "thunderx_pipe1")
|
|
198
|
|
;; FP/SIMD loads and stores issue on pipe 0.
;; 64-bit register and register-pair loads take 4 cycles from L1.
(define_insn_reservation "thunderx_64simd_fp_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
                        neon_load1_1reg_q,neon_load1_2reg"))
  "thunderx_pipe0")
|
|
206
|
|
;; A 128-bit load pair is single issued (it reserves both pipes) and takes
;; 4 cycles from L1.
(define_insn_reservation "thunderx_128simd_pair_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_2reg_q"))
  "thunderx_pipe0+thunderx_pipe1")
|
|
212
|
|
;; FP/SIMD stores take one cycle on pipe 0.
;; ST1 with one register, for either multiple structures or a single
;; structure, is also one cycle.
(define_insn_reservation "thunderx_simd_fp_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
                        neon_store1_one_lane, neon_store1_one_lane_q"))
  "thunderx_pipe0")
|
|
221
|
|
;; 64-bit NEON store pairs are single issued: both pipes are reserved for
;; one cycle.
(define_insn_reservation "thunderx_64neon_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg"))
  "thunderx_pipe0 + thunderx_pipe1")
|
|
227
|
|
;; 128-bit NEON store pairs are single issued and hold both pipes for two
;; cycles.
(define_insn_reservation "thunderx_128neon_storepair" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg_q"))
  "(thunderx_pipe0 + thunderx_pipe1)*2")
|
|
233
|
|
;; LD1R/LD1 (with a single structure) takes 6 cycles and is issued on
;; pipe 0.
(define_insn_reservation "thunderx_neon_ld1" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_all_lanes"))
  "thunderx_pipe0")
|
|
239
|
|
240 ;; SIMD/NEON (q forms take an extra cycle)
|
|
241 ;; SIMD on ThunderX is 64 bits wide.
|
|
242
|
|
;; ThunderX SIMD move instruction types - 2 cycles for the 64-bit forms
;; (3 cycles for the q forms).
;; DUP and INS are modelled the same way, as are SIMD fabs/fneg.
;; Issues on pipe 1 and uses the SIMD unit in the issue cycle.
(define_insn_reservation "thunderx_neon_move" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
                        neon_fp_compare_d, neon_move, neon_dup, \
                        neon_ins, neon_from_gp, neon_to_gp, \
                        neon_abs, neon_neg, \
                        neon_fp_neg_s, neon_fp_abs_s"))
  "thunderx_pipe1 + thunderx_simd")
|
|
254
|
|
;; q (128-bit) forms of the SIMD move class: latency 3.  Issues on pipe 1
;; with the SIMD unit, then keeps the SIMD unit reserved for one more
;; cycle.
(define_insn_reservation "thunderx_neon_move_q" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
                        neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
                        neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
                        neon_abs_q, neon_neg_q, \
                        neon_fp_neg_s_q, neon_fp_neg_d_q, \
                        neon_fp_abs_s_q, neon_fp_abs_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
|
264
|
|
;; ThunderX SIMD simple/add instruction types - 4 cycles for the 64-bit
;; forms (5 cycles for the q forms).
;; Issues on pipe 1 and uses the SIMD unit in the issue cycle.
;; Each type is listed exactly once; an earlier revision of this list
;; named neon_reduc_add twice.

(define_insn_reservation "thunderx_neon_add" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
                        neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
                        neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
                        neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
                        neon_fp_minmax_s, neon_fp_minmax_d, neon_cls, \
                        neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
                        neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
                        neon_fp_reduc_minmax_s"))
  "thunderx_pipe1 + thunderx_simd")
|
|
278
|
|
;; q (128-bit) forms of the SIMD simple/add class: latency 5.  Issues on
;; pipe 1 with the SIMD unit, then keeps the SIMD unit reserved for one
;; more cycle.
;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form, which is
;; incorrect; they are listed here with the q forms.
;; Each type is listed exactly once; an earlier revision of this list
;; named neon_reduc_add_q twice.

(define_insn_reservation "thunderx_neon_add_q" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
                        neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
                        neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
                        neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
                        neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_cls_q, \
                        neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
                        neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
                        neon_arith_acc_q, neon_rev_q, \
                        neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
|
293
|
|
;; Multiplies (float and integer), shifts, permutes (except for TBL) and
;; float conversions take 6 cycles for the 64-bit forms (7 cycles for the
;; q forms).  Issues on pipe 1 and uses the SIMD unit in the issue cycle.
(define_insn_reservation "thunderx_neon_mult" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
                        neon_mla_b, neon_mla_h, neon_mla_s, \
                        neon_mla_h_scalar, neon_mla_s_scalar, \
                        neon_ext, neon_shift_imm, neon_permute, \
                        neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
                        neon_sat_shift_reg, neon_shift_acc, \
                        neon_mul_b, neon_mul_h, neon_mul_s, \
                        neon_mul_h_scalar, neon_mul_s_scalar, \
                        neon_fp_mul_s_scalar, \
                        neon_fp_mla_s_scalar"))
  "thunderx_pipe1 + thunderx_simd")
|
|
309
|
|
;; q (128-bit) forms of the SIMD multiply/shift/permute/convert class, plus
;; the long, widening and narrowing types listed below: latency 7.  Issues
;; on pipe 1 with the SIMD unit, then keeps the SIMD unit reserved for one
;; more cycle.
(define_insn_reservation "thunderx_neon_mult_q" 7
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
                        neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
                        neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
                        neon_ext_q, neon_shift_imm_q, neon_permute_q, \
                        neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
                        neon_sat_shift_reg_q, neon_shift_acc_q, \
                        neon_shift_imm_long, \
                        neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
                        neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
                        neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
                        neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
                        neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
                        neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
|
326
|
|
327
|
|
;; AES[ED] takes 5 cycles.  Issues on pipe 1 with the SIMD unit, then keeps
;; the SIMD unit reserved for one more cycle.
(define_insn_reservation "thunderx_crypto_aese" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aese"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
|
333
|
|
;; AES{,I}MC takes 3 cycles.  Issues on pipe 1 with the SIMD unit, then
;; keeps the SIMD unit reserved for one more cycle.
(define_insn_reservation "thunderx_crypto_aesmc" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aesmc"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
|
339
|
|
340
|
|
;; ThunderX 128-bit SIMD reads the upper half in cycle 2 and writes the
;; upper half in the last cycle.  Between a q-form producer and a q-form
;; consumer the latency is therefore one cycle less than the producer's
;; default latency (2, 4 and 6 instead of 3, 5 and 7).
(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
|
|
345
|
|
;; 64-bit TBL is emulated and takes 160 cycles; both pipes are reserved
;; for the whole duration.
(define_insn_reservation "thunderx_tbl" 160
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1"))
  "(thunderx_pipe1+thunderx_pipe0)*160")
|
|
351
|
|
;; 128-bit TBL is emulated and takes 320 cycles; both pipes are reserved
;; for the whole duration.
(define_insn_reservation "thunderx_tblq" 320
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1_q"))
  "(thunderx_pipe1+thunderx_pipe0)*320")
|
|
357
|
|
;; Untyped and multiple-instruction patterns are assumed to need both
;; pipes, so they are modelled as single issued with a latency of 1.

(define_insn_reservation "thunderx_unknown" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "untyped,multiple"))
  "thunderx_pipe0 + thunderx_pipe1")
|
|
365
|
|
366
|