111
|
1 ;; Samsung Exynos M1 pipeline description
|
145
|
2 ;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
|
111
|
3 ;;
|
|
4 ;; This file is part of GCC.
|
|
5 ;;
|
|
6 ;; GCC is free software; you can redistribute it and/or modify it
|
|
7 ;; under the terms of the GNU General Public License as published by
|
|
8 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
9 ;; any later version.
|
|
10 ;;
|
|
11 ;; GCC is distributed in the hope that it will be useful, but
|
|
12 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14 ;; General Public License for more details.
|
|
15 ;;
|
|
16 ;; You should have received a copy of the GNU General Public License
|
|
17 ;; along with GCC; see the file COPYING3. If not see
|
|
18 ;; <http://www.gnu.org/licenses/>.
|
|
19
|
|
;; Coarse Advanced SIMD insn classes used by the Exynos M1 reservations.
;; The first string lists every value the attribute may take; the cond maps
;; the generic "type" attribute onto one of them, testing the clauses in
;; order, and falls back to "unknown" when no clause matches.
;;
;; Every wrapped "type" list ends its lines with a backslash so that no raw
;; newline is embedded in the list.
;;
;; NOTE(review): neon_mla_q, neon_mla_long, neon_shift_reg_basic_q,
;; neon_shift_reg_complex_q, neon_fp_minmax, neon_fp_mul_q, neon_fp_mla_q,
;; neon_move and neon_store are declared but never produced by the cond
;; (the neon_fp_minmax_* types map to neon_fp_compare) -- confirm whether
;; that is intentional.
(define_attr "exynos_m1_neon_type"
  "neon_arith_simple, neon_arith_basic, neon_arith_complex,
   neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long,
   neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex,
   neon_shift_reg_basic, neon_shift_reg_basic_q,
   neon_shift_reg_complex, neon_shift_reg_complex_q,
   neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare,
   neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt,
   neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q,
   neon_fp_estimate, neon_fp_estimatex, neon_fp_step,
   neon_bitops, neon_bitops_q, neon_bitins,
   neon_to_gp, neon_from_gp, neon_move, neon_tbl,
   neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4,
   neon_load1_one, neon_load1_all,
   neon_load2_2, neon_load2_one, neon_load2_all,
   neon_load3_3, neon_load3_one, neon_load3_all,
   neon_load4_4, neon_load4_one, neon_load4_all,
   neon_store,
   neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one,
   neon_store2_2, neon_store2_one,
   neon_store3_3, neon_store3_one,
   neon_store4_4, neon_store4_one,
   unknown"
  (cond [
         ;; Integer arithmetic.
         (eq_attr "type" "neon_abd, neon_abd_q, neon_abd_long,\
                          neon_abs, neon_abs_q,\
                          neon_minmax, neon_minmax_q")
           (const_string "neon_arith_simple")

         (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
                          neon_neg, neon_neg_q,\
                          neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\
                          neon_logic, neon_logic_q, neon_tst, neon_tst_q,\
                          neon_compare_zero, neon_compare_zero_q")
           (const_string "neon_arith_basic")

         (eq_attr "type" "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\
                          neon_reduc_add, neon_reduc_add_q,\
                          neon_reduc_add_acc, neon_reduc_add_acc_q,\
                          neon_reduc_add_long, neon_add_halve_narrow_q,\
                          neon_add_halve, neon_add_halve_q,\
                          neon_sub_halve, neon_sub_halve_q, neon_qabs,\
                          neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
                          neon_qneg_q, neon_qsub, neon_qsub_q,\
                          neon_sub_halve_narrow_q,\
                          neon_compare, neon_compare_q,\
                          neon_reduc_minmax, neon_reduc_minmax_q")
           (const_string "neon_arith_complex")

         ;; Integer multiply and multiply-accumulate.
         (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\
                          neon_mul_s, neon_mul_s_q,\
                          neon_mul_h_scalar, neon_mul_h_scalar_q,\
                          neon_mul_s_scalar, neon_mul_s_scalar_q,\
                          neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
                          neon_sat_mul_b, neon_sat_mul_b_q,\
                          neon_sat_mul_h, neon_sat_mul_h_q,\
                          neon_sat_mul_s, neon_sat_mul_s_q,\
                          neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\
                          neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\
                          neon_sat_mul_b_long, neon_sat_mul_h_long,\
                          neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
                          neon_sat_mul_s_scalar_long, crypto_pmull")
           (const_string "neon_multiply")

         (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
                          neon_mla_h_scalar, neon_mla_s_scalar,\
                          neon_mla_b_long, neon_mla_h_long,\
                          neon_mla_s_long,\
                          neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
                          neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
                          neon_mla_h_scalar_q, neon_mla_s_scalar_q")
           (const_string "neon_mla")

         (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
                          neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
                          neon_sat_mla_s_scalar_long")
           (const_string "neon_sat_mla_long")

         ;; Integer shifts.
         (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
           (const_string "neon_shift_acc")

         (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
                          neon_shift_imm_narrow_q, neon_shift_imm_long")
           (const_string "neon_shift_imm_basic")

         (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
                          neon_sat_shift_imm_narrow_q")
           (const_string "neon_shift_imm_complex")

         (eq_attr "type" "neon_shift_reg, neon_shift_reg_q")
           (const_string "neon_shift_reg_basic")

         (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q")
           (const_string "neon_shift_reg_complex")

         ;; Floating point.
         (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
                          neon_fp_abs_s, neon_fp_abs_s_q,\
                          neon_fp_neg_d, neon_fp_neg_d_q,\
                          neon_fp_abs_d, neon_fp_abs_d_q")
           (const_string "neon_fp_unary")

         (eq_attr "type" "neon_fp_addsub_s, neon_fp_addsub_s_q,\
                          neon_fp_addsub_d, neon_fp_addsub_d_q")
           (const_string "neon_fp_add")

         (eq_attr "type" "neon_fp_abd_s, neon_fp_abd_s_q,\
                          neon_fp_abd_d, neon_fp_abd_d_q")
           (const_string "neon_fp_abd")

         (eq_attr "type" "neon_fp_compare_s, neon_fp_compare_s_q,\
                          neon_fp_compare_d, neon_fp_compare_d_q,\
                          neon_fp_minmax_s, neon_fp_minmax_s_q,\
                          neon_fp_minmax_d, neon_fp_minmax_d_q")
           (const_string "neon_fp_compare")

         (eq_attr "type" "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\
                          neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q")
           (const_string "neon_fp_reduc_minmax")

         (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
                          neon_fp_reduc_add_d, neon_fp_reduc_add_d_q")
           (const_string "neon_fp_reduc_add")

         (eq_attr "type" "neon_fp_round_s, neon_fp_round_s_q,\
                          neon_fp_round_d, neon_fp_round_d_q")
           (const_string "neon_fp_round")

         (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h,\
                          neon_fp_to_int_s, neon_fp_to_int_s_q,\
                          neon_fp_to_int_d_q, neon_fp_to_int_d,\
                          neon_int_to_fp_s, neon_int_to_fp_s_q,\
                          neon_int_to_fp_d, neon_int_to_fp_d_q")
           (const_string "neon_fp_cvt")

         (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_q,\
                          neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\
                          neon_fp_mul_d, neon_fp_mul_d_q,\
                          neon_fp_mul_d_scalar_q")
           (const_string "neon_fp_mul")

         (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_q,\
                          neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q,\
                          neon_fp_mla_d, neon_fp_mla_d_q,\
                          neon_fp_mla_d_scalar_q")
           (const_string "neon_fp_mla")

         (eq_attr "type" "neon_fp_recpe_s, neon_fp_recpe_s_q,\
                          neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
                          neon_fp_recpe_d, neon_fp_recpe_d_q,\
                          neon_fp_rsqrte_d, neon_fp_rsqrte_d_q")
           (const_string "neon_fp_estimate")

         (eq_attr "type" "neon_fp_recpx_s, neon_fp_recpx_s_q,\
                          neon_fp_recpx_d, neon_fp_recpx_d_q")
           (const_string "neon_fp_estimatex")

         (eq_attr "type" "neon_fp_recps_s, neon_fp_recps_s_q,\
                          neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
                          neon_fp_recps_d, neon_fp_recps_d_q,\
                          neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")
           (const_string "neon_fp_step")

         ;; Bit manipulation, permutes, table lookups and register moves.
         (eq_attr "type" "neon_rbit, neon_rbit_q,\
                          neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\
                          neon_dup, neon_dup_q,\
                          neon_rev, neon_rev_q,\
                          neon_move, neon_move_q,\
                          neon_ext, neon_permute, neon_zip")
           (const_string "neon_bitops")

         (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q")
           (const_string "neon_bitops_q")

         (eq_attr "type" "neon_bsl, neon_bsl_q")
           (const_string "neon_bitins")

         (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4")
           (const_string "neon_tbl")

         (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr")
           (const_string "neon_from_gp")

         (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc")
           (const_string "neon_to_gp")

         ;; Structure loads.
         (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")
           (const_string "neon_load1_1")

         (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q")
           (const_string "neon_load1_2")

         (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q")
           (const_string "neon_load1_3")

         (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q")
           (const_string "neon_load1_4")

         (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q")
           (const_string "neon_load1_one")

         (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q")
           (const_string "neon_load1_all")

         (eq_attr "type" "neon_load2_2reg, neon_load2_2reg_q,\
                          neon_load2_4reg, neon_load2_4reg_q")
           (const_string "neon_load2_2")

         (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q")
           (const_string "neon_load2_one")

         (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q")
           (const_string "neon_load2_all")

         (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q")
           (const_string "neon_load3_3")

         (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q")
           (const_string "neon_load3_one")

         (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q")
           (const_string "neon_load3_all")

         (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q")
           (const_string "neon_load4_4")

         (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
           (const_string "neon_load4_one")

         (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
           (const_string "neon_load4_all")

         ;; Structure stores.
         (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
           (const_string "neon_store1_1")

         (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q")
           (const_string "neon_store1_2")

         (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q")
           (const_string "neon_store1_3")

         (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q")
           (const_string "neon_store1_4")

         (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q")
           (const_string "neon_store1_one")

         (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\
                          neon_store2_4reg, neon_store2_4reg_q")
           (const_string "neon_store2_2")

         (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q")
           (const_string "neon_store2_one")

         (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q")
           (const_string "neon_store3_3")

         (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q")
           (const_string "neon_store3_one")

         (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q")
           (const_string "neon_store4_4")

         (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q")
           (const_string "neon_store4_one")]

        ;; Default when no clause above matches.
        (const_string "unknown")))
|
|
286
|
|
287 ;; The Exynos M1 core is modeled as a triple issue pipeline that has
|
|
288 ;; the following functional units.
|
|
289
|
|
;; Automata that the CPU units in this file are assigned to:
;; exynos_m1_gp holds the integer and branch units, exynos_m1_ls the
;; load/store units, and exynos_m1_fp the F0/F1 units.
(define_automaton "exynos_m1_gp")
(define_automaton "exynos_m1_ls")
(define_automaton "exynos_m1_fp")
|
|
293
|
|
294 ;; 1. Two pipelines for simple integer operations: A, B
|
|
295 ;; 2. One pipeline for simple or complex integer operations: C
|
|
296
|
|
;; Integer pipes A, B and C, all in the exynos_m1_gp automaton.
(define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp")

;; em1_alu: any one of the three integer pipes.
(define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)")
;; em1_c: pipe C only.
(define_reservation "em1_c" "em1_xc")
|
|
301
|
|
302 ;; 3. Two asymmetric pipelines for Neon and FP operations: F0, F1
|
|
303
|
|
;; Neon/FP pipes F0 and F1, both in the exynos_m1_fp automaton.
(define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp")

;; Aliases that resolve to F0 only.
(define_reservation "em1_fmac" "em1_f0")
(define_reservation "em1_fcvt" "em1_f0")
;; em1_nalu: either pipe; em1_nalu0 / em1_nalu1 pin the choice to F0 / F1.
(define_reservation "em1_nalu" "(em1_f0 | em1_f1)")
(define_reservation "em1_nalu0" "em1_f0")
(define_reservation "em1_nalu1" "em1_f1")
(define_reservation "em1_nmisc" "em1_f0")
(define_reservation "em1_ncrypt" "em1_f0")
;; Aliases that resolve to F1 only.
(define_reservation "em1_fadd" "em1_f1")
(define_reservation "em1_fvar" "em1_f1")
(define_reservation "em1_fst" "em1_f1")
|
|
316
|
|
317 ;; 4. One pipeline for branch operations: BX
|
|
318
|
|
;; Branch pipe BX, in the exynos_m1_gp automaton, and its alias em1_br.
(define_cpu_unit "em1_bx" "exynos_m1_gp")

(define_reservation "em1_br" "em1_bx")
|
|
322
|
|
323 ;; 5. One AGU for loads: L
|
|
324 ;; One AGU for stores and one pipeline for stores: S, SD
|
|
325
|
|
;; Load AGU (em1_lx), store AGU (em1_sx) and store pipe (em1_sd), all in
;; the exynos_m1_ls automaton.
(define_cpu_unit "em1_lx" "exynos_m1_ls")
(define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls")

;; em1_ld: the load AGU alone.
(define_reservation "em1_ld" "em1_lx")
;; em1_st: store AGU and store pipe reserved together in the same cycle.
(define_reservation "em1_st" "(em1_sx + em1_sd)")
|
|
331
|
|
332 ;; Common occurrences
|
|
;; em1_sfst: em1_fst (F1) together with the store units in one cycle.
(define_reservation "em1_sfst" "(em1_fst + em1_st)")
;; em1_lfst: em1_fst (F1) together with the load AGU in one cycle.
(define_reservation "em1_lfst" "(em1_fst + em1_ld)")
|
|
335
|
|
336 ;; Branches
|
|
337 ;;
|
|
338 ;; No latency as there is no result
|
|
339 ;; TODO: Unconditional branches use no units;
|
|
340 ;; conditional branches add the BX unit;
|
|
341 ;; indirect branches add the C unit.
|
|
;; "branch" insns: default latency 0; reserve em1_br (the BX unit).
(define_insn_reservation "exynos_m1_branch" 0
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "branch"))
  "em1_br")
|
|
346
|
|
;; "call" insns: default latency 1; reserve any integer pipe (em1_alu).
(define_insn_reservation "exynos_m1_call" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "call"))
  "em1_alu")
|
|
351
|
|
352 ;; Basic ALU
|
|
353 ;;
|
|
354 ;; Simple ALU without shift, non-predicated
|
|
;; Applies only when "predicated" is not "yes".  Default latency 1; may
;; issue on any of the three integer pipes (em1_alu).
(define_insn_reservation "exynos_m1_alu" 1
  (and (eq_attr "tune" "exynosm1")
       (and (not (eq_attr "predicated" "yes"))
            (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
                             alu_sreg, alus_sreg, logic_reg, logics_reg,\
                             adc_imm, adcs_imm, adc_reg, adcs_reg,\
                             adr, bfm, bfx, clz, rbit, rev, csel, alu_dsp_reg,\
                             shift_imm, shift_reg, rotate_imm, extend,\
                             mov_imm, mov_reg,\
                             mvn_imm, mvn_reg,\
                             mrs, multiple")))
  "em1_alu")
|
|
367
|
|
368 ;; Simple ALU without shift, predicated
|
|
;; Applies only when "predicated" is "yes".  Default latency 1; restricted
;; to pipe C (em1_c).  The type list equals that of exynos_m1_alu except
;; that csel is absent.
(define_insn_reservation "exynos_m1_alu_p" 1
  (and (eq_attr "tune" "exynosm1")
       (and (eq_attr "predicated" "yes")
            (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
                             alu_sreg, alus_sreg, logic_reg, logics_reg,\
                             adc_imm, adcs_imm, adc_reg, adcs_reg,\
                             adr, bfm, bfx, clz, rbit, rev, alu_dsp_reg,\
                             shift_imm, shift_reg, rotate_imm, extend,\
                             mov_imm, mov_reg,\
                             mvn_imm, mvn_reg,\
                             mrs, multiple")))
  "em1_c")
|
|
381
|
|
382 ;; ALU ops with immediate shift
|
|
383 ;; TODO: if the shift value is between 0 and 3, the latency is just 1 cycle;
|
|
384 ;; otherwise it takes 2 cycles and the unit is blocked;
|
|
385 ;; for now, assume the latter's latency and the former's units.
|
|
;; Default latency 2; a single-cycle reservation of any integer pipe.
;; There is no test of "predicated" here.
(define_insn_reservation "exynos_m1_alu_shift" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "alu_ext, alus_ext,\
                        alu_shift_imm, alus_shift_imm,\
                        logic_shift_imm, logics_shift_imm,\
                        mov_shift, mvn_shift"))
  "(em1_alu)")
|
|
393
|
|
394 ;; ALU ops with register controlled shift, non-predicated
|
|
;; Applies only when "predicated" is not "yes".  Default latency 2;
;; reserves em1_alu on two consecutive cycles.
(define_insn_reservation "exynos_m1_alu_shift_reg" 2
  (and (eq_attr "tune" "exynosm1")
       (and (not (eq_attr "predicated" "yes"))
            (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
                             logic_shift_reg, logics_shift_reg,\
                             mov_shift_reg, mvn_shift_reg")))
  "(em1_alu * 2)")
|
|
402
|
|
403 ;; ALU ops with register controlled shift, predicated
|
|
;; Applies only when "predicated" is "yes".  Default latency 2; reserves
;; any integer pipe on the first cycle, then pipe C on the next.
(define_insn_reservation "exynos_m1_alu_shift_reg_p" 2
  (and (eq_attr "tune" "exynosm1")
       (and (eq_attr "predicated" "yes")
            (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
                             logic_shift_reg, logics_shift_reg,\
                             mov_shift_reg, mvn_shift_reg")))
  "(em1_alu, em1_c)")
|
|
411
|
|
412 ;; Integer multiply
|
|
;; Insns whose "mul32" attribute is "yes": default latency 3, pipe C only.
(define_insn_reservation "exynos_m1_mla" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "mul32" "yes"))
  "em1_c")
|
|
417
|
|
;; Insns whose "widen_mul64" attribute is "yes": default latency 4; any
;; integer pipe on the first cycle, then pipe C on the next.
(define_insn_reservation "exynos_m1_mlal" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "widen_mul64" "yes"))
  "em1_alu, em1_c")
|
|
422
|
|
423 ;; Integer divide
|
|
424 ;; TODO: assume the median latency; blocks other divisions
|
|
;; udiv/sdiv: default latency 13; pipe C is reserved for one cycle only.
(define_insn_reservation "exynos_m1_div" 13
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "udiv, sdiv"))
  "em1_c")
|
|
429
|
|
430 ;; Load-store execution Unit
|
|
431 ;;
|
|
432 ;; Loads of up to 2 words.
|
|
;; load_byte, load_4 and load_8: default latency 4; one cycle on the load
;; AGU (em1_ld).
(define_insn_reservation "exynos_m1_load" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "load_byte, load_4, load_8"))
  "em1_ld")
|
|
437
|
|
438 ;; Loads of 3 or 4 words.
|
|
;; load_12 and load_16: default latency 6; the load AGU is held for three
;; consecutive cycles.
(define_insn_reservation "exynos_m1_loadm" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "load_12, load_16"))
  "(em1_ld * 3)")
|
|
443
|
|
444 ;; Stores of up to 2 words.
|
|
;; store_4 and store_8: default latency 1; one cycle on em1_st (store AGU
;; plus store pipe).
(define_insn_reservation "exynos_m1_store" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "store_4, store_8"))
  "em1_st")
|
|
449
|
|
450 ;; Stores of 3 or 4 words.
|
|
;; store_12 and store_16: default latency 3; em1_st is held for three
;; consecutive cycles.
(define_insn_reservation "exynos_m1_storem" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "store_12, store_16"))
  "(em1_st * 3)")
|
|
455
|
|
456 ;; Advanced SIMD Unit
|
|
457 ;;
|
|
458 ;; Integer Arithmetic Instructions.
|
|
459
|
|
;; neon_arith_simple class: default latency 1 on em1_nmisc (F0).
;; NOTE(review): unlike the sibling reservations this name has no "neon_"
;; infix; it is kept unchanged because bypasses refer to reservations by
;; name.
(define_insn_reservation "exynos_m1_arith_simple" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_simple"))
  "em1_nmisc")
|
|
464
|
|
;; neon_arith_basic class: default latency 2 on either F0 or F1 (em1_nalu).
(define_insn_reservation "exynos_m1_neon_arith_basic" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_basic"))
  "em1_nalu")
|
|
469
|
|
;; neon_arith_complex class: default latency 3 on em1_nmisc (F0).
(define_insn_reservation "exynos_m1_neon_arith_complex" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_complex"))
  "em1_nmisc")
|
|
474
|
|
475 ;; Integer Multiply Instructions.
|
|
476
|
|
;; Multiplies, multiply-accumulates and saturating long MLAs share one
;; reservation: default latency 4 on em1_nmisc (F0).
(define_insn_reservation "exynos_m1_neon_multiply" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_multiply, neon_mla, neon_sat_mla_long"))
  "em1_nmisc")
|
|
482
|
|
483 ;; Integer Shift Instructions.
|
|
484
|
|
;; neon_shift_acc class: default latency 4 on em1_nalu1 (F1).
(define_insn_reservation "exynos_m1_neon_shift_acc" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_shift_acc"))
  "em1_nalu1")
|
|
490
|
|
;; Basic immediate and register shifts: default latency 2 on either F0 or
;; F1 (em1_nalu).
(define_insn_reservation "exynos_m1_neon_shift_basic" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_shift_imm_basic, neon_shift_reg_basic"))
  "em1_nalu")
|
|
497
|
|
;; Complex immediate and register shifts: default latency 4 on em1_nalu1
;; (F1).
(define_insn_reservation "exynos_m1_neon_shift_complex" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_shift_imm_complex, neon_shift_reg_complex"))
  "em1_nalu1")
|
|
504
|
|
505 ;; Floating Point Instructions.
|
|
506
|
|
;; neon_fp_unary class: default latency 2 on either F0 or F1 (em1_nalu).
(define_insn_reservation "exynos_m1_neon_fp_unary" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_unary"))
  "em1_nalu")
|
|
512
|
|
;; neon_fp_add class: default latency 4 on em1_fadd (F1).
(define_insn_reservation "exynos_m1_neon_fp_add" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_add"))
  "em1_fadd")
|
|
518
|
|
;; neon_fp_abd class: default latency 3 on em1_nmisc (F0).
(define_insn_reservation "exynos_m1_neon_fp_abd" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_abd"))
  "em1_nmisc")
|
|
524
|
|
;; neon_fp_compare class: default latency 1 on em1_nmisc (F0).
(define_insn_reservation "exynos_m1_neon_fp_compare" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_compare"))
  "em1_nmisc")
|
|
530
|
|
531 ;; TODO: the latency and throughput of reduce insns actually varies between
|
|
532 ;; 3-5 and 1/4-1, but picked the median values.
|
|
;; neon_fp_reduc_minmax class: default latency 5; em1_nmisc (F0) is held
;; for four consecutive cycles.
(define_insn_reservation "exynos_m1_neon_fp_reduc" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax"))
  "(em1_nmisc * 4)")
|
|
538
|
|
;; neon_fp_reduc_add class: default latency 10; two cycles on em1_nalu
;; (F0 or F1) followed by one cycle on em1_fadd (F1).
(define_insn_reservation "exynos_m1_neon_fp_reduc_add" 10
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add"))
  "((em1_nalu * 2), em1_fadd)")
|
|
544
|
|
;; neon_fp_round class: default latency 4 on em1_fcvt (F0).
(define_insn_reservation "exynos_m1_neon_fp_round" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_round"))
  "em1_fcvt")
|
|
550
|
|
;; neon_fp_cvt class: default latency 4 on em1_fcvt (F0).
(define_insn_reservation "exynos_m1_neon_fp_cvt" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_cvt"))
  "em1_fcvt")
|
|
556
|
|
;; neon_fp_mul class: default latency 5 on em1_fmac (F0).
(define_insn_reservation "exynos_m1_neon_fp_mul" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_mul"))
  "em1_fmac")
|
|
562
|
|
;; neon_fp_mla class: default latency 6 on em1_fmac (F0).
(define_insn_reservation "exynos_m1_neon_fp_mla" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_mla"))
  "em1_fmac")
|
|
568
|
|
;; neon_fp_estimate class: default latency 5 on em1_fcvt (F0).
(define_insn_reservation "exynos_m1_neon_fp_estimate" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_estimate"))
  "em1_fcvt")
|
|
574
|
|
;; neon_fp_estimatex class: default latency 1 on em1_nmisc (F0).
(define_insn_reservation "exynos_m1_neon_fp_estimatex" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex"))
  "em1_nmisc")
|
|
580
|
|
;; neon_fp_step class: default latency 6 on em1_fmac (F0).
(define_insn_reservation "exynos_m1_neon_fp_step" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_step"))
  "em1_fmac")
|
|
586
|
|
587 ;; Miscellaneous Instructions.
|
|
588
|
|
;; neon_bitops class: default latency 2 on either F0 or F1 (em1_nalu).
(define_insn_reservation "exynos_m1_neon_bitops" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitops"))
  "em1_nalu")
|
|
594
|
|
;; neon_bitops_q class: default latency 3; em1_nalu (F0 or F1) on two
;; consecutive cycles.
(define_insn_reservation "exynos_m1_neon_bitops_q" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitops_q"))
  "(em1_nalu, em1_nalu)")
|
|
600
|
|
;; neon_bitins class: default latency 2 on em1_nalu1 (F1).
(define_insn_reservation "exynos_m1_neon_bitins" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitins"))
  "em1_nalu1")
|
|
606
|
|
607 ;; TODO: it is more complicated than this.
|
|
;; neon_tbl class: default latency 2 on em1_nalu1 (F1).
(define_insn_reservation "exynos_m1_neon_tbl" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_tbl"))
  "em1_nalu1")
|
|
613
|
|
;; neon_from_gp class: default latency 4; reserves the store units
;; (em1_st) rather than an F pipe.
(define_insn_reservation "exynos_m1_neon_from_gp" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_from_gp"))
  "em1_st")
|
|
619
|
|
;; neon_to_gp class: default latency 9; reserves em1_lfst (F1 together
;; with the load AGU).
(define_insn_reservation "exynos_m1_neon_to_gp" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_to_gp"))
  "em1_lfst")
|
|
625
|
|
626 ;; Load Instructions.
|
|
627
|
|
;; Selected on "type" directly (f_loads, f_loadd, neon_ldp): default
;; latency 5; one cycle on the load AGU.
(define_insn_reservation "exynos_m1_neon_load" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_loads, f_loadd, neon_ldp"))
  "em1_ld")
|
|
633
|
|
;; neon_ldp_q: default latency 6; the load AGU on two consecutive cycles.
(define_insn_reservation "exynos_m1_neon_load_q" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_ldp_q"))
  "(em1_ld, em1_ld)")
|
|
639
|
|
;; neon_load1_1 and neon_load1_all classes: default latency 6; one cycle
;; on the load AGU.
(define_insn_reservation "exynos_m1_neon_load1_1" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all"))
  "em1_ld")
|
|
645
|
|
;; neon_load1_2 class: default latency 6; load AGU for two cycles.
(define_insn_reservation "exynos_m1_neon_load1_2" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_2"))
  "(em1_ld * 2)")
|
|
651
|
|
;; neon_load1_3 class: default latency 7; load AGU for three cycles.
(define_insn_reservation "exynos_m1_neon_load1_3" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_3"))
  "(em1_ld * 3)")
|
|
657
|
|
;; neon_load1_4 class: default latency 8; load AGU for four cycles.
(define_insn_reservation "exynos_m1_neon_load1_4" 8
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_4"))
  "(em1_ld * 4)")
|
|
663
|
|
;; neon_load1_one class: default latency 7; load AGU for two cycles, then
;; one cycle on em1_nalu (F0 or F1).
(define_insn_reservation "exynos_m1_neon_load1_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_one"))
  "((em1_ld * 2), em1_nalu)")
|
|
669
|
|
;; neon_load2_2 class: default latency 10; load AGU for five cycles.
(define_insn_reservation "exynos_m1_neon_load2_2" 10
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_2"))
  "(em1_ld * 5)")
|
|
675
|
|
;; neon_load2_one class: default latency 7; load AGU for two cycles, then
;; em1_nalu (F0 or F1) for two cycles.
(define_insn_reservation "exynos_m1_neon_load2_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_one"))
  "((em1_ld * 2), (em1_nalu * 2))")
|
|
681
|
|
;; neon_load2_all class: default latency 6; load AGU for two cycles.
(define_insn_reservation "exynos_m1_neon_load2_all" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_all"))
  "(em1_ld * 2)")
|
|
687
|
|
;; neon_load3_3 class: default latency 12; load AGU for six cycles.
(define_insn_reservation "exynos_m1_neon_load3_3" 12
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_3"))
  "(em1_ld * 6)")
|
|
693
|
|
;; neon_load3_one class: default latency 9; load AGU for four cycles, then
;; em1_nalu (F0 or F1) for three cycles.
(define_insn_reservation "exynos_m1_neon_load3_one" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_one"))
  "((em1_ld * 4), (em1_nalu * 3))")
|
|
699
|
|
;; neon_load3_all class: default latency 7; load AGU for three cycles.
(define_insn_reservation "exynos_m1_neon_load3_all" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_all"))
  "(em1_ld * 3)")
|
|
705
|
|
;; neon_load4_4 class: default latency 14; load AGU for seven cycles.
(define_insn_reservation "exynos_m1_neon_load4_4" 14
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_4"))
  "(em1_ld * 7)")
|
|
711
|
|
;; neon_load4_one class: default latency 9; load AGU for four cycles, then
;; em1_nalu (F0 or F1) for four cycles.
(define_insn_reservation "exynos_m1_neon_load4_one" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_one"))
  "((em1_ld * 4), (em1_nalu * 4))")
|
|
717
|
|
;; neon_load4_all class: default latency 8; load AGU for four cycles.
(define_insn_reservation "exynos_m1_neon_load4_all" 8
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_all"))
  "(em1_ld * 4)")
|
|
723
|
|
724 ;; Store Instructions.
|
|
725
|
|
;; Selected on "type" directly (f_stores, f_stored, neon_stp): default
;; latency 1; one cycle on em1_sfst (F1 together with the store units).
(define_insn_reservation "exynos_m1_neon_store" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_stores, f_stored, neon_stp"))
  "em1_sfst")
|
|
731
|
|
;; neon_stp_q: default latency 3; em1_sfst for two cycles.
(define_insn_reservation "exynos_m1_neon_store_q" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_stp_q"))
  "(em1_sfst * 2)")
|
|
737
|
|
;; neon_store1_1 class: default latency 1; em1_sfst for one cycle.
(define_insn_reservation "exynos_m1_neon_store1_1" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_1"))
  "em1_sfst")
|
|
743
|
|
;; neon_store1_2 class: default latency 2; em1_sfst for two cycles.
(define_insn_reservation "exynos_m1_neon_store1_2" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_2"))
  "(em1_sfst * 2)")
|
|
749
|
|
;; neon_store1_3 class: default latency 3; em1_sfst for three cycles.
(define_insn_reservation "exynos_m1_neon_store1_3" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_3"))
  "(em1_sfst * 3)")
|
|
755
|
|
;; neon_store1_4 class: default latency 4; em1_sfst for four cycles.
(define_insn_reservation "exynos_m1_neon_store1_4" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_4"))
  "(em1_sfst * 4)")
|
|
761
|
|
;; neon_store1_one class: default latency 7; em1_sfst for one cycle.
(define_insn_reservation "exynos_m1_neon_store1_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
  "em1_sfst")
|
|
767
|
|
;; neon_store2_2 and neon_store2_one classes: default latency 7; em1_sfst
;; on the first cycle, then em1_fst (F1) on the next.
(define_insn_reservation "exynos_m1_neon_store2" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one"))
  "em1_sfst, em1_fst")
|
|
773
|
|
;; neon_store3_3 and neon_store3_one classes: default latency 16; three
;; cycles of em1_sfst, two of em1_fst (F1), then one of em1_nalu.
(define_insn_reservation "exynos_m1_neon_store3" 16
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one"))
  "((em1_sfst * 3), (em1_fst * 2), em1_nalu)")
|
|
779
|
|
;; neon_store4_4 and neon_store4_one classes: default latency 17; four
;; cycles of em1_sfst, two of em1_fst (F1), then one of em1_nalu.
(define_insn_reservation "exynos_m1_neon_store4" 17
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one"))
  "((em1_sfst * 4), (em1_fst * 2), em1_nalu)")
|
|
785
|
|
786 ;; Floating-Point Operations.
|
|
787
|
|
;; fconsts/fconstd: default latency 2 on either F0 or F1 (em1_nalu).
(define_insn_reservation "exynos_m1_fp_const" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fconsts, fconstd"))
  "em1_nalu")
|
|
792
|
|
;; fadds/faddd: default latency 4 on em1_fadd (F1).
(define_insn_reservation "exynos_m1_fp_add" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fadds, faddd"))
  "em1_fadd")
|
|
797
|
|
;; fmuls/fmuld: default latency 5 on em1_fmac (F0).
(define_insn_reservation "exynos_m1_fp_mul" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmuls, fmuld"))
  "em1_fmac")
|
|
802
|
|
;; fmacs/ffmas/fmacd/ffmad: default latency 6 on em1_fmac (F0).
(define_insn_reservation "exynos_m1_fp_mac" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmacs, ffmas, fmacd, ffmad"))
  "em1_fmac")
|
|
807
|
|
;; f_cvt/f_rints/f_rintd: default latency 4 on em1_fcvt (F0).
(define_insn_reservation "exynos_m1_fp_cvt" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvt, f_rints, f_rintd"))
  "em1_fcvt")
|
|
812
|
|
;; f_cvtf2i: default latency 13; em1_fcvt (F0) on the first cycle, then
;; em1_lfst (F1 together with the load AGU) on the next.
(define_insn_reservation "exynos_m1_fp_cvt_i" 13
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvtf2i"))
  "(em1_fcvt, em1_lfst)")
|
|
817
|
|
;; f_cvti2f: default latency 9; the store units (em1_st) on the first
;; cycle, then em1_fcvt (F0) on the next.
(define_insn_reservation "exynos_m1_i_cvt_fp" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvti2f"))
  "(em1_st, em1_fcvt)")
|
|
822
|
|
;; fcmps/fcmpd: default latency 4 on em1_nmisc (F0).
(define_insn_reservation "exynos_m1_fp_cmp" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fcmps, fcmpd"))
  "em1_nmisc")
|
|
827
|
|
;; fccmps/fccmpd: default latency 7; the store units (em1_st) on the first
;; cycle, then em1_nmisc (F0) on the next.
(define_insn_reservation "exynos_m1_fp_ccmp" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fccmps, fccmpd"))
  "(em1_st, em1_nmisc)")
|
|
832
|
|
;; fcsel: default latency 4; the store units and em1_nalu0 (F0) are
;; reserved together in the same cycle.
(define_insn_reservation "exynos_m1_fp_sel" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fcsel"))
  "(em1_st + em1_nalu0)")
|
|
837
|
|
;; ffariths/ffarithd: default latency 2 on either F0 or F1 (em1_nalu).
(define_insn_reservation "exynos_m1_fp_arith" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "ffariths, ffarithd"))
  "em1_nalu")
|
|
842
|
|
;; fmov: default latency 2 on either F0 or F1 (em1_nalu).
(define_insn_reservation "exynos_m1_fp_cpy" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmov"))
  "em1_nalu")
|
|
847
|
|
;; Single-precision divide and square root, scalar and Neon: default
;; latency 15; em1_fvar (F1) is held for nine cycles.
(define_insn_reservation "exynos_m1_fp_divs" 15
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fdivs, neon_fp_div_s, neon_fp_div_s_q,\
                        fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q"))
  "(em1_fvar * 9)")
|
|
853
|
|
;; Double-precision divide and square root, scalar and Neon: default
;; latency 22; em1_fvar (F1) is held for nine cycles, the same occupancy
;; as the single-precision reservation.
(define_insn_reservation "exynos_m1_fp_divd" 22
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fdivd, neon_fp_div_d, neon_fp_div_d_q,\
                        fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q"))
  "(em1_fvar * 9)")
|
|
859
|
|
;; f_minmaxs/f_minmaxd: default latency 2; em1_nmisc (F0) for two cycles.
(define_insn_reservation "exynos_m1_fp_minmax" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_minmaxs, f_minmaxd"))
  "(em1_nmisc * 2)")
|
|
864
|
|
865 ;; Crypto Operations.
|
|
866
|
|
;; Insns of the crypto_aes* and crypto_sha*_xor/_fast types listed below,
;; when tuning for exynosm1: default latency of 2 cycles, reserving
;; em1_ncrypt (declared outside this section).
867 (define_insn_reservation "exynos_m1_crypto_simple" 2
|
|
868 (and (eq_attr "tune" "exynosm1")
|
|
869 (eq_attr "type" "crypto_aese, crypto_aesmc,\
|
|
870 crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast"))
|
|
871 "em1_ncrypt")
|
|
872
|
|
;; Insns of type crypto_sha1_slow or crypto_sha256_slow when tuning for
;; exynosm1: default latency of 6 cycles, reserving em1_ncrypt (declared
;; outside this section).
873 (define_insn_reservation "exynos_m1_crypto_complex" 6
|
|
874 (and (eq_attr "tune" "exynosm1")
|
|
875 (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
|
|
876 "em1_ncrypt")
|
|
877
|
|
;; Insns of type neon_mul_b_long, neon_mul_h_long or neon_mul_s_long when
;; tuning for exynosm1: default latency of 2 cycles, reserving em1_ncrypt
;; (declared outside this section).
;; NOTE(review): the reservation name suggests polynomial multiplies, but
;; the condition matches the general neon_mul_*_long types -- confirm that
;; routing all of them to em1_ncrypt is intended.
878 (define_insn_reservation "exynos_m1_crypto_poly" 2
|
|
879 (and (eq_attr "tune" "exynosm1")
|
|
880 (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long"))
|
|
881 "em1_ncrypt")
|
|
882
|
|
;; Insns of type neon_mul_d_long when tuning for exynosm1: default latency
;; of 4 cycles, reserving em1_ncrypt (declared outside this section).
883 (define_insn_reservation "exynos_m1_crypto_polyl" 4
|
|
884 (and (eq_attr "tune" "exynosm1")
|
|
885 (eq_attr "type" "neon_mul_d_long"))
|
|
886 "em1_ncrypt")
|
|
887
|
|
;; Insns of type crc when tuning for exynosm1: default latency of 2 cycles,
;; reserving em1_c (declared outside this section).
888 (define_insn_reservation "exynos_m1_crc" 2
|
|
889 (and (eq_attr "tune" "exynosm1")
|
|
890 (eq_attr "type" "crc"))
|
|
891 "em1_c")
|
|
892
|
|
893 ;; Simple execution unit bypasses
|
|
894
|
|
895 ;; Pre-decrement and post-increment addressing modes update the register quickly.
|
|
896 ;; TODO: figure out how to tell the addressing mode register from the loaded one.
|
|
;; Latency is 1 cycle from any reservation matching exynos_m1_store* or
;; exynos_m1_neon_store* to any reservation matching those same globs or
;; exynos_m1_load* / exynos_m1_neon_load*.  No guard function is given, so
;; the bypass applies to every dependence between such insns.
897 (define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*"
|
|
898 "exynos_m1_store*, exynos_m1_neon_store*,
|
|
899 exynos_m1_load*, exynos_m1_neon_load*")
|
|
900
|
|
901 ;; MLAs can feed other MLAs quickly.
|
|
;; Latency is 1 cycle between any two reservations whose names match the
;; glob exynos_m1_mla* (those reservations are defined outside this section).
902 (define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*")
|
|
903
|
|
904 ;; Insns in FMAC or FADD can feed other such insns quickly.
|
|
;; Latency is 4 cycles when exynos_m1_fp_mul feeds exynos_m1_fp_add,
;; exynos_m1_fp_mul or exynos_m1_fp_mac (all defined outside this section).
905 (define_bypass 4 "exynos_m1_fp_mul"
|
|
906 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
|
|
;; Latency is 5 cycles when exynos_m1_fp_mac feeds exynos_m1_fp_add,
;; exynos_m1_fp_mul or exynos_m1_fp_mac (exynos_m1_fp_add and
;; exynos_m1_fp_mul are defined outside this section).
907 (define_bypass 5 "exynos_m1_fp_mac"
|
|
908 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
|
|
;; Latency is 4 cycles when exynos_m1_neon_fp_mul feeds
;; exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul, exynos_m1_neon_fp_mla or
;; exynos_m1_neon_fp_step (all defined outside this section).
909 (define_bypass 4 "exynos_m1_neon_fp_mul"
|
|
910 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
|
|
911 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
|
|
;; Latency is 5 cycles when exynos_m1_neon_fp_mla or exynos_m1_neon_fp_step
;; feeds exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,
;; exynos_m1_neon_fp_mla or exynos_m1_neon_fp_step (all defined outside
;; this section).
912 (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
|
|
913 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
|
|
914 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
|
|
;; Latency is 3 cycles when exynos_m1_fp_add feeds exynos_m1_fp_add,
;; exynos_m1_fp_mul or exynos_m1_fp_mac (all defined outside this section).
915 (define_bypass 3 "exynos_m1_fp_add"
|
|
916 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
|
|
;; Latency is 3 cycles when exynos_m1_neon_fp_add feeds
;; exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul, exynos_m1_neon_fp_mla or
;; exynos_m1_neon_fp_step (all defined outside this section).
917 (define_bypass 3 "exynos_m1_neon_fp_add"
|
|
918 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
|
|
919 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
|
|
920
|
|
921 ;; Insns in NALU can feed other such insns quickly.
|
|
;; Latency is 1 cycle when exynos_m1_fp_const, exynos_m1_fp_arith or
;; exynos_m1_fp_cpy feeds any of those three or exynos_m1_fp_sel.  For
;; exynos_m1_fp_arith and exynos_m1_fp_cpy this is one cycle below their
;; default latency of 2; exynos_m1_fp_const is defined outside this section.
922 (define_bypass 1 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy"
|
|
923 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
|
|
924 exynos_m1_fp_sel")
|
|
;; Latency is 3 cycles (one below the default latency of 4 declared for
;; exynos_m1_fp_sel) when exynos_m1_fp_sel feeds exynos_m1_fp_const,
;; exynos_m1_fp_arith, exynos_m1_fp_cpy or exynos_m1_fp_sel.
925 (define_bypass 3 "exynos_m1_fp_sel"
|
|
926 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
|
|
927 exynos_m1_fp_sel")
|
|
;; Latency is 1 cycle when one of the producers in the first string
;; (arith_basic, shift_basic, bitops, bitins, tbl) feeds one of the
;; consumers in the second string, which additionally lists shift_acc and
;; shift_complex.  All of these reservations are defined outside this
;; section.
;; NOTE(review): the producer list names exynos_m1_neon_bitops exactly,
;; while the consumer list uses the glob exynos_m1_neon_bitops* -- confirm
;; the asymmetry is intended.
928 (define_bypass 1 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
|
|
929 exynos_m1_neon_bitops, exynos_m1_neon_bitins,\
|
|
930 exynos_m1_neon_tbl"
|
|
931 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
|
|
932 exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
|
|
933 exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
|
|
934 exynos_m1_neon_tbl")
|
|
;; Latency is 3 cycles when exynos_m1_neon_shift_acc or
;; exynos_m1_neon_shift_complex feeds any consumer in the second string
;; (arith_basic, shift_basic, shift_acc, shift_complex, anything matching
;; exynos_m1_neon_bitops*, bitins, tbl).  All of these reservations are
;; defined outside this section.
935 (define_bypass 3 "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex"
|
|
936 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
|
|
937 exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
|
|
938 exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
|
|
939 exynos_m1_neon_tbl")
|
|
;; Latency is 1 cycle when exynos_m1_neon_fp_unary feeds another
;; exynos_m1_neon_fp_unary insn (reservation defined outside this section).
940 (define_bypass 1 "exynos_m1_neon_fp_unary" "exynos_m1_neon_fp_unary")
|
|
941
|
|
942 ;; Insns in NCRYPT can feed other such insns quickly.
|
|
;; Latency is 1 cycle (one below the default latency of 2 declared for both
;; producers) when exynos_m1_crypto_simple or exynos_m1_crypto_poly feeds
;; exynos_m1_crypto_simple, exynos_m1_crypto_complex or any reservation
;; matching exynos_m1_crypto_poly*.  That glob also matches
;; exynos_m1_crypto_polyl.
943 (define_bypass 1 "exynos_m1_crypto_simple, exynos_m1_crypto_poly"
|
|
944 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
|
|
945 exynos_m1_crypto_poly*")
|
|
;; Latency is 3 cycles (one below the default latency of 4 declared for
;; exynos_m1_crypto_polyl) when exynos_m1_crypto_polyl feeds
;; exynos_m1_crypto_simple, exynos_m1_crypto_complex or any reservation
;; matching exynos_m1_crypto_poly*.
946 (define_bypass 3 "exynos_m1_crypto_polyl"
|
|
947 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
|
|
948 exynos_m1_crypto_poly*")
|
|
;; Latency is 5 cycles (one below the default latency of 6 declared for
;; exynos_m1_crypto_complex) when exynos_m1_crypto_complex feeds
;; exynos_m1_crypto_simple, exynos_m1_crypto_complex or any reservation
;; matching exynos_m1_crypto_poly*.
949 (define_bypass 5 "exynos_m1_crypto_complex"
|
|
950 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
|
|
951 exynos_m1_crypto_poly*")
|
|
952
|
|
953 ;; Predicted branches take no time, but mispredicted ones take forever anyway.
|
|
;; Latency is 1 cycle from every reservation whose name matches the glob
;; exynos_m1_* to exynos_m1_call and exynos_m1_branch (both defined outside
;; this section).
954 (define_bypass 1 "exynos_m1_*"
|
|
955 "exynos_m1_call, exynos_m1_branch")
|