Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/exynos-m1.md @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 ;; Samsung Exynos M1 pipeline description | |
2 ;; Copyright (C) 2014-2017 Free Software Foundation, Inc. | |
3 ;; | |
4 ;; This file is part of GCC. | |
5 ;; | |
6 ;; GCC is free software; you can redistribute it and/or modify it | |
7 ;; under the terms of the GNU General Public License as published by | |
8 ;; the Free Software Foundation; either version 3, or (at your option) | |
9 ;; any later version. | |
10 ;; | |
11 ;; GCC is distributed in the hope that it will be useful, but | |
12 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 ;; General Public License for more details. | |
15 ;; | |
16 ;; You should have received a copy of the GNU General Public License | |
17 ;; along with GCC; see the file COPYING3. If not see | |
18 ;; <http://www.gnu.org/licenses/>. | |
19 | |
;; exynos_m1_neon_type collapses the generic "type" attribute of Neon/FP | |
;; insns into the coarser classes tested by the Exynos M1 reservations. | |
;; The arms of the cond are tried in order; an insn whose "type" matches | |
;; none of them gets "unknown". | |
;; NOTE(review): the value list also declares neon_mla_q, neon_mla_long, | |
;; neon_shift_reg_basic_q, neon_shift_reg_complex_q, neon_fp_minmax, | |
;; neon_fp_mul_q, neon_fp_mla_q, neon_move and neon_store, but no arm of | |
;; this cond produces them (the neon_fp_minmax_* types map to | |
;; neon_fp_compare and the neon_move types to neon_bitops) -- confirm | |
;; that this is intended. | |
20 (define_attr "exynos_m1_neon_type" | |
21 "neon_arith_simple, neon_arith_basic, neon_arith_complex, | |
22 neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long, | |
23 neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex, | |
24 neon_shift_reg_basic, neon_shift_reg_basic_q, | |
25 neon_shift_reg_complex, neon_shift_reg_complex_q, | |
26 neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare, | |
27 neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt, | |
28 neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, | |
29 neon_fp_estimate, neon_fp_estimatex, neon_fp_step, | |
30 neon_bitops, neon_bitops_q, neon_bitins, | |
31 neon_to_gp, neon_from_gp, neon_move, neon_tbl, | |
32 neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4, | |
33 neon_load1_one, neon_load1_all, | |
34 neon_load2_2, neon_load2_one, neon_load2_all, | |
35 neon_load3_3, neon_load3_one, neon_load3_all, | |
36 neon_load4_4, neon_load4_one, neon_load4_all, | |
37 neon_store, | |
38 neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one, | |
39 neon_store2_2, neon_store2_one, | |
40 neon_store3_3, neon_store3_one, | |
41 neon_store4_4, neon_store4_one, | |
42 unknown" | |
43 (cond [ | |
44 (eq_attr "type" "neon_abd, neon_abd_q, neon_abd_long,\ | |
45 neon_abs, neon_abs_q,\ | |
46 neon_minmax, neon_minmax_q") | |
47 (const_string "neon_arith_simple") | |
48 | |
49 (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\ | |
50 neon_neg, neon_neg_q,\ | |
51 neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\ | |
52 neon_logic, neon_logic_q, neon_tst, neon_tst_q,\ | |
53 neon_compare_zero, neon_compare_zero_q") | |
54 (const_string "neon_arith_basic") | |
55 | |
56 (eq_attr "type" "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\ | |
57 neon_reduc_add, neon_reduc_add_q,\ | |
58 neon_reduc_add_acc, neon_reduc_add_acc_q,\ | |
59 neon_reduc_add_long, neon_add_halve_narrow_q,\ | |
60 neon_add_halve, neon_add_halve_q,\ | |
61 neon_sub_halve, neon_sub_halve_q, neon_qabs,\ | |
62 neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\ | |
63 neon_qneg_q, neon_qsub, neon_qsub_q,\ | |
64 neon_sub_halve_narrow_q,\ | |
65 neon_compare, neon_compare_q,\ | |
66 neon_reduc_minmax, neon_reduc_minmax_q") | |
67 (const_string "neon_arith_complex") | |
68 | |
69 (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\ | |
70 neon_mul_s, neon_mul_s_q,\ | |
71 neon_mul_h_scalar, neon_mul_h_scalar_q,\ | |
72 neon_mul_s_scalar, neon_mul_s_scalar_q,\ | |
73 neon_mul_h_scalar_long, neon_mul_s_scalar_long,\ | |
74 neon_sat_mul_b, neon_sat_mul_b_q,\ | |
75 neon_sat_mul_h, neon_sat_mul_h_q,\ | |
76 neon_sat_mul_s, neon_sat_mul_s_q,\ | |
77 neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\ | |
78 neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\ | |
79 neon_sat_mul_b_long, neon_sat_mul_h_long,\ | |
80 neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\ | |
81 neon_sat_mul_s_scalar_long, crypto_pmull") | |
82 (const_string "neon_multiply") | |
83 | |
84 (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\ | |
85 neon_mla_h_scalar, neon_mla_s_scalar,\ | |
86 neon_mla_b_long, neon_mla_h_long,\ | |
87 neon_mla_s_long,\ | |
88 neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ | |
89 neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\ | |
90 neon_mla_h_scalar_q, neon_mla_s_scalar_q") | |
91 (const_string "neon_mla") | |
92 | |
93 (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\ | |
94 neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ | |
95 neon_sat_mla_s_scalar_long") | |
96 (const_string "neon_sat_mla_long") | |
97 | |
98 (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") | |
99 (const_string "neon_shift_acc") | |
100 | |
101 (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ | |
102 neon_shift_imm_narrow_q, neon_shift_imm_long") | |
103 (const_string "neon_shift_imm_basic") | |
104 | |
105 (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\ | |
106 neon_sat_shift_imm_narrow_q") | |
107 (const_string "neon_shift_imm_complex") | |
108 | |
109 (eq_attr "type" "neon_shift_reg, neon_shift_reg_q") | |
110 (const_string "neon_shift_reg_basic") | |
111 | |
112 (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q") | |
113 (const_string "neon_shift_reg_complex") | |
114 | |
115 (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\ | |
116 neon_fp_abs_s, neon_fp_abs_s_q,\ | |
117 neon_fp_neg_d, neon_fp_neg_d_q,\ | |
118 neon_fp_abs_d, neon_fp_abs_d_q") | |
119 (const_string "neon_fp_unary") | |
120 | |
121 (eq_attr "type" "neon_fp_addsub_s, neon_fp_addsub_s_q,\ | |
122 neon_fp_addsub_d, neon_fp_addsub_d_q") | |
123 (const_string "neon_fp_add") | |
124 | |
125 (eq_attr "type" "neon_fp_abd_s, neon_fp_abd_s_q,\ | |
126 neon_fp_abd_d, neon_fp_abd_d_q") | |
127 (const_string "neon_fp_abd") | |
128 | |
129 (eq_attr "type" "neon_fp_compare_s, neon_fp_compare_s_q,\ | |
130 neon_fp_compare_d, neon_fp_compare_d_q,\ | |
131 neon_fp_minmax_s, neon_fp_minmax_s_q,\ | |
132 neon_fp_minmax_d, neon_fp_minmax_d_q") | |
133 (const_string "neon_fp_compare") | |
134 | |
135 (eq_attr "type" "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\ | |
136 neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q") | |
137 (const_string "neon_fp_reduc_minmax") | |
138 | |
139 (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\ | |
140 neon_fp_reduc_add_d, neon_fp_reduc_add_d_q") | |
141 (const_string "neon_fp_reduc_add") | |
142 | |
143 (eq_attr "type" "neon_fp_round_s, neon_fp_round_s_q,\ | |
144 neon_fp_round_d, neon_fp_round_d_q") | |
145 (const_string "neon_fp_round") | |
146 | |
147 (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h, | |
148 neon_fp_to_int_s, neon_fp_to_int_s_q,\ | |
149 neon_fp_to_int_d_q, neon_fp_to_int_d,\ | |
150 neon_int_to_fp_s, neon_int_to_fp_s_q,\ | |
151 neon_int_to_fp_d, neon_int_to_fp_d_q") | |
152 (const_string "neon_fp_cvt") | |
153 | |
154 (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_q,\ | |
155 neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\ | |
156 neon_fp_mul_d, neon_fp_mul_d_q,\ | |
157 neon_fp_mul_d_scalar_q") | |
158 (const_string "neon_fp_mul") | |
159 | |
160 (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_q,\ | |
161 neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ | |
162 neon_fp_mla_d, neon_fp_mla_d_q,\ | |
163 neon_fp_mla_d_scalar_q") | |
164 (const_string "neon_fp_mla") | |
165 | |
166 (eq_attr "type" "neon_fp_recpe_s, neon_fp_recpe_s_q,\ | |
167 neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ | |
168 neon_fp_recpe_d, neon_fp_recpe_d_q,\ | |
169 neon_fp_rsqrte_d, neon_fp_rsqrte_d_q") | |
170 (const_string "neon_fp_estimate") | |
171 | |
172 (eq_attr "type" "neon_fp_recpx_s, neon_fp_recpx_s_q,\ | |
173 neon_fp_recpx_d, neon_fp_recpx_d_q") | |
174 (const_string "neon_fp_estimatex") | |
175 | |
176 (eq_attr "type" "neon_fp_recps_s, neon_fp_recps_s_q,\ | |
177 neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\ | |
178 neon_fp_recps_d, neon_fp_recps_d_q,\ | |
179 neon_fp_rsqrts_d, neon_fp_rsqrts_d_q") | |
180 (const_string "neon_fp_step") | |
181 | |
182 (eq_attr "type" "neon_rbit, neon_rbit_q,\ | |
183 neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\ | |
184 neon_dup, neon_dup_q,\ | |
185 neon_rev, neon_rev_q,\ | |
186 neon_move, neon_move_q, | |
187 neon_ext, neon_permute, neon_zip") | |
188 (const_string "neon_bitops") | |
189 | |
190 (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q") | |
191 (const_string "neon_bitops_q") | |
192 | |
193 (eq_attr "type" "neon_bsl, neon_bsl_q") | |
194 (const_string "neon_bitins") | |
195 | |
196 (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4") | |
197 (const_string "neon_tbl") | |
198 | |
199 (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr") | |
200 (const_string "neon_from_gp") | |
201 | |
202 (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc") | |
203 (const_string "neon_to_gp") | |
204 | |
205 (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q") | |
206 (const_string "neon_load1_1") | |
207 | |
208 (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q") | |
209 (const_string "neon_load1_2") | |
210 | |
211 (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q") | |
212 (const_string "neon_load1_3") | |
213 | |
214 (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q") | |
215 (const_string "neon_load1_4") | |
216 | |
217 (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q") | |
218 (const_string "neon_load1_one") | |
219 | |
220 (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q") | |
221 (const_string "neon_load1_all") | |
222 | |
223 (eq_attr "type" "neon_load2_2reg, neon_load2_2reg_q,\ | |
224 neon_load2_4reg, neon_load2_4reg_q") | |
225 (const_string "neon_load2_2") | |
226 | |
227 (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q") | |
228 (const_string "neon_load2_one") | |
229 | |
230 (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q") | |
231 (const_string "neon_load2_all") | |
232 | |
233 (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q") | |
234 (const_string "neon_load3_3") | |
235 | |
236 (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q") | |
237 (const_string "neon_load3_one") | |
238 | |
239 (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q") | |
240 (const_string "neon_load3_all") | |
241 | |
242 (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q") | |
243 (const_string "neon_load4_4") | |
244 | |
245 (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q") | |
246 (const_string "neon_load4_one") | |
247 | |
248 (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q") | |
249 (const_string "neon_load4_all") | |
250 | |
251 (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q") | |
252 (const_string "neon_store1_1") | |
253 | |
254 (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q") | |
255 (const_string "neon_store1_2") | |
256 | |
257 (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q") | |
258 (const_string "neon_store1_3") | |
259 | |
260 (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q") | |
261 (const_string "neon_store1_4") | |
262 | |
263 (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q") | |
264 (const_string "neon_store1_one") | |
265 | |
266 (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\ | |
267 neon_store2_4reg, neon_store2_4reg_q") | |
268 (const_string "neon_store2_2") | |
269 | |
270 (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q") | |
271 (const_string "neon_store2_one") | |
272 | |
273 (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q") | |
274 (const_string "neon_store3_3") | |
275 | |
276 (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q") | |
277 (const_string "neon_store3_one") | |
278 | |
279 (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q") | |
280 (const_string "neon_store4_4") | |
281 | |
282 (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q") | |
283 (const_string "neon_store4_one")] | |
284 | |
285 (const_string "unknown"))) | |
286 | |
287 ;; The Exynos M1 core is modeled as a triple issue pipeline that has | |
288 ;; the following functional units. | |
289 | |
;; Three automata: exynos_m1_gp holds the integer and branch pipes, | |
;; exynos_m1_ls the load/store units and exynos_m1_fp the Neon/FP pipes. | |
290 (define_automaton "exynos_m1_gp") | |
291 (define_automaton "exynos_m1_ls") | |
292 (define_automaton "exynos_m1_fp") | |
293 | |
294 ;; 1. Two pipelines for simple integer operations: A, B | |
295 ;; 2. One pipeline for simple or complex integer operations: C | |
296 | |
297 (define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp") | |
298 | |
;; em1_alu accepts any of the three integer pipes; em1_c is pipe C only. | |
299 (define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)") | |
300 (define_reservation "em1_c" "em1_xc") | |
301 | |
302 ;; 3. Two asymmetric pipelines for Neon and FP operations: F0, F1 | |
303 | |
304 (define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp") | |
305 | |
;; Aliases recording which pipe each class of operation uses: | |
;; F0 for fmac, fcvt, nalu0, nmisc and ncrypt; F1 for nalu1, fadd, fvar | |
;; and fst; em1_nalu accepts either pipe. | |
306 (define_reservation "em1_fmac" "em1_f0") | |
307 (define_reservation "em1_fcvt" "em1_f0") | |
308 (define_reservation "em1_nalu" "(em1_f0 | em1_f1)") | |
309 (define_reservation "em1_nalu0" "em1_f0") | |
310 (define_reservation "em1_nalu1" "em1_f1") | |
311 (define_reservation "em1_nmisc" "em1_f0") | |
312 (define_reservation "em1_ncrypt" "em1_f0") | |
313 (define_reservation "em1_fadd" "em1_f1") | |
314 (define_reservation "em1_fvar" "em1_f1") | |
315 (define_reservation "em1_fst" "em1_f1") | |
316 | |
317 ;; 4. One pipeline for branch operations: BX | |
318 | |
;; The branch pipe is declared in the same automaton as the integer pipes. | |
319 (define_cpu_unit "em1_bx" "exynos_m1_gp") | |
320 | |
321 (define_reservation "em1_br" "em1_bx") | |
322 | |
323 ;; 5. One AGU for loads: L | |
324 ;; One AGU for stores and one pipeline for stores: S, SD | |
325 | |
326 (define_cpu_unit "em1_lx" "exynos_m1_ls") | |
327 (define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls") | |
328 | |
;; A load claims the load AGU; a store claims the store AGU and the store | |
;; data pipe in the same cycle. | |
329 (define_reservation "em1_ld" "em1_lx") | |
330 (define_reservation "em1_st" "(em1_sx + em1_sd)") | |
331 | |
332 ;; Common occurrences | |
;; em1_sfst and em1_lfst claim em1_fst (pipe F1) in the same cycle as the | |
;; store or load units respectively. | |
333 (define_reservation "em1_sfst" "(em1_fst + em1_st)") | |
334 (define_reservation "em1_lfst" "(em1_fst + em1_ld)") | |
335 | |
336 ;; Branches | |
337 ;; | |
338 ;; No latency as there is no result | |
339 ;; TODO: Unconditional branches use no units; | |
340 ;; conditional branches add the BX unit; | |
341 ;; indirect branches add the C unit. | |
342 (define_insn_reservation "exynos_m1_branch" 0 | |
343 (and (eq_attr "tune" "exynosm1") | |
344 (eq_attr "type" "branch")) | |
345 "em1_br") | |
346 | |
;; Calls: default latency 1, issued on any of the three integer pipes. | |
347 (define_insn_reservation "exynos_m1_call" 1 | |
348 (and (eq_attr "tune" "exynosm1") | |
349 (eq_attr "type" "call")) | |
350 "em1_alu") | |
351 | |
352 ;; Basic ALU | |
353 ;; | |
354 ;; Simple ALU without shift, non-predicated | |
355 (define_insn_reservation "exynos_m1_alu" 1 | |
356 (and (eq_attr "tune" "exynosm1") | |
357 (and (not (eq_attr "predicated" "yes")) | |
358 (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\ | |
359 alu_sreg, alus_sreg, logic_reg, logics_reg,\ | |
360 adc_imm, adcs_imm, adc_reg, adcs_reg,\ | |
361 adr, bfm, bfx, clz, rbit, rev, csel, alu_dsp_reg,\ | |
362 shift_imm, shift_reg, rotate_imm, extend,\ | |
363 mov_imm, mov_reg,\ | |
364 mvn_imm, mvn_reg,\ | |
365 mrs, multiple"))) | |
366 "em1_alu") | |
367 | |
368 ;; Simple ALU without shift, predicated | |
;; Same insn types as exynos_m1_alu except that csel is not listed; | |
;; predicated insns are restricted to pipe C instead of any integer pipe. | |
369 (define_insn_reservation "exynos_m1_alu_p" 1 | |
370 (and (eq_attr "tune" "exynosm1") | |
371 (and (eq_attr "predicated" "yes") | |
372 (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\ | |
373 alu_sreg, alus_sreg, logic_reg, logics_reg,\ | |
374 adc_imm, adcs_imm, adc_reg, adcs_reg,\ | |
375 adr, bfm, bfx, clz, rbit, rev, alu_dsp_reg,\ | |
376 shift_imm, shift_reg, rotate_imm, extend,\ | |
377 mov_imm, mov_reg,\ | |
378 mvn_imm, mvn_reg,\ | |
379 mrs, multiple"))) | |
380 "em1_c") | |
381 | |
382 ;; ALU ops with immediate shift | |
383 ;; TODO: if the shift value is between 0 and 3, the latency is just 1 cycle; | |
384 ;; otherwise it takes 2 cycles and the unit is blocked; | |
385 ;; for now, assume the latter's latency and the former's units. | |
386 (define_insn_reservation "exynos_m1_alu_shift" 2 | |
387 (and (eq_attr "tune" "exynosm1") | |
388 (eq_attr "type" "alu_ext, alus_ext,\ | |
389 alu_shift_imm, alus_shift_imm,\ | |
390 logic_shift_imm, logics_shift_imm,\ | |
391 mov_shift, mvn_shift")) | |
392 "(em1_alu)") | |
393 | |
394 ;; ALU ops with register controlled shift, non-predicated | |
;; Claims an integer pipe in each of two consecutive cycles. | |
395 (define_insn_reservation "exynos_m1_alu_shift_reg" 2 | |
396 (and (eq_attr "tune" "exynosm1") | |
397 (and (not (eq_attr "predicated" "yes")) | |
398 (eq_attr "type" "alu_shift_reg, alus_shift_reg,\ | |
399 logic_shift_reg, logics_shift_reg,\ | |
400 mov_shift_reg, mvn_shift_reg"))) | |
401 "(em1_alu * 2)") | |
402 | |
403 ;; ALU ops with register controlled shift, predicated | |
;; Claims any integer pipe in the first cycle, then pipe C in the next. | |
404 (define_insn_reservation "exynos_m1_alu_shift_reg_p" 2 | |
405 (and (eq_attr "tune" "exynosm1") | |
406 (and (eq_attr "predicated" "yes") | |
407 (eq_attr "type" "alu_shift_reg, alus_shift_reg,\ | |
408 logic_shift_reg, logics_shift_reg,\ | |
409 mov_shift_reg, mvn_shift_reg"))) | |
410 "(em1_alu, em1_c)") | |
411 | |
412 ;; Integer multiply | |
;; Insns with the mul32 attribute: latency 3 on pipe C. | |
413 (define_insn_reservation "exynos_m1_mla" 3 | |
414 (and (eq_attr "tune" "exynosm1") | |
415 (eq_attr "mul32" "yes")) | |
416 "em1_c") | |
417 | |
;; Insns with the mul64 attribute: latency 4; any integer pipe in the | |
;; first cycle, then pipe C in the next. | |
418 (define_insn_reservation "exynos_m1_mlal" 4 | |
419 (and (eq_attr "tune" "exynosm1") | |
420 (eq_attr "mul64" "yes")) | |
421 "em1_alu, em1_c") | |
422 | |
423 ;; Integer divide | |
424 ;; TODO: assume the median latency; blocks other divisions | |
;; As written, the reservation holds pipe C for a single cycle only. | |
425 (define_insn_reservation "exynos_m1_div" 13 | |
426 (and (eq_attr "tune" "exynosm1") | |
427 (eq_attr "type" "udiv, sdiv")) | |
428 "em1_c") | |
429 | |
430 ;; Load-store execution Unit | |
431 ;; | |
432 ;; Loads of up to 2 words. | |
433 (define_insn_reservation "exynos_m1_load" 4 | |
434 (and (eq_attr "tune" "exynosm1") | |
435 (eq_attr "type" "load_byte, load_4, load_8")) | |
436 "em1_ld") | |
437 | |
438 ;; Loads of 3 or 4 words. | |
;; Holds the load AGU for three consecutive cycles. | |
439 (define_insn_reservation "exynos_m1_loadm" 6 | |
440 (and (eq_attr "tune" "exynosm1") | |
441 (eq_attr "type" "load_12, load_16")) | |
442 "(em1_ld * 3)") | |
443 | |
444 ;; Stores of up to 2 words. | |
445 (define_insn_reservation "exynos_m1_store" 1 | |
446 (and (eq_attr "tune" "exynosm1") | |
447 (eq_attr "type" "store_4, store_8")) | |
448 "em1_st") | |
449 | |
450 ;; Stores of 3 or 4 words. | |
;; Holds the store AGU and store data pipe for three consecutive cycles. | |
451 (define_insn_reservation "exynos_m1_storem" 3 | |
452 (and (eq_attr "tune" "exynosm1") | |
453 (eq_attr "type" "store_12, store_16")) | |
454 "(em1_st * 3)") | |
455 | |
456 ;; Advanced SIMD Unit | |
457 ;; | |
458 ;; Integer Arithmetic Instructions. | |
459 | |
;; Simple and complex arithmetic use em1_nmisc (pipe F0) with latencies 1 | |
;; and 3; basic arithmetic may use either Neon pipe with latency 2. | |
;; NOTE(review): "exynos_m1_arith_simple" lacks the "neon_" infix used by | |
;; the sibling reservation names -- confirm whether that is intentional | |
;; before relying on name patterns such as exynos_m1_neon_*. | |
460 (define_insn_reservation "exynos_m1_arith_simple" 1 | |
461 (and (eq_attr "tune" "exynosm1") | |
462 (eq_attr "exynos_m1_neon_type" "neon_arith_simple")) | |
463 "em1_nmisc") | |
464 | |
465 (define_insn_reservation "exynos_m1_neon_arith_basic" 2 | |
466 (and (eq_attr "tune" "exynosm1") | |
467 (eq_attr "exynos_m1_neon_type" "neon_arith_basic")) | |
468 "em1_nalu") | |
469 | |
470 (define_insn_reservation "exynos_m1_neon_arith_complex" 3 | |
471 (and (eq_attr "tune" "exynosm1") | |
472 (eq_attr "exynos_m1_neon_type" "neon_arith_complex")) | |
473 "em1_nmisc") | |
474 | |
475 ;; Integer Multiply Instructions. | |
476 | |
;; One reservation covers the neon_multiply, neon_mla and | |
;; neon_sat_mla_long classes: latency 4 on em1_nmisc (pipe F0). | |
477 (define_insn_reservation "exynos_m1_neon_multiply" 4 | |
478 (and (eq_attr "tune" "exynosm1") | |
479 (eq_attr "exynos_m1_neon_type" | |
480 "neon_multiply, neon_mla, neon_sat_mla_long")) | |
481 "em1_nmisc") | |
482 | |
483 ;; Integer Shift Instructions. | |
484 | |
;; Accumulating shifts and the "complex" shift classes (the neon_sat_shift_* | |
;; types) are tied to pipe F1 with latency 4; basic shifts may use either | |
;; Neon pipe with latency 2. | |
485 (define_insn_reservation | |
486 "exynos_m1_neon_shift_acc" 4 | |
487 (and (eq_attr "tune" "exynosm1") | |
488 (eq_attr "exynos_m1_neon_type" "neon_shift_acc")) | |
489 "em1_nalu1") | |
490 | |
491 (define_insn_reservation | |
492 "exynos_m1_neon_shift_basic" 2 | |
493 (and (eq_attr "tune" "exynosm1") | |
494 (eq_attr "exynos_m1_neon_type" | |
495 "neon_shift_imm_basic, neon_shift_reg_basic")) | |
496 "em1_nalu") | |
497 | |
498 (define_insn_reservation | |
499 "exynos_m1_neon_shift_complex" 4 | |
500 (and (eq_attr "tune" "exynosm1") | |
501 (eq_attr "exynos_m1_neon_type" | |
502 "neon_shift_imm_complex, neon_shift_reg_complex")) | |
503 "em1_nalu1") | |
504 | |
505 ;; Floating Point Instructions. | |
506 | |
;; Each reservation below is keyed on one exynos_m1_neon_type class; the | |
;; number after the name is the default latency and the final string names | |
;; the pipe alias that is claimed. | |
507 (define_insn_reservation | |
508 "exynos_m1_neon_fp_unary" 2 | |
509 (and (eq_attr "tune" "exynosm1") | |
510 (eq_attr "exynos_m1_neon_type" "neon_fp_unary")) | |
511 "em1_nalu") | |
512 | |
513 (define_insn_reservation | |
514 "exynos_m1_neon_fp_add" 4 | |
515 (and (eq_attr "tune" "exynosm1") | |
516 (eq_attr "exynos_m1_neon_type" "neon_fp_add")) | |
517 "em1_fadd") | |
518 | |
519 (define_insn_reservation | |
520 "exynos_m1_neon_fp_abd" 3 | |
521 (and (eq_attr "tune" "exynosm1") | |
522 (eq_attr "exynos_m1_neon_type" "neon_fp_abd")) | |
523 "em1_nmisc") | |
524 | |
525 (define_insn_reservation | |
526 "exynos_m1_neon_fp_compare" 1 | |
527 (and (eq_attr "tune" "exynosm1") | |
528 (eq_attr "exynos_m1_neon_type" "neon_fp_compare")) | |
529 "em1_nmisc") | |
530 | |
531 ;; TODO: the latency and throughput of reduce insns actually vary between | |
532 ;; 3-5 and 1/4-1, respectively, but the median values were picked. | |
;; NOTE(review): latency 5 with four cycles of em1_nmisc looks like the | |
;; slow end of both ranges rather than the median -- confirm. | |
533 (define_insn_reservation | |
534 "exynos_m1_neon_fp_reduc" 5 | |
535 (and (eq_attr "tune" "exynosm1") | |
536 (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax")) | |
537 "(em1_nmisc * 4)") | |
538 | |
;; Two cycles on either Neon pipe, followed by one cycle on em1_fadd (F1). | |
539 (define_insn_reservation | |
540 "exynos_m1_neon_fp_reduc_add" 10 | |
541 (and (eq_attr "tune" "exynosm1") | |
542 (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add")) | |
543 "((em1_nalu * 2), em1_fadd)") | |
544 | |
545 (define_insn_reservation | |
546 "exynos_m1_neon_fp_round" 4 | |
547 (and (eq_attr "tune" "exynosm1") | |
548 (eq_attr "exynos_m1_neon_type" "neon_fp_round")) | |
549 "em1_fcvt") | |
550 | |
551 (define_insn_reservation | |
552 "exynos_m1_neon_fp_cvt" 4 | |
553 (and (eq_attr "tune" "exynosm1") | |
554 (eq_attr "exynos_m1_neon_type" "neon_fp_cvt")) | |
555 "em1_fcvt") | |
556 | |
557 (define_insn_reservation | |
558 "exynos_m1_neon_fp_mul" 5 | |
559 (and (eq_attr "tune" "exynosm1") | |
560 (eq_attr "exynos_m1_neon_type" "neon_fp_mul")) | |
561 "em1_fmac") | |
562 | |
563 (define_insn_reservation | |
564 "exynos_m1_neon_fp_mla" 6 | |
565 (and (eq_attr "tune" "exynosm1") | |
566 (eq_attr "exynos_m1_neon_type" "neon_fp_mla")) | |
567 "em1_fmac") | |
568 | |
569 (define_insn_reservation | |
570 "exynos_m1_neon_fp_estimate" 5 | |
571 (and (eq_attr "tune" "exynosm1") | |
572 (eq_attr "exynos_m1_neon_type" "neon_fp_estimate")) | |
573 "em1_fcvt") | |
574 | |
575 (define_insn_reservation | |
576 "exynos_m1_neon_fp_estimatex" 1 | |
577 (and (eq_attr "tune" "exynosm1") | |
578 (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex")) | |
579 "em1_nmisc") | |
580 | |
581 (define_insn_reservation | |
582 "exynos_m1_neon_fp_step" 6 | |
583 (and (eq_attr "tune" "exynosm1") | |
584 (eq_attr "exynos_m1_neon_type" "neon_fp_step")) | |
585 "em1_fmac") | |
586 | |
587 ;; Miscellaneous Instructions. | |
588 | |
589 (define_insn_reservation | |
590 "exynos_m1_neon_bitops" 2 | |
591 (and (eq_attr "tune" "exynosm1") | |
592 (eq_attr "exynos_m1_neon_type" "neon_bitops")) | |
593 "em1_nalu") | |
594 | |
;; The _q class claims a Neon pipe in each of two consecutive cycles. | |
595 (define_insn_reservation | |
596 "exynos_m1_neon_bitops_q" 3 | |
597 (and (eq_attr "tune" "exynosm1") | |
598 (eq_attr "exynos_m1_neon_type" "neon_bitops_q")) | |
599 "(em1_nalu, em1_nalu)") | |
600 | |
601 (define_insn_reservation | |
602 "exynos_m1_neon_bitins" 2 | |
603 (and (eq_attr "tune" "exynosm1") | |
604 (eq_attr "exynos_m1_neon_type" "neon_bitins")) | |
605 "em1_nalu1") | |
606 | |
607 ;; TODO: it is more complicated than this. | |
608 (define_insn_reservation | |
609 "exynos_m1_neon_tbl" 2 | |
610 (and (eq_attr "tune" "exynosm1") | |
611 (eq_attr "exynos_m1_neon_type" "neon_tbl")) | |
612 "em1_nalu1") | |
613 | |
;; Transfers from GP registers claim the store AGU and store data pipe | |
;; (em1_st); transfers to GP registers claim em1_fst (pipe F1) together | |
;; with the load AGU (em1_lfst). | |
614 (define_insn_reservation | |
615 "exynos_m1_neon_from_gp" 4 | |
616 (and (eq_attr "tune" "exynosm1") | |
617 (eq_attr "exynos_m1_neon_type" "neon_from_gp")) | |
618 "em1_st") | |
619 | |
620 (define_insn_reservation | |
621 "exynos_m1_neon_to_gp" 9 | |
622 (and (eq_attr "tune" "exynosm1") | |
623 (eq_attr "exynos_m1_neon_type" "neon_to_gp")) | |
624 "em1_lfst") | |
625 | |
626 ;; Load Instructions. | |
627 | |
;; The first two reservations test the generic "type" attribute directly | |
;; (f_loads, f_loadd, neon_ldp, neon_ldp_q); the rest are keyed on | |
;; exynos_m1_neon_type.  Multi-register and all-lanes loads only hold the | |
;; load AGU, for one or more cycles; the single-lane ("_one") classes hold | |
;; the load AGU and then claim a Neon pipe for further cycles. | |
628 (define_insn_reservation | |
629 "exynos_m1_neon_load" 5 | |
630 (and (eq_attr "tune" "exynosm1") | |
631 (eq_attr "type" "f_loads, f_loadd, neon_ldp")) | |
632 "em1_ld") | |
633 | |
634 (define_insn_reservation | |
635 "exynos_m1_neon_load_q" 6 | |
636 (and (eq_attr "tune" "exynosm1") | |
637 (eq_attr "type" "neon_ldp_q")) | |
638 "(em1_ld, em1_ld)") | |
639 | |
640 (define_insn_reservation | |
641 "exynos_m1_neon_load1_1" 6 | |
642 (and (eq_attr "tune" "exynosm1") | |
643 (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all")) | |
644 "em1_ld") | |
645 | |
646 (define_insn_reservation | |
647 "exynos_m1_neon_load1_2" 6 | |
648 (and (eq_attr "tune" "exynosm1") | |
649 (eq_attr "exynos_m1_neon_type" "neon_load1_2")) | |
650 "(em1_ld * 2)") | |
651 | |
652 (define_insn_reservation | |
653 "exynos_m1_neon_load1_3" 7 | |
654 (and (eq_attr "tune" "exynosm1") | |
655 (eq_attr "exynos_m1_neon_type" "neon_load1_3")) | |
656 "(em1_ld * 3)") | |
657 | |
658 (define_insn_reservation | |
659 "exynos_m1_neon_load1_4" 8 | |
660 (and (eq_attr "tune" "exynosm1") | |
661 (eq_attr "exynos_m1_neon_type" "neon_load1_4")) | |
662 "(em1_ld * 4)") | |
663 | |
664 (define_insn_reservation | |
665 "exynos_m1_neon_load1_one" 7 | |
666 (and (eq_attr "tune" "exynosm1") | |
667 (eq_attr "exynos_m1_neon_type" "neon_load1_one")) | |
668 "((em1_ld * 2), em1_nalu)") | |
669 | |
670 (define_insn_reservation | |
671 "exynos_m1_neon_load2_2" 10 | |
672 (and (eq_attr "tune" "exynosm1") | |
673 (eq_attr "exynos_m1_neon_type" "neon_load2_2")) | |
674 "(em1_ld * 5)") | |
675 | |
676 (define_insn_reservation | |
677 "exynos_m1_neon_load2_one" 7 | |
678 (and (eq_attr "tune" "exynosm1") | |
679 (eq_attr "exynos_m1_neon_type" "neon_load2_one")) | |
680 "((em1_ld * 2), (em1_nalu * 2))") | |
681 | |
682 (define_insn_reservation | |
683 "exynos_m1_neon_load2_all" 6 | |
684 (and (eq_attr "tune" "exynosm1") | |
685 (eq_attr "exynos_m1_neon_type" "neon_load2_all")) | |
686 "(em1_ld * 2)") | |
687 | |
688 (define_insn_reservation | |
689 "exynos_m1_neon_load3_3" 12 | |
690 (and (eq_attr "tune" "exynosm1") | |
691 (eq_attr "exynos_m1_neon_type" "neon_load3_3")) | |
692 "(em1_ld * 6)") | |
693 | |
694 (define_insn_reservation | |
695 "exynos_m1_neon_load3_one" 9 | |
696 (and (eq_attr "tune" "exynosm1") | |
697 (eq_attr "exynos_m1_neon_type" "neon_load3_one")) | |
698 "((em1_ld * 4), (em1_nalu * 3))") | |
699 | |
700 (define_insn_reservation | |
701 "exynos_m1_neon_load3_all" 7 | |
702 (and (eq_attr "tune" "exynosm1") | |
703 (eq_attr "exynos_m1_neon_type" "neon_load3_all")) | |
704 "(em1_ld * 3)") | |
705 | |
706 (define_insn_reservation | |
707 "exynos_m1_neon_load4_4" 14 | |
708 (and (eq_attr "tune" "exynosm1") | |
709 (eq_attr "exynos_m1_neon_type" "neon_load4_4")) | |
710 "(em1_ld * 7)") | |
711 | |
712 (define_insn_reservation | |
713 "exynos_m1_neon_load4_one" 9 | |
714 (and (eq_attr "tune" "exynosm1") | |
715 (eq_attr "exynos_m1_neon_type" "neon_load4_one")) | |
716 "((em1_ld * 4), (em1_nalu * 4))") | |
717 | |
718 (define_insn_reservation | |
719 "exynos_m1_neon_load4_all" 8 | |
720 (and (eq_attr "tune" "exynosm1") | |
721 (eq_attr "exynos_m1_neon_type" "neon_load4_all")) | |
722 "(em1_ld * 4)") | |
723 | |
724 ;; Store Instructions. | |
725 | |
;; The first two reservations test the generic "type" attribute directly | |
;; (f_stores, f_stored, neon_stp, neon_stp_q); the rest are keyed on | |
;; exynos_m1_neon_type.  All of them start with em1_sfst, i.e. em1_fst | |
;; (pipe F1) together with the store AGU and store data pipe.  The | |
;; store2/store3/store4 reservations each cover both the multi-register | |
;; and the single-lane class. | |
726 (define_insn_reservation | |
727 "exynos_m1_neon_store" 1 | |
728 (and (eq_attr "tune" "exynosm1") | |
729 (eq_attr "type" "f_stores, f_stored, neon_stp")) | |
730 "em1_sfst") | |
731 | |
732 (define_insn_reservation | |
733 "exynos_m1_neon_store_q" 3 | |
734 (and (eq_attr "tune" "exynosm1") | |
735 (eq_attr "type" "neon_stp_q")) | |
736 "(em1_sfst * 2)") | |
737 | |
738 (define_insn_reservation | |
739 "exynos_m1_neon_store1_1" 1 | |
740 (and (eq_attr "tune" "exynosm1") | |
741 (eq_attr "exynos_m1_neon_type" "neon_store1_1")) | |
742 "em1_sfst") | |
743 | |
744 (define_insn_reservation | |
745 "exynos_m1_neon_store1_2" 2 | |
746 (and (eq_attr "tune" "exynosm1") | |
747 (eq_attr "exynos_m1_neon_type" "neon_store1_2")) | |
748 "(em1_sfst * 2)") | |
749 | |
750 (define_insn_reservation | |
751 "exynos_m1_neon_store1_3" 3 | |
752 (and (eq_attr "tune" "exynosm1") | |
753 (eq_attr "exynos_m1_neon_type" "neon_store1_3")) | |
754 "(em1_sfst * 3)") | |
755 | |
756 (define_insn_reservation | |
757 "exynos_m1_neon_store1_4" 4 | |
758 (and (eq_attr "tune" "exynosm1") | |
759 (eq_attr "exynos_m1_neon_type" "neon_store1_4")) | |
760 "(em1_sfst * 4)") | |
761 | |
762 (define_insn_reservation | |
763 "exynos_m1_neon_store1_one" 7 | |
764 (and (eq_attr "tune" "exynosm1") | |
765 (eq_attr "exynos_m1_neon_type" "neon_store1_one")) | |
766 "em1_sfst") | |
767 | |
768 (define_insn_reservation | |
769 "exynos_m1_neon_store2" 7 | |
770 (and (eq_attr "tune" "exynosm1") | |
771 (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one")) | |
772 "em1_sfst, em1_fst") | |
773 | |
774 (define_insn_reservation | |
775 "exynos_m1_neon_store3" 16 | |
776 (and (eq_attr "tune" "exynosm1") | |
777 (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one")) | |
778 "((em1_sfst * 3), (em1_fst * 2), em1_nalu)") | |
779 | |
780 (define_insn_reservation | |
781 "exynos_m1_neon_store4" 17 | |
782 (and (eq_attr "tune" "exynosm1") | |
783 (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one")) | |
784 "((em1_sfst * 4), (em1_fst * 2), em1_nalu)") | |
785 | |
786 ;; Floating-Point Operations. | |
787 | |
788 (define_insn_reservation "exynos_m1_fp_const" 2 | |
789 (and (eq_attr "tune" "exynosm1") | |
790 (eq_attr "type" "fconsts, fconstd")) | |
791 "em1_nalu") | |
792 | |
793 (define_insn_reservation "exynos_m1_fp_add" 4 | |
794 (and (eq_attr "tune" "exynosm1") | |
795 (eq_attr "type" "fadds, faddd")) | |
796 "em1_fadd") | |
797 | |
798 (define_insn_reservation "exynos_m1_fp_mul" 5 | |
799 (and (eq_attr "tune" "exynosm1") | |
800 (eq_attr "type" "fmuls, fmuld")) | |
801 "em1_fmac") | |
802 | |
803 (define_insn_reservation "exynos_m1_fp_mac" 6 | |
804 (and (eq_attr "tune" "exynosm1") | |
805 (eq_attr "type" "fmacs, ffmas, fmacd, ffmad")) | |
806 "em1_fmac") | |
807 | |
808 (define_insn_reservation "exynos_m1_fp_cvt" 4 | |
809 (and (eq_attr "tune" "exynosm1") | |
810 (eq_attr "type" "f_cvt, f_rints, f_rintd")) | |
811 "em1_fcvt") | |
812 | |
813 (define_insn_reservation "exynos_m1_fp_cvt_i" 13 | |
814 (and (eq_attr "tune" "exynosm1") | |
815 (eq_attr "type" "f_cvtf2i")) | |
816 "(em1_fcvt, em1_lfst)") | |
817 | |
818 (define_insn_reservation "exynos_m1_i_cvt_fp" 9 | |
819 (and (eq_attr "tune" "exynosm1") | |
820 (eq_attr "type" "f_cvti2f")) | |
821 "(em1_st, em1_fcvt)") | |
822 | |
823 (define_insn_reservation "exynos_m1_fp_cmp" 4 | |
824 (and (eq_attr "tune" "exynosm1") | |
825 (eq_attr "type" "fcmps, fcmpd")) | |
826 "em1_nmisc") | |
827 | |
828 (define_insn_reservation "exynos_m1_fp_ccmp" 7 | |
829 (and (eq_attr "tune" "exynosm1") | |
830 (eq_attr "type" "fccmps, fccmpd")) | |
831 "(em1_st, em1_nmisc)") | |
832 | |
833 (define_insn_reservation "exynos_m1_fp_sel" 4 | |
834 (and (eq_attr "tune" "exynosm1") | |
835 (eq_attr "type" "fcsel")) | |
836 "(em1_st + em1_nalu0)") | |
837 | |
838 (define_insn_reservation "exynos_m1_fp_arith" 2 | |
839 (and (eq_attr "tune" "exynosm1") | |
840 (eq_attr "type" "ffariths, ffarithd")) | |
841 "em1_nalu") | |
842 | |
843 (define_insn_reservation "exynos_m1_fp_cpy" 2 | |
844 (and (eq_attr "tune" "exynosm1") | |
845 (eq_attr "type" "fmov")) | |
846 "em1_nalu") | |
847 | |
848 (define_insn_reservation "exynos_m1_fp_divs" 15 | |
849 (and (eq_attr "tune" "exynosm1") | |
850 (eq_attr "type" "fdivs, neon_fp_div_s, neon_fp_div_s_q,\ | |
851 fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q")) | |
852 "(em1_fvar * 9)") | |
853 | |
;; Divide and square-root insns with the "d" type suffix (fdivd, fsqrtd and | |
;; the neon_fp_div_d*/neon_fp_sqrt_d* types): default latency 22. | |
;; NOTE(review): em1_fvar is held for 9 cycles, the same as in | |
;; exynos_m1_fp_divs, although the latency is 7 cycles longer -- confirm | |
;; that the shorter unit occupancy is intended. | |
854 (define_insn_reservation "exynos_m1_fp_divd" 22 | |
855 (and (eq_attr "tune" "exynosm1") | |
856 (eq_attr "type" "fdivd, neon_fp_div_d, neon_fp_div_d_q,\ | |
857 fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q")) | |
858 "(em1_fvar * 9)") | |
859 | |
;; Insns of type f_minmaxs or f_minmaxd: default latency 2, with em1_nmisc | |
;; held for 2 consecutive cycles ('* 2' repeats the reservation). | |
860 (define_insn_reservation "exynos_m1_fp_minmax" 2 | |
861 (and (eq_attr "tune" "exynosm1") | |
862 (eq_attr "type" "f_minmaxs, f_minmaxd")) | |
863 "(em1_nmisc * 2)") | |
864 | |
865 ;; Crypto Operations. | |
866 | |
;; Insns of type crypto_aese, crypto_aesmc, crypto_sha1_xor, | |
;; crypto_sha1_fast or crypto_sha256_fast: default latency 2, reserving | |
;; em1_ncrypt (declared outside this excerpt). | |
867 (define_insn_reservation "exynos_m1_crypto_simple" 2 | |
868 (and (eq_attr "tune" "exynosm1") | |
869 (eq_attr "type" "crypto_aese, crypto_aesmc,\ | |
870 crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast")) | |
871 "em1_ncrypt") | |
872 | |
;; Insns of type crypto_sha1_slow or crypto_sha256_slow: default latency 6 | |
;; on em1_ncrypt, versus 2 for the exynos_m1_crypto_simple types. | |
873 (define_insn_reservation "exynos_m1_crypto_complex" 6 | |
874 (and (eq_attr "tune" "exynosm1") | |
875 (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow")) | |
876 "em1_ncrypt") | |
877 | |
;; Insns of type neon_mul_b_long, neon_mul_h_long or neon_mul_s_long: | |
;; default latency 2, reserving em1_ncrypt. | |
878 (define_insn_reservation "exynos_m1_crypto_poly" 2 | |
879 (and (eq_attr "tune" "exynosm1") | |
880 (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long")) | |
881 "em1_ncrypt") | |
882 | |
;; Insns of type neon_mul_d_long: default latency 4 on em1_ncrypt, twice | |
;; the latency given to the b/h/s variants in exynos_m1_crypto_poly. | |
883 (define_insn_reservation "exynos_m1_crypto_polyl" 4 | |
884 (and (eq_attr "tune" "exynosm1") | |
885 (eq_attr "type" "neon_mul_d_long")) | |
886 "em1_ncrypt") | |
887 | |
;; Insns of type crc: default latency 2, reserving em1_c (declared | |
;; outside this excerpt). | |
888 (define_insn_reservation "exynos_m1_crc" 2 | |
889 (and (eq_attr "tune" "exynosm1") | |
890 (eq_attr "type" "crc")) | |
891 "em1_c") | |
892 | |
893 ;; Simple execution unit bypasses | |
894 | |
895 ;; Pre-decrement and post-increment addressing modes update the register quickly. | |
896 ;; TODO: figure out how to tell the addressing mode register from the loaded one. | |
;; Latency 1 from any reservation whose name matches exynos_m1_store* or | |
;; exynos_m1_neon_store* to any matching store or load reservation (the | |
;; names are globs over reservations defined outside this excerpt). | |
;; No guard function is given, so the bypass applies to every dependency | |
;; between these reservations, not only to the updated address register. | |
897 (define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*" | |
898 "exynos_m1_store*, exynos_m1_neon_store*, | |
899 exynos_m1_load*, exynos_m1_neon_load*") | |
900 | |
901 ;; MLAs can feed other MLAs quickly. | |
;; Latency 1 between any two reservations matching the glob exynos_m1_mla* | |
;; (those reservations are defined outside this excerpt). | |
902 (define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*") | |
903 | |
904 ;; Insns in FMAC or FADD can feed other such insns quickly. | |
;; exynos_m1_fp_mul results reach add/mul/mac consumers after 4 cycles | |
;; instead of the default 5. | |
905 (define_bypass 4 "exynos_m1_fp_mul" | |
906 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
;; exynos_m1_fp_mac results reach the same consumers after 5 cycles | |
;; instead of the default 6. | |
907 (define_bypass 5 "exynos_m1_fp_mac" | |
908 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
;; NEON counterparts of the two bypasses above; the consumer list also | |
;; includes exynos_m1_neon_fp_step.  The exynos_m1_neon_fp_* reservations | |
;; and their default latencies are defined outside this excerpt. | |
909 (define_bypass 4 "exynos_m1_neon_fp_mul" | |
910 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
911 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
912 (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step" | |
913 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
914 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
;; Adds feed the same consumer sets with latency 3 (exynos_m1_fp_add and | |
;; exynos_m1_neon_fp_add are defined outside this excerpt). | |
915 (define_bypass 3 "exynos_m1_fp_add" | |
916 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
917 (define_bypass 3 "exynos_m1_neon_fp_add" | |
918 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
919 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
920 | |
921 ;; Insns in NALU can feed other such insns quickly. | |
;; FP constant/arith/copy results reach const/arith/copy/select consumers | |
;; after 1 cycle (exynos_m1_fp_arith and exynos_m1_fp_cpy default to 2; | |
;; exynos_m1_fp_const is defined outside this excerpt). | |
922 (define_bypass 1 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy" | |
923 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\ | |
924 exynos_m1_fp_sel") | |
;; exynos_m1_fp_sel results reach the same consumers after 3 cycles | |
;; instead of the default 4. | |
925 (define_bypass 3 "exynos_m1_fp_sel" | |
926 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\ | |
927 exynos_m1_fp_sel") | |
;; NEON producers with latency 1 into the listed NEON consumers.  Note the | |
;; consumer list uses the glob exynos_m1_neon_bitops* while the producer | |
;; list names exynos_m1_neon_bitops exactly.  All exynos_m1_neon_* | |
;; reservations referenced here are defined outside this excerpt. | |
928 (define_bypass 1 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
929 exynos_m1_neon_bitops, exynos_m1_neon_bitins,\ | |
930 exynos_m1_neon_tbl" | |
931 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
932 exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\ | |
933 exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\ | |
934 exynos_m1_neon_tbl") | |
;; Shift-accumulate and complex-shift producers feed the same consumer | |
;; list with latency 3. | |
935 (define_bypass 3 "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex" | |
936 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
937 exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\ | |
938 exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\ | |
939 exynos_m1_neon_tbl") | |
;; exynos_m1_neon_fp_unary feeds only itself with latency 1. | |
940 (define_bypass 1 "exynos_m1_neon_fp_unary" "exynos_m1_neon_fp_unary") | |
941 | |
942 ;; Insns in NCRYPT can feed other such insns quickly. | |
;; Each crypto producer gets a bypass one cycle shorter than its default | |
;; latency (simple/poly 2 -> 1, polyl 4 -> 3, complex 6 -> 5) into the | |
;; same consumer list.  The consumer glob exynos_m1_crypto_poly* matches | |
;; both exynos_m1_crypto_poly and exynos_m1_crypto_polyl; the producer | |
;; lists name them exactly because their bypass latencies differ. | |
943 (define_bypass 1 "exynos_m1_crypto_simple, exynos_m1_crypto_poly" | |
944 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
945 exynos_m1_crypto_poly*") | |
946 (define_bypass 3 "exynos_m1_crypto_polyl" | |
947 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
948 exynos_m1_crypto_poly*") | |
949 (define_bypass 5 "exynos_m1_crypto_complex" | |
950 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
951 exynos_m1_crypto_poly*") | |
952 | |
953 ;; AES{D,E}/AESMC pairs can feed each other instantly. | |
;; Latency 0 between two exynos_m1_crypto_simple insns, applied only when | |
;; the guard function aarch_crypto_can_dual_issue (defined elsewhere) | |
;; returns nonzero for the producer/consumer pair.  The guard is needed | |
;; because exynos_m1_crypto_simple also covers the crypto_sha1_xor, | |
;; crypto_sha1_fast and crypto_sha256_fast types. | |
954 (define_bypass 0 "exynos_m1_crypto_simple" | |
955 "exynos_m1_crypto_simple" | |
956 "aarch_crypto_can_dual_issue") | |
957 | |
958 ;; Predicted branches take no time, but mispredicted ones take forever anyway. | |
;; Latency 1 from every reservation matching the glob exynos_m1_* to | |
;; exynos_m1_call and exynos_m1_branch (both defined outside this excerpt). | |
959 (define_bypass 1 "exynos_m1_*" | |
960 "exynos_m1_call, exynos_m1_branch") | |