Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/i386/haswell.md @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 ;; Scheduling for Haswell and derived processors. | |
2 ;; Copyright (C) 2004-2017 Free Software Foundation, Inc. | |
3 ;; | |
4 ;; This file is part of GCC. | |
5 ;; | |
6 ;; GCC is free software; you can redistribute it and/or modify | |
7 ;; it under the terms of the GNU General Public License as published by | |
8 ;; the Free Software Foundation; either version 3, or (at your option) | |
9 ;; any later version. | |
10 ;; | |
11 ;; GCC is distributed in the hope that it will be useful, | |
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 ;; GNU General Public License for more details. | |
15 ;; | |
16 ;; You should have received a copy of the GNU General Public License | |
17 ;; along with GCC; see the file COPYING3. If not see | |
18 ;; <http://www.gnu.org/licenses/>. */ | |
19 | |
20 ;; The scheduling description in this file is based on core2.md. | |
21 ;; The major difference from the CORE2 pipeline is that HASWELL has | |
22 ;; two memory units (MUs) for loads and one for stores. | |
23 (define_automaton "haswell_decoder,haswell_core,haswell_idiv,haswell_fdiv,haswell_ssediv,haswell_load,haswell_store") | |
24 | |
25 ;; The CPU domain, used for HASWELL bypass latencies | |
26 (define_attr "hsw_domain" "int,float,simd" | |
27 (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") | |
28 (const_string "float") | |
29 (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul, | |
30 sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt, | |
31 ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg") | |
32 (cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF") | |
33 (const_string "float") | |
34 (eq_attr "mode" "SI") | |
35 (const_string "int")] | |
36 (const_string "simd")) | |
37 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") | |
38 (const_string "simd")] | |
39 (const_string "int"))) | |
40 | |
41 (define_cpu_unit "hsw_decoder0" "haswell_decoder") | |
42 (define_cpu_unit "hsw_decoder1" "haswell_decoder") | |
43 (define_cpu_unit "hsw_decoder2" "haswell_decoder") | |
44 (define_cpu_unit "hsw_decoder3" "haswell_decoder") | |
45 | |
46 ;; We first wish to find an instruction for hsw_decoder0, so exclude | |
47 ;; other hsw_decoders from being reserved until hsw_decoder0 is | |
48 ;; reserved. | |
49 (presence_set "hsw_decoder1" "hsw_decoder0") | |
50 (presence_set "hsw_decoder2" "hsw_decoder0") | |
51 (presence_set "hsw_decoder3" "hsw_decoder0") | |
52 | |
53 ;; Most instructions can be decoded on any of the four decoders. | |
54 (define_reservation "hsw_decodern" "(hsw_decoder0|hsw_decoder1|hsw_decoder2|hsw_decoder3)") | |
55 | |
56 ;; The out-of-order core has eight pipelines. These are similar to the | |
57 ;; Pentium Pro's five pipelines. Ports 2 and 3 are responsible for memory loads, | |
58 ;; port 7 for store address calculations, port 4 for memory stores, and | |
59 ;; ports 0, 1, 5 and 6 for everything else. | |
60 | |
61 (define_cpu_unit "hsw_p0,hsw_p1,hsw_p5,hsw_p6" "haswell_core") | |
62 (define_cpu_unit "hsw_p2,hsw_p3" "haswell_load") | |
63 (define_cpu_unit "hsw_p4,hsw_p7" "haswell_store") | |
64 (define_cpu_unit "hsw_idiv" "haswell_idiv") | |
65 (define_cpu_unit "hsw_fdiv" "haswell_fdiv") | |
66 (define_cpu_unit "hsw_ssediv" "haswell_ssediv") | |
67 | |
68 (define_reservation "hsw_p0156" "hsw_p0|hsw_p1|hsw_p5|hsw_p6") | |
69 (define_reservation "hsw_p0p1p5p6" "hsw_p0+hsw_p1+hsw_p5+hsw_p6") | |
70 (define_reservation "hsw_p23" "hsw_p2|hsw_p3") | |
71 (define_reservation "hsw_p4p7" "hsw_p4+hsw_p7") | |
72 (define_reservation "hsw_p237" "hsw_p2|hsw_p3|hsw_p7") | |
73 (define_reservation "hsw_p015" "hsw_p0|hsw_p1|hsw_p5") | |
74 (define_reservation "hsw_p01" "hsw_p0|hsw_p1") | |
75 | |
76 (define_insn_reservation "hsw_complex_insn" 6 | |
77 (and (eq_attr "cpu" "haswell") | |
78 (eq_attr "type" "other,multi,str")) | |
79 "hsw_decoder0") | |
80 | |
81 (define_insn_reservation "hsw_call" 1 | |
82 (and (eq_attr "cpu" "haswell") | |
83 (eq_attr "type" "call,callv")) | |
84 "hsw_decoder0") | |
85 | |
86 ;; imov with memory operands does not use the integer units. | |
87 ;; imovx always decodes to one uop, and also doesn't use the integer | |
88 ;; units if it has memory operands. | |
89 (define_insn_reservation "hsw_imov" 1 | |
90 (and (eq_attr "cpu" "haswell") | |
91 (and (eq_attr "memory" "none") | |
92 (eq_attr "type" "imov,imovx"))) | |
93 "hsw_decodern,hsw_p0156") | |
94 | |
95 (define_insn_reservation "hsw_imov_load" 2 | |
96 (and (eq_attr "cpu" "haswell") | |
97 (and (eq_attr "memory" "load") | |
98 (eq_attr "type" "imov,imovx"))) | |
99 "hsw_decodern,hsw_p23") | |
100 | |
101 (define_insn_reservation "hsw_imov_store" 3 | |
102 (and (eq_attr "cpu" "haswell") | |
103 (and (eq_attr "memory" "store") | |
104 (eq_attr "type" "imov"))) | |
105 "hsw_decodern,hsw_p4+(hsw_p2|hsw_p3|hsw_p7)") | |
106 | |
107 (define_insn_reservation "hsw_icmov" 2 | |
108 (and (eq_attr "cpu" "haswell") | |
109 (and (eq_attr "memory" "none") | |
110 (eq_attr "type" "icmov"))) | |
111 "hsw_decodern,hsw_p0156,hsw_p0156") | |
112 | |
113 (define_insn_reservation "hsw_icmov_load" 2 | |
114 (and (eq_attr "cpu" "haswell") | |
115 (and (eq_attr "memory" "load") | |
116 (eq_attr "type" "icmov"))) | |
117 "hsw_decodern,hsw_p23+hsw_p0156,hsw_p0156") | |
118 | |
119 (define_insn_reservation "hsw_push_reg" 3 | |
120 (and (eq_attr "cpu" "haswell") | |
121 (and (eq_attr "memory" "store") | |
122 (eq_attr "type" "push"))) | |
123 "hsw_decodern,hsw_p4+hsw_p237") | |
124 | |
125 (define_insn_reservation "hsw_push_mem" 3 | |
126 (and (eq_attr "cpu" "haswell") | |
127 (and (eq_attr "memory" "both") | |
128 (eq_attr "type" "push"))) | |
129 "hsw_decodern,hsw_p4+hsw_p237,hsw_p237") | |
130 | |
131 ;; Consider lea latency as having 2 components. | |
132 (define_insn_reservation "hsw_lea" 1 | |
133 (and (eq_attr "cpu" "haswell") | |
134 (and (eq_attr "memory" "none") | |
135 (eq_attr "type" "lea"))) | |
136 "hsw_decodern,hsw_p1|hsw_p5") | |
137 | |
138 (define_insn_reservation "hsw_shift_rotate" 1 | |
139 (and (eq_attr "cpu" "haswell") | |
140 (and (eq_attr "memory" "none") | |
141 (eq_attr "type" "ishift,ishift1,rotate,rotate1"))) | |
142 "hsw_decodern,hsw_p0|hsw_p6") | |
143 | |
144 (define_insn_reservation "hsw_shift_rotate_mem" 1 | |
145 (and (eq_attr "cpu" "haswell") | |
146 (and (eq_attr "memory" "!none") | |
147 (eq_attr "type" "ishift,ishift1,rotate,rotate1"))) | |
148 "hsw_decodern,(hsw_p0|hsw_p6)+hsw_p237+hsw_p4") | |
149 | |
150 (define_insn_reservation "hsw_branch" 1 | |
151 (and (eq_attr "cpu" "haswell") | |
152 (and (eq_attr "memory" "none") | |
153 (eq_attr "type" "ibr"))) | |
154 "hsw_decodern,hsw_p6") | |
155 | |
156 (define_insn_reservation "hsw_indirect_branch" 2 | |
157 (and (eq_attr "cpu" "haswell") | |
158 (and (eq_attr "memory" "!none") | |
159 (eq_attr "type" "ibr"))) | |
160 "hsw_decoder0,hsw_p23+hsw_p6") | |
161 | |
162 (define_insn_reservation "hsw_leave" 4 | |
163 (and (eq_attr "cpu" "haswell") | |
164 (eq_attr "type" "leave")) | |
165 "hsw_decoder0,hsw_p23+hsw_p0156,hsw_p0156") | |
166 | |
167 ;; imul with two/three operands only executes on port 1; the imulx reservations below use the general integer ports instead. | |
168 (define_insn_reservation "hsw_imul" 3 | |
169 (and (eq_attr "cpu" "haswell") | |
170 (and (eq_attr "memory" "none") | |
171 (eq_attr "type" "imul"))) | |
172 "hsw_decodern,hsw_p1") | |
173 | |
174 (define_insn_reservation "hsw_imul_mem" 3 | |
175 (and (eq_attr "cpu" "haswell") | |
176 (and (eq_attr "memory" "!none") | |
177 (eq_attr "type" "imul"))) | |
178 "hsw_decodern,hsw_p23+hsw_p1") | |
179 | |
180 (define_insn_reservation "hsw_imulx" 4 | |
181 (and (eq_attr "cpu" "haswell") | |
182 (and (eq_attr "memory" "none") | |
183 (eq_attr "type" "imulx"))) | |
184 "hsw_decodern,hsw_p0156,hsw_p0156") | |
185 | |
186 (define_insn_reservation "hsw_imulx_mem" 4 | |
187 (and (eq_attr "cpu" "haswell") | |
188 (and (eq_attr "memory" "!none") | |
189 (eq_attr "type" "imulx"))) | |
190 "hsw_decodern,hsw_p23+hsw_p0156,hsw_p0156") | |
191 | |
192 | |
193 ;; div and idiv are very similar, so we model them the same. | |
194 ;; Use the same latency for all QI,HI and SI modes. | |
195 (define_insn_reservation "hsw_idiv" 23 | |
196 (and (eq_attr "cpu" "haswell") | |
197 (and (eq_attr "memory" "none") | |
198 (eq_attr "type" "idiv"))) | |
199 "hsw_decoder0,(hsw_p0p1p5p6+hsw_idiv)*9") | |
200 | |
201 (define_insn_reservation "hsw_idiv_load" 23 | |
202 (and (eq_attr "cpu" "haswell") | |
203 (and (eq_attr "memory" "load") | |
204 (eq_attr "type" "idiv"))) | |
205 "hsw_decoder0,hsw_p23+hsw_p0+hsw_idiv,(hsw_p0p1p5p6+hsw_idiv)*9") | |
206 | |
207 ;; x87 floating point operations. | |
208 | |
209 (define_insn_reservation "hsw_fxch" 0 | |
210 (and (eq_attr "cpu" "haswell") | |
211 (eq_attr "type" "fxch")) | |
212 "hsw_decodern") | |
213 | |
214 (define_insn_reservation "hsw_fop" 3 | |
215 (and (eq_attr "cpu" "haswell") | |
216 (and (eq_attr "memory" "none,unknown") | |
217 (eq_attr "type" "fop"))) | |
218 "hsw_decodern,hsw_p1") | |
219 | |
220 (define_insn_reservation "hsw_fop_load" 5 | |
221 (and (eq_attr "cpu" "haswell") | |
222 (and (eq_attr "memory" "load") | |
223 (eq_attr "type" "fop"))) | |
224 "hsw_decodern,hsw_p23+hsw_p1,hsw_p1") | |
225 | |
226 (define_insn_reservation "hsw_fop_store" 3 | |
227 (and (eq_attr "cpu" "haswell") | |
228 (and (eq_attr "memory" "store") | |
229 (eq_attr "type" "fop"))) | |
230 "hsw_decodern,hsw_p0,hsw_p0,hsw_p0+hsw_p4+hsw_p3") | |
231 | |
232 (define_insn_reservation "hsw_fop_both" 5 | |
233 (and (eq_attr "cpu" "haswell") | |
234 (and (eq_attr "memory" "both") | |
235 (eq_attr "type" "fop"))) | |
236 "hsw_decodern,hsw_p2+hsw_p0,hsw_p0+hsw_p4+hsw_p3") | |
237 | |
238 (define_insn_reservation "hsw_fsgn" 1 | |
239 (and (eq_attr "cpu" "haswell") | |
240 (eq_attr "type" "fsgn")) | |
241 "hsw_decodern,hsw_p0") | |
242 | |
243 (define_insn_reservation "hsw_fistp" 7 | |
244 (and (eq_attr "cpu" "haswell") | |
245 (eq_attr "type" "fistp")) | |
246 "hsw_decoder0,hsw_p1+hsw_p4+hsw_p23") | |
247 | |
248 (define_insn_reservation "hsw_fcmov" 2 | |
249 (and (eq_attr "cpu" "haswell") | |
250 (eq_attr "type" "fcmov")) | |
251 "hsw_decoder0,hsw_p0+hsw_p5,hsw_p0") | |
252 | |
253 (define_insn_reservation "hsw_fcmp" 1 | |
254 (and (eq_attr "cpu" "haswell") | |
255 (and (eq_attr "memory" "none") | |
256 (eq_attr "type" "fcmp"))) | |
257 "hsw_decodern,hsw_p1") | |
258 | |
259 (define_insn_reservation "hsw_fcmp_load" 1 | |
260 (and (eq_attr "cpu" "haswell") | |
261 (and (eq_attr "memory" "load") | |
262 (eq_attr "type" "fcmp"))) | |
263 "hsw_decodern,hsw_p23+hsw_p1") | |
264 | |
265 (define_insn_reservation "hsw_fmov" 1 | |
266 (and (eq_attr "cpu" "haswell") | |
267 (and (eq_attr "memory" "none") | |
268 (eq_attr "type" "fmov"))) | |
269 "hsw_decodern,hsw_p01") | |
270 | |
271 (define_insn_reservation "hsw_fmov_load" 3 | |
272 (and (eq_attr "cpu" "haswell") | |
273 (and (eq_attr "memory" "load") | |
274 (and (eq_attr "mode" "!XF") | |
275 (eq_attr "type" "fmov")))) | |
276 "hsw_decodern,hsw_p23") | |
277 | |
278 (define_insn_reservation "hsw_fmov_XF_load" 3 | |
279 (and (eq_attr "cpu" "haswell") | |
280 (and (eq_attr "memory" "load") | |
281 (and (eq_attr "mode" "XF") | |
282 (eq_attr "type" "fmov")))) | |
283 "hsw_decodern,(hsw_p23+hsw_p0)*2") | |
284 | |
285 (define_insn_reservation "hsw_fmov_store" 1 | |
286 (and (eq_attr "cpu" "haswell") | |
287 (and (eq_attr "memory" "store") | |
288 (and (eq_attr "mode" "!XF") | |
289 (eq_attr "type" "fmov")))) | |
290 "hsw_decodern,hsw_p4p7") | |
291 | |
292 (define_insn_reservation "hsw_fmov_XF_store" 3 | |
293 (and (eq_attr "cpu" "haswell") | |
294 (and (eq_attr "memory" "store") | |
295 (and (eq_attr "mode" "XF") | |
296 (eq_attr "type" "fmov")))) | |
297 "hsw_decodern,hsw_p4p7,hsw_p4p7") | |
298 | |
299 (define_insn_reservation "hsw_fmul" 4 | |
300 (and (eq_attr "cpu" "haswell") | |
301 (and (eq_attr "memory" "none") | |
302 (eq_attr "type" "fmul"))) | |
303 "hsw_decodern,hsw_p01") | |
304 | |
305 (define_insn_reservation "hsw_fmul_load" 4 | |
306 (and (eq_attr "cpu" "haswell") | |
307 (and (eq_attr "memory" "load") | |
308 (eq_attr "type" "fmul"))) | |
309 "hsw_decodern,hsw_p23+hsw_p01") | |
310 | |
311 ;; fdiv latencies depend on the mode of the operands. XFmode gives | |
312 ;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18. | |
313 ;; Division by a power of 2 takes only 9 cycles, but we cannot model | |
314 ;; that. Throughput is equal to latency - 1, which we model using the | |
315 ;; hsw_fdiv unit of the haswell_fdiv automaton. | |
316 (define_insn_reservation "hsw_fdiv_SF" 18 | |
317 (and (eq_attr "cpu" "haswell") | |
318 (and (eq_attr "memory" "none") | |
319 (and (eq_attr "mode" "SF") | |
320 (eq_attr "type" "fdiv,fpspc")))) | |
321 "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*16") | |
322 | |
323 (define_insn_reservation "hsw_fdiv_SF_load" 19 | |
324 (and (eq_attr "cpu" "haswell") | |
325 (and (eq_attr "memory" "load") | |
326 (and (eq_attr "mode" "SF") | |
327 (eq_attr "type" "fdiv,fpspc")))) | |
328 "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*16") | |
329 | |
330 (define_insn_reservation "hsw_fdiv_DF" 32 | |
331 (and (eq_attr "cpu" "haswell") | |
332 (and (eq_attr "memory" "none") | |
333 (and (eq_attr "mode" "DF") | |
334 (eq_attr "type" "fdiv,fpspc")))) | |
335 "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*30") | |
336 | |
337 (define_insn_reservation "hsw_fdiv_DF_load" 33 | |
338 (and (eq_attr "cpu" "haswell") | |
339 (and (eq_attr "memory" "load") | |
340 (and (eq_attr "mode" "DF") | |
341 (eq_attr "type" "fdiv,fpspc")))) | |
342 "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*30") | |
343 | |
344 (define_insn_reservation "hsw_fdiv_XF" 38 | |
345 (and (eq_attr "cpu" "haswell") | |
346 (and (eq_attr "memory" "none") | |
347 (and (eq_attr "mode" "XF") | |
348 (eq_attr "type" "fdiv,fpspc")))) | |
349 "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*36") | |
350 | |
351 (define_insn_reservation "hsw_fdiv_XF_load" 39 | |
352 (and (eq_attr "cpu" "haswell") | |
353 (and (eq_attr "memory" "load") | |
354 (and (eq_attr "mode" "XF") | |
355 (eq_attr "type" "fdiv,fpspc")))) | |
356 "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*36") | |
357 | |
358 ;; MMX instructions. | |
359 | |
360 (define_insn_reservation "hsw_mmx_add" 1 | |
361 (and (eq_attr "cpu" "haswell") | |
362 (and (eq_attr "memory" "none") | |
363 (eq_attr "type" "mmxadd,sseiadd"))) | |
364 "hsw_decodern,hsw_p1|hsw_p5") | |
365 | |
366 (define_insn_reservation "hsw_mmx_add_load" 2 | |
367 (and (eq_attr "cpu" "haswell") | |
368 (and (eq_attr "memory" "load") | |
369 (eq_attr "type" "mmxadd,sseiadd"))) | |
370 "hsw_decodern,hsw_p23+(hsw_p1|hsw_p5)") | |
371 | |
372 (define_insn_reservation "hsw_mmx_shft" 1 | |
373 (and (eq_attr "cpu" "haswell") | |
374 (and (eq_attr "memory" "none") | |
375 (eq_attr "type" "mmxshft"))) | |
376 "hsw_decodern,hsw_p0") | |
377 | |
378 (define_insn_reservation "hsw_mmx_shft_load" 2 | |
379 (and (eq_attr "cpu" "haswell") | |
380 (and (eq_attr "memory" "load") | |
381 (eq_attr "type" "mmxshft"))) | |
382 "hsw_decodern,hsw_p23+hsw_p0") | |
383 | |
384 (define_insn_reservation "hsw_mmx_sse_shft" 1 | |
385 (and (eq_attr "cpu" "haswell") | |
386 (and (eq_attr "memory" "none") | |
387 (and (eq_attr "type" "sseishft") | |
388 (eq_attr "length_immediate" "!0")))) | |
389 "hsw_decodern,hsw_p01") | |
390 | |
391 (define_insn_reservation "hsw_mmx_sse_shft_load" 2 | |
392 (and (eq_attr "cpu" "haswell") | |
393 (and (eq_attr "memory" "load") | |
394 (and (eq_attr "type" "sseishft") | |
395 (eq_attr "length_immediate" "!0")))) | |
396 "hsw_decodern,hsw_p01+hsw_p23") | |
397 | |
398 (define_insn_reservation "hsw_mmx_sse_shft1" 2 | |
399 (and (eq_attr "cpu" "haswell") | |
400 (and (eq_attr "memory" "none") | |
401 (and (eq_attr "type" "sseishft") | |
402 (eq_attr "length_immediate" "0")))) | |
403 "hsw_decodern,hsw_p01") | |
404 | |
405 (define_insn_reservation "hsw_mmx_sse_shft1_load" 3 | |
406 (and (eq_attr "cpu" "haswell") | |
407 (and (eq_attr "memory" "load") | |
408 (and (eq_attr "type" "sseishft") | |
409 (eq_attr "length_immediate" "0")))) | |
410 "hsw_decodern,hsw_p01+hsw_p23") | |
411 | |
412 (define_insn_reservation "hsw_mmx_mul" 5 | |
413 (and (eq_attr "cpu" "haswell") | |
414 (and (eq_attr "memory" "none") | |
415 (eq_attr "type" "mmxmul,sseimul"))) | |
416 "hsw_decodern,hsw_p01") | |
417 | |
418 (define_insn_reservation "hsw_mmx_mul_load" 5 | |
419 (and (eq_attr "cpu" "haswell") | |
420 (and (eq_attr "memory" "load") | |
421 (eq_attr "type" "mmxmul,sseimul"))) | |
422 "hsw_decodern,hsw_p23+hsw_p01") | |
423 | |
424 (define_insn_reservation "hsw_sse_mmxcvt" 4 | |
425 (and (eq_attr "cpu" "haswell") | |
426 (and (eq_attr "mode" "DI") | |
427 (eq_attr "type" "mmxcvt"))) | |
428 "hsw_decodern,hsw_p1") | |
429 | |
430 ;; (define_insn_reservation "hsw_sse_mmxshft" 2 | |
431 ;; (and (eq_attr "cpu" "haswell") | |
432 ;; (and (eq_attr "mode" "TI") | |
433 ;; (eq_attr "type" "mmxshft"))) | |
434 ;; "hsw_decodern,hsw_p01") | |
435 | |
436 ;; The sfence instruction. | |
437 (define_insn_reservation "hsw_sse_sfence" 2 | |
438 (and (eq_attr "cpu" "haswell") | |
439 (and (eq_attr "memory" "unknown") | |
440 (eq_attr "type" "sse"))) | |
441 "hsw_decoder0,hsw_p23+hsw_p4") | |
442 | |
443 (define_insn_reservation "hsw_sse_SFDF" 3 | |
444 (and (eq_attr "cpu" "haswell") | |
445 (and (eq_attr "mode" "SF,DF") | |
446 (eq_attr "type" "sse"))) | |
447 "hsw_decodern,hsw_p01") | |
448 | |
449 (define_insn_reservation "hsw_sse_V4SF" 4 | |
450 (and (eq_attr "cpu" "haswell") | |
451 (and (eq_attr "mode" "V4SF") | |
452 (eq_attr "type" "sse"))) | |
453 "hsw_decodern,hsw_p01") | |
454 | |
455 (define_insn_reservation "hsw_sse_V8SF" 4 | |
456 (and (eq_attr "cpu" "haswell") | |
457 (and (eq_attr "mode" "V8SF,V4DF") | |
458 (eq_attr "type" "sse"))) | |
459 "hsw_decodern,hsw_p01") | |
460 | |
461 (define_insn_reservation "hsw_sse_addcmp" 3 | |
462 (and (eq_attr "cpu" "haswell") | |
463 (and (eq_attr "memory" "none") | |
464 (eq_attr "type" "sseadd1,ssecmp,ssecomi"))) | |
465 "hsw_decodern,hsw_p01") | |
466 | |
467 (define_insn_reservation "hsw_sse_addcmp_load" 3 | |
468 (and (eq_attr "cpu" "haswell") | |
469 (and (eq_attr "memory" "load") | |
470 (eq_attr "type" "sseadd1,ssecmp,ssecomi"))) | |
471 "hsw_decodern,hsw_p23+hsw_p01") | |
472 | |
473 (define_insn_reservation "hsw_sse_logic" 1 | |
474 (and (eq_attr "cpu" "haswell") | |
475 (and (eq_attr "memory" "none") | |
476 (eq_attr "type" "sselog,sselog1"))) | |
477 "hsw_decodern,hsw_p015") | |
478 | |
479 (define_insn_reservation "hsw_sse_logic_load" 2 | |
480 (and (eq_attr "cpu" "haswell") | |
481 (and (eq_attr "memory" "load") | |
482 (eq_attr "type" "sselog,sselog1"))) | |
483 "hsw_decodern,hsw_p015+hsw_p23") | |
484 | |
485 (define_insn_reservation "hsw_sse_add" 3 | |
486 (and (eq_attr "cpu" "haswell") | |
487 (and (eq_attr "memory" "none") | |
488 (eq_attr "type" "sseadd"))) | |
489 "hsw_decodern,hsw_p1|hsw_p5") | |
490 | |
491 (define_insn_reservation "hsw_sse_add_load" 3 | |
492 (and (eq_attr "cpu" "haswell") | |
493 (and (eq_attr "memory" "load") | |
494 (eq_attr "type" "sseadd"))) | |
495 "hsw_decodern,(hsw_p1|hsw_p5)+hsw_p23") | |
496 | |
497 (define_insn_reservation "hsw_sse_mul" 5 | |
498 (and (eq_attr "cpu" "haswell") | |
499 (and (eq_attr "memory" "none") | |
500 (eq_attr "type" "ssemul"))) | |
501 "hsw_decodern,hsw_p0") | |
502 | |
503 (define_insn_reservation "hsw_sse_mul_load" 5 | |
504 (and (eq_attr "cpu" "haswell") | |
505 (and (eq_attr "memory" "load") | |
506 (eq_attr "type" "ssemul"))) | |
507 "hsw_decodern,hsw_p0+hsw_p23") | |
508 ;; Use skylake pipeline. | |
509 (define_insn_reservation "hsw_sse_muladd" 5 | |
510 (and (eq_attr "cpu" "haswell") | |
511 (and (eq_attr "memory" "none") | |
512 (eq_attr "type" "ssemuladd"))) | |
513 "hsw_decodern,hsw_p01") | |
514 | |
515 (define_insn_reservation "hsw_sse_muladd_load" 5 | |
516 (and (eq_attr "cpu" "haswell") | |
517 (and (eq_attr "memory" "load") | |
518 (eq_attr "type" "ssemuladd"))) | |
519 "hsw_decodern,hsw_p01+hsw_p23") | |
520 | |
521 (define_insn_reservation "hsw_sse_div_SF" 18 | |
522 (and (eq_attr "cpu" "haswell") | |
523 (and (eq_attr "memory" "none") | |
524 (and (eq_attr "mode" "SF,V4SF,V8SF") | |
525 (eq_attr "type" "ssediv")))) | |
526 "hsw_decodern,hsw_p0,hsw_ssediv*14") | |
527 | |
528 (define_insn_reservation "hsw_sse_div_SF_load" 18 | |
529 (and (eq_attr "cpu" "haswell") | |
530 (and (eq_attr "memory" "load") | |
531 (and (eq_attr "mode" "SF,V4SF,V8SF") | |
532 (eq_attr "type" "ssediv")))) | |
533 "hsw_decodern,(hsw_p23+hsw_p0),hsw_ssediv*14") | |
534 | |
535 (define_insn_reservation "hsw_sse_div_DF" 28 | |
536 (and (eq_attr "cpu" "haswell") | |
537 (and (eq_attr "memory" "none") | |
538 (and (eq_attr "mode" "DF,V2DF,V4DF") | |
539 (eq_attr "type" "ssediv")))) | |
540 "hsw_decodern,hsw_p0,hsw_ssediv*20") | |
541 | |
542 (define_insn_reservation "hsw_sse_div_DF_load" 28 | |
543 (and (eq_attr "cpu" "haswell") | |
544 (and (eq_attr "memory" "load") | |
545 (and (eq_attr "mode" "DF,V2DF,V4DF") | |
546 (eq_attr "type" "ssediv")))) | |
547 "hsw_decodern,(hsw_p23+hsw_p0),hsw_ssediv*20") | |
548 | |
549 (define_insn_reservation "hsw_sse_icvt" 4 | |
550 (and (eq_attr "cpu" "haswell") | |
551 (and (eq_attr "memory" "none") | |
552 (eq_attr "type" "sseicvt"))) | |
553 "hsw_decodern,hsw_p1") | |
554 | |
555 (define_insn_reservation "hsw_sse_icvt_load" 4 | |
556 (and (eq_attr "cpu" "haswell") | |
557 (and (eq_attr "memory" "!none") | |
558 (eq_attr "type" "sseicvt"))) | |
559 "hsw_decodern,hsw_p23+hsw_p1") | |
560 | |
561 | |
562 (define_insn_reservation "hsw_sse_icvt_SI" 3 | |
563 (and (eq_attr "cpu" "haswell") | |
564 (and (eq_attr "memory" "none") | |
565 (and (eq_attr "mode" "SI") | |
566 (eq_attr "type" "sseicvt")))) | |
567 "hsw_decodern,hsw_p1") | |
568 | |
569 (define_insn_reservation "hsw_sse_icvt_SI_load" 3 | |
570 (and (eq_attr "cpu" "haswell") | |
571 (and (eq_attr "memory" "!none") | |
572 (and (eq_attr "mode" "SI") | |
573 (eq_attr "type" "sseicvt")))) | |
574 "hsw_decodern,hsw_p23+hsw_p1") | |
575 | |
576 (define_insn_reservation "hsw_sse_mov" 1 | |
577 (and (eq_attr "cpu" "haswell") | |
578 (and (eq_attr "memory" "none") | |
579 (eq_attr "type" "ssemov"))) | |
580 "hsw_decodern,hsw_p015") | |
581 | |
582 (define_insn_reservation "hsw_sse_mov_load" 2 | |
583 (and (eq_attr "cpu" "haswell") | |
584 (and (eq_attr "memory" "load") | |
585 (eq_attr "type" "ssemov"))) | |
586 "hsw_decodern,hsw_p23") | |
587 | |
588 (define_insn_reservation "hsw_sse_mov_store" 1 | |
589 (and (eq_attr "cpu" "haswell") | |
590 (and (eq_attr "memory" "store") | |
591 (eq_attr "type" "ssemov"))) | |
592 "hsw_decodern,hsw_p4p7") | |
593 | |
594 (define_insn_reservation "hsw_insn" 1 | |
595 (and (eq_attr "cpu" "haswell") | |
596 (and (eq_attr "memory" "none,unknown") | |
597 (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp"))) | |
598 "hsw_decodern,hsw_p0156") | |
599 | |
600 (define_insn_reservation "hsw_insn_load" 1 | |
601 (and (eq_attr "cpu" "haswell") | |
602 (and (eq_attr "memory" "load") | |
603 (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp"))) | |
604 "hsw_decodern,hsw_p23+hsw_p0156") | |
605 | |
606 (define_insn_reservation "hsw_insn_store" 1 | |
607 (and (eq_attr "cpu" "haswell") | |
608 (and (eq_attr "memory" "store") | |
609 (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp"))) | |
610 "hsw_decodern,hsw_p0156+hsw_p4p7") | |
611 | |
612 ;; read-modify-store instructions produce 4 uops so they have to be | |
613 ;; decoded on hsw_decoder0 as well. | |
614 (define_insn_reservation "hsw_insn_both" 4 | |
615 (and (eq_attr "cpu" "haswell") | |
616 (and (eq_attr "memory" "both") | |
617 (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp"))) | |
618 "hsw_decoder0,hsw_p23+hsw_p0156+hsw_p4p7") |