111
|
1 ;; Scheduling for Haswell and derived processors.
|
131
|
2 ;; Copyright (C) 2004-2018 Free Software Foundation, Inc.
|
111
|
3 ;;
|
|
4 ;; This file is part of GCC.
|
|
5 ;;
|
|
6 ;; GCC is free software; you can redistribute it and/or modify
|
|
7 ;; it under the terms of the GNU General Public License as published by
|
|
8 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
9 ;; any later version.
|
|
10 ;;
|
|
11 ;; GCC is distributed in the hope that it will be useful,
|
|
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 ;; GNU General Public License for more details.
|
|
15 ;;
|
|
16 ;; You should have received a copy of the GNU General Public License
|
|
17 ;; along with GCC; see the file COPYING3. If not see
|
|
18 ;; <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 ;; The scheduling description in this file is based on core2.md.
|
|
21 ;; The major difference from the CORE2 pipeline is that HASWELL has
|
|
22 ;; two MU for load and one MU for store.
|
|
;; Automata used by this description: decode, core execution ports,
;; the integer / x87 / SSE dividers, and the load and store ports.
(define_automaton
  "haswell_decoder,haswell_core,haswell_idiv,haswell_fdiv,haswell_ssediv,haswell_load,haswell_store")
|
|
24
|
|
25 ;; The CPU domain, used for HASWELL bypass latencies
|
|
;; hsw_domain is computed from the insn "type" (and, for the SSE types,
;; from "mode"):
;;   - types in the first list (fmov, fop, ...)  -> float
;;   - types in the SSE list: FP modes -> float, SImode -> int,
;;     any other mode -> simd
;;   - types in the MMX list                     -> simd
;;   - everything else                           -> int
(define_attr "hsw_domain" "int,float,simd"
  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
           (const_string "float")
         (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
                          sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
                          ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
           (cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF")
                    (const_string "float")
                  (eq_attr "mode" "SI")
                    (const_string "int")]
                 (const_string "simd"))
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
           (const_string "simd")]
        (const_string "int")))
|
|
40
|
|
;; The four decoder units; all of them live in the haswell_decoder
;; automaton.
(define_cpu_unit "hsw_decoder0,hsw_decoder1,hsw_decoder2,hsw_decoder3"
                 "haswell_decoder")
|
|
45
|
|
;; hsw_decoder0 is to be filled first: hsw_decoder1, hsw_decoder2 and
;; hsw_decoder3 may only be reserved when hsw_decoder0 is reserved.
(presence_set "hsw_decoder1,hsw_decoder2,hsw_decoder3" "hsw_decoder0")
|
|
52
|
|
;; Most instructions can be decoded on any of the four decoders.
|
|
;; Any one of hsw_decoder0 .. hsw_decoder3.
(define_reservation "hsw_decodern"
  "(hsw_decoder0|hsw_decoder1|hsw_decoder2|hsw_decoder3)")
|
|
55
|
|
;; The out-of-order core has eight pipelines.  These are similar to the
;; Pentium Pro's five pipelines.  Ports 2 and 3 are responsible for
;; memory loads, port 7 for store address calculations, port 4 for
;; memory stores, and ports 0, 1, 5 and 6 for everything else.
|
|
60
|
|
;; Execution ports, grouped by the automaton they belong to.
(define_cpu_unit "hsw_p0,hsw_p1,hsw_p5,hsw_p6" "haswell_core")
(define_cpu_unit "hsw_p2,hsw_p3"               "haswell_load")
(define_cpu_unit "hsw_p4,hsw_p7"               "haswell_store")

;; Divider units, one automaton each.
(define_cpu_unit "hsw_idiv"   "haswell_idiv")
(define_cpu_unit "hsw_fdiv"   "haswell_fdiv")
(define_cpu_unit "hsw_ssediv" "haswell_ssediv")
|
|
67
|
|
;; Named port groups.  In a reservation string "a|b" means either
;; unit, while "a+b" means both units in the same cycle.

;; Any one of the four core ports.
(define_reservation "hsw_p0156"    "hsw_p0|hsw_p1|hsw_p5|hsw_p6")
;; All four core ports at once.
(define_reservation "hsw_p0p1p5p6" "hsw_p0+hsw_p1+hsw_p5+hsw_p6")
;; Either load port.
(define_reservation "hsw_p23"      "hsw_p2|hsw_p3")
;; Both store-automaton ports at once.
(define_reservation "hsw_p4p7"     "hsw_p4+hsw_p7")
;; Any one of ports 2, 3 and 7.
(define_reservation "hsw_p237"     "hsw_p2|hsw_p3|hsw_p7")
;; Any one of ports 0, 1 and 5.
(define_reservation "hsw_p015"     "hsw_p0|hsw_p1|hsw_p5")
;; Either of ports 0 and 1.
(define_reservation "hsw_p01"      "hsw_p0|hsw_p1")
|
|
75
|
|
;; Types other/multi/str: latency 6, reserving only hsw_decoder0.
(define_insn_reservation "hsw_complex_insn" 6
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "other,multi,str"))
  "hsw_decoder0")
|
|
80
|
|
;; Calls (call, callv): latency 1, reserving only hsw_decoder0.
(define_insn_reservation "hsw_call" 1
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "call,callv"))
  "hsw_decoder0")
|
|
85
|
|
86 ;; imov with memory operands does not use the integer units.
|
|
87 ;; imovx always decodes to one uop, and also doesn't use the integer
|
|
88 ;; units if it has memory operands.
|
|
;; imov/imovx without memory operands: latency 1; any decoder, then
;; any one of ports 0, 1, 5, 6.
(define_insn_reservation "hsw_imov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "imov,imovx")))
  "hsw_decodern,hsw_p0156")
|
|
94
|
|
;; imov/imovx loading from memory: latency 2; any decoder, then one of
;; the load ports only.
(define_insn_reservation "hsw_imov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "imov,imovx")))
  "hsw_decodern,hsw_p23")
|
|
100
|
|
;; imov storing to memory: latency 3; any decoder, then port 4 together
;; with one of ports 2, 3, 7.
(define_insn_reservation "hsw_imov_store" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "imov")))
  "hsw_decodern,hsw_p4+(hsw_p2|hsw_p3|hsw_p7)")
|
|
106
|
|
;; Register-only icmov: latency 2; any decoder, then a core port in
;; each of the next two cycles.
(define_insn_reservation "hsw_icmov" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "icmov")))
  "hsw_decodern,hsw_p0156,hsw_p0156")
|
|
112
|
|
;; icmov with a memory source: latency 2; any decoder, then a load port
;; plus a core port, then a core port again.
(define_insn_reservation "hsw_icmov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "icmov")))
  "hsw_decodern,hsw_p23+hsw_p0156,hsw_p0156")
|
|
118
|
|
;; push with memory attribute "store": latency 3; any decoder, then
;; port 4 together with one of ports 2, 3, 7.
(define_insn_reservation "hsw_push_reg" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "push")))
  "hsw_decodern,hsw_p4+hsw_p237")
|
|
124
|
|
;; push with memory attribute "both": latency 3; like hsw_push_reg but
;; with one more cycle on one of ports 2, 3, 7.
(define_insn_reservation "hsw_push_mem" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "push")))
  "hsw_decodern,hsw_p4+hsw_p237,hsw_p237")
|
|
130
|
|
131 ;; Consider lea latency as having 2 components.
|
|
;; lea without memory access: latency 1; any decoder, then port 1 or
;; port 5.
(define_insn_reservation "hsw_lea" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "lea")))
  "hsw_decodern,hsw_p1|hsw_p5")
|
|
137
|
|
;; Register shifts and rotates: latency 1; any decoder, then port 0 or
;; port 6.
(define_insn_reservation "hsw_shift_rotate" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "hsw_decodern,hsw_p0|hsw_p6")
|
|
143
|
|
;; Shifts and rotates with any memory operand: latency 1; any decoder,
;; then (port 0 or 6) together with one of ports 2, 3, 7 and port 4.
(define_insn_reservation "hsw_shift_rotate_mem" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "hsw_decodern,(hsw_p0|hsw_p6)+hsw_p237+hsw_p4")
|
|
149
|
|
;; ibr without memory operand: latency 1; any decoder, then port 6.
(define_insn_reservation "hsw_branch" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ibr")))
  "hsw_decodern,hsw_p6")
|
|
155
|
|
;; ibr with a memory operand: latency 2; hsw_decoder0 only, then a load
;; port together with port 6.
(define_insn_reservation "hsw_indirect_branch" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "ibr")))
  "hsw_decoder0,hsw_p23+hsw_p6")
|
|
161
|
|
;; leave: latency 4; hsw_decoder0 only, then a load port plus a core
;; port, then a core port again.
(define_insn_reservation "hsw_leave" 4
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "leave"))
  "hsw_decoder0,hsw_p23+hsw_p0156,hsw_p0156")
|
|
166
|
|
167 ;; imul and imulx with two/three operands only execute on port 1.
|
|
;; Register-only imul: latency 3; any decoder, then port 1.
(define_insn_reservation "hsw_imul" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "imul")))
  "hsw_decodern,hsw_p1")
|
|
173
|
|
;; imul with any memory operand: latency 3; any decoder, then a load
;; port together with port 1.
(define_insn_reservation "hsw_imul_mem" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "imul")))
  "hsw_decodern,hsw_p23+hsw_p1")
|
|
179
|
|
;; Register-only imulx: latency 4; any decoder, then a core port in
;; each of the next two cycles.
(define_insn_reservation "hsw_imulx" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "imulx")))
  "hsw_decodern,hsw_p0156,hsw_p0156")
|
|
185
|
|
;; imulx with any memory operand: latency 4; any decoder, then a load
;; port plus a core port, then a core port again.  The final cycle uses
;; hsw_p0156, the same port group as the register-only hsw_imulx form,
;; instead of an alternation that named hsw_p6 twice.
(define_insn_reservation "hsw_imulx_mem" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "imulx")))
  "hsw_decodern,hsw_p23+hsw_p0156,hsw_p0156")
|
|
191
|
|
192
|
|
193 ;; div and idiv are very similar, so we model them the same.
|
|
194 ;; Use the same latency for all QI,HI and SI modes.
|
|
;; Register-only idiv: latency 23; hsw_decoder0 only, then all four
;; core ports plus the integer divider for 9 consecutive cycles.
(define_insn_reservation "hsw_idiv" 23
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "idiv")))
  "hsw_decoder0,(hsw_p0p1p5p6+hsw_idiv)*9")
|
|
200
|
|
;; idiv with a memory source: latency 23; hsw_decoder0 only, one cycle
;; on a load port + port 0 + the integer divider, then the same 9-cycle
;; pattern as hsw_idiv.
(define_insn_reservation "hsw_idiv_load" 23
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "idiv")))
  "hsw_decoder0,hsw_p23+hsw_p0+hsw_idiv,(hsw_p0p1p5p6+hsw_idiv)*9")
|
|
206
|
|
207 ;; x87 floating point operations.
|
|
208
|
|
;; fxch: latency 0; occupies a decoder only, no execution port.
(define_insn_reservation "hsw_fxch" 0
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "fxch"))
  "hsw_decodern")
|
|
213
|
|
;; fop with memory attribute none or unknown: latency 3; any decoder,
;; then port 1.
(define_insn_reservation "hsw_fop" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none,unknown")
            (eq_attr "type" "fop")))
  "hsw_decodern,hsw_p1")
|
|
219
|
|
;; fop with a memory source: latency 5; any decoder, then a load port
;; together with port 1, then port 1 again.
(define_insn_reservation "hsw_fop_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fop")))
  "hsw_decodern,hsw_p23+hsw_p1,hsw_p1")
|
|
225
|
|
;; fop with a memory destination: latency 3; any decoder, two cycles on
;; port 0, then port 0 together with ports 4 and 3.
(define_insn_reservation "hsw_fop_store" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "fop")))
  "hsw_decodern,hsw_p0,hsw_p0,hsw_p0+hsw_p4+hsw_p3")
|
|
231
|
|
;; fop that both loads and stores: latency 5; any decoder, then port 2
;; with port 0, then port 0 together with ports 4 and 3.
;; NOTE(review): the load is pinned to hsw_p2 and the store address to
;; hsw_p3, whereas other reservations in this file use hsw_p23 /
;; hsw_p237 -- confirm that this is intended.
(define_insn_reservation "hsw_fop_both" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "fop")))
  "hsw_decodern,hsw_p2+hsw_p0,hsw_p0+hsw_p4+hsw_p3")
|
|
237
|
|
;; fsgn: latency 1; any decoder, then port 0.
(define_insn_reservation "hsw_fsgn" 1
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "fsgn"))
  "hsw_decodern,hsw_p0")
|
|
242
|
|
;; fistp: latency 7; hsw_decoder0 only, then port 1, port 4 and one of
;; ports 2/3 in the same cycle.
(define_insn_reservation "hsw_fistp" 7
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "fistp"))
  "hsw_decoder0,hsw_p1+hsw_p4+hsw_p23")
|
|
247
|
|
;; fcmov: latency 2; hsw_decoder0 only, then ports 0 and 5 together,
;; then port 0 again.
(define_insn_reservation "hsw_fcmov" 2
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "fcmov"))
  "hsw_decoder0,hsw_p0+hsw_p5,hsw_p0")
|
|
252
|
|
;; Register-only fcmp: latency 1; any decoder, then port 1.
(define_insn_reservation "hsw_fcmp" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fcmp")))
  "hsw_decodern,hsw_p1")
|
|
258
|
|
;; fcmp with a memory source: latency 1; any decoder, then a load port
;; together with port 1.
(define_insn_reservation "hsw_fcmp_load" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fcmp")))
  "hsw_decodern,hsw_p23+hsw_p1")
|
|
264
|
|
;; Register-only fmov: latency 1; any decoder, then port 0 or port 1.
(define_insn_reservation "hsw_fmov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmov")))
  "hsw_decodern,hsw_p01")
|
|
270
|
|
;; fmov load in any mode except XF: latency 3; any decoder, then one of
;; the load ports.
(define_insn_reservation "hsw_fmov_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "!XF")
                 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p23")
|
|
277
|
|
;; XFmode fmov load: latency 3; any decoder, then a load port together
;; with port 0 for two consecutive cycles.
(define_insn_reservation "hsw_fmov_XF_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fmov"))))
  "hsw_decodern,(hsw_p23+hsw_p0)*2")
|
|
284
|
|
;; fmov store in any mode except XF: latency 1; any decoder, then ports
;; 4 and 7 together.
(define_insn_reservation "hsw_fmov_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "!XF")
                 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p4p7")
|
|
291
|
|
;; XFmode fmov store: latency 3; any decoder, then ports 4 and 7
;; together in each of the next two cycles.
(define_insn_reservation "hsw_fmov_XF_store" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p4p7,hsw_p4p7")
|
|
298
|
|
;; Register-only fmul: latency 4; any decoder, then port 0 or port 1.
(define_insn_reservation "hsw_fmul" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmul")))
  "hsw_decodern,hsw_p01")
|
|
304
|
|
;; fmul with a memory source: latency 4; any decoder, then a load port
;; together with port 0 or port 1.
(define_insn_reservation "hsw_fmul_load" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fmul")))
  "hsw_decodern,hsw_p23+hsw_p01")
|
|
310
|
|
;; fdiv latencies depend on the mode of the operands.  XFmode gives
;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
;; Division by a power of 2 takes only 9 cycles, but we cannot model
;; that.  Throughput is equal to latency - 1, which we model using the
;; hsw_fdiv unit of the haswell_fdiv automaton.
|
|
;; SFmode fdiv/fpspc, register-only: latency 18; any decoder, port 0
;; with the x87 divider for one cycle, then the divider alone for 16.
(define_insn_reservation "hsw_fdiv_SF" 18
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*16")
|
|
322
|
|
;; SFmode fdiv/fpspc with a memory source: latency 19; as hsw_fdiv_SF
;; but the first execution cycle also takes a load port.
(define_insn_reservation "hsw_fdiv_SF_load" 19
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*16")
|
|
329
|
|
;; DFmode fdiv/fpspc, register-only: latency 32; any decoder, port 0
;; with the x87 divider for one cycle, then the divider alone for 30.
(define_insn_reservation "hsw_fdiv_DF" 32
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*30")
|
|
336
|
|
;; DFmode fdiv/fpspc with a memory source: latency 33; as hsw_fdiv_DF
;; but the first execution cycle also takes a load port.
(define_insn_reservation "hsw_fdiv_DF_load" 33
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*30")
|
|
343
|
|
;; XFmode fdiv/fpspc, register-only: latency 38; any decoder, port 0
;; with the x87 divider for one cycle, then the divider alone for 36.
(define_insn_reservation "hsw_fdiv_XF" 38
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*36")
|
|
350
|
|
;; XFmode fdiv/fpspc with a memory source: latency 39; any decoder,
;; then a load port + port 0 + the x87 divider for one cycle, then the
;; divider alone for 36 cycles.  The load may issue on either load
;; port (hsw_p23), matching the SF and DF load variants, rather than
;; being pinned to hsw_p2.
(define_insn_reservation "hsw_fdiv_XF_load" 39
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*36")
|
|
357
|
|
358 ;; MMX instructions.
|
|
359
|
|
;; mmxadd/sseiadd, register-only: latency 1; any decoder, then port 1
;; or port 5.
(define_insn_reservation "hsw_mmx_add" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxadd,sseiadd")))
  "hsw_decodern,hsw_p1|hsw_p5")
|
|
365
|
|
;; mmxadd/sseiadd with a memory source: latency 2; any decoder, then a
;; load port together with port 1 or port 5.
(define_insn_reservation "hsw_mmx_add_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxadd,sseiadd")))
  "hsw_decodern,hsw_p23+(hsw_p1|hsw_p5)")
|
|
371
|
|
;; mmxshft, register-only: latency 1; any decoder, then port 0.
(define_insn_reservation "hsw_mmx_shft" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxshft")))
  "hsw_decodern,hsw_p0")
|
|
377
|
|
;; mmxshft with a memory source: latency 2; any decoder, then a load
;; port together with port 0.
(define_insn_reservation "hsw_mmx_shft_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxshft")))
  "hsw_decodern,hsw_p23+hsw_p0")
|
|
383
|
|
;; sseishft with a non-zero length_immediate, register-only: latency 1;
;; any decoder, then port 0 or port 1.
(define_insn_reservation "hsw_mmx_sse_shft" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "!0"))))
  "hsw_decodern,hsw_p01")
|
|
390
|
|
;; sseishft with a non-zero length_immediate and a memory source:
;; latency 2; any decoder, then port 0 or 1 together with a load port.
(define_insn_reservation "hsw_mmx_sse_shft_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "!0"))))
  "hsw_decodern,hsw_p01+hsw_p23")
|
|
397
|
|
;; sseishft with length_immediate 0, register-only: latency 2; any
;; decoder, then port 0 or port 1.
(define_insn_reservation "hsw_mmx_sse_shft1" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "0"))))
  "hsw_decodern,hsw_p01")
|
|
404
|
|
;; sseishft with length_immediate 0 and a memory source: latency 3;
;; any decoder, then port 0 or 1 together with a load port.
(define_insn_reservation "hsw_mmx_sse_shft1_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "0"))))
  "hsw_decodern,hsw_p01+hsw_p23")
|
|
411
|
|
;; mmxmul/sseimul, register-only: latency 5; any decoder, then port 0
;; or port 1.
(define_insn_reservation "hsw_mmx_mul" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxmul,sseimul")))
  "hsw_decodern,hsw_p01")
|
|
417
|
|
;; mmxmul/sseimul with a memory source: latency 5; any decoder, then a
;; load port together with port 0 or port 1.
;; The memory test must be "load": with "none" this condition would be
;; identical to that of hsw_mmx_mul, so this reservation could never be
;; selected and load forms would get no reservation from this pair.
(define_insn_reservation "hsw_mmx_mul_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxmul,sseimul")))
  "hsw_decodern,hsw_p23+hsw_p01")
|
|
423
|
|
;; DImode mmxcvt: latency 4; any decoder, then port 1.
(define_insn_reservation "hsw_sse_mmxcvt" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "DI")
            (eq_attr "type" "mmxcvt")))
  "hsw_decodern,hsw_p1")
|
|
429
|
|
430 ;; (define_insn_reservation "hsw_sse_mmxshft" 2
|
131
|
431 ;; (and (eq_attr "cpu" "generic,haswell")
|
111
|
432 ;; (and (eq_attr "mode" "TI")
|
|
433 ;; (eq_attr "type" "mmxshft")))
|
|
434 ;; "hsw_decodern,hsw_p01")
|
|
435
|
|
436 ;; The sfence instruction.
|
|
;; Type sse with memory attribute "unknown": latency 2; hsw_decoder0
;; only, then a load port together with port 4.
(define_insn_reservation "hsw_sse_sfence" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "unknown")
            (eq_attr "type" "sse")))
  "hsw_decoder0,hsw_p23+hsw_p4")
|
|
442
|
|
;; Type sse in SF or DF mode: latency 3; any decoder, then port 0 or
;; port 1.
(define_insn_reservation "hsw_sse_SFDF" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "SF,DF")
            (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")
|
|
448
|
|
;; Type sse in V4SF mode: latency 4; any decoder, then port 0 or
;; port 1.
(define_insn_reservation "hsw_sse_V4SF" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "V4SF")
            (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")
|
|
454
|
|
;; Type sse in V8SF or V4DF mode: latency 4; any decoder, then port 0
;; or port 1.
(define_insn_reservation "hsw_sse_V8SF" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "V8SF,V4DF")
            (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")
|
|
460
|
|
;; sseadd1/ssecmp/ssecomi, register-only: latency 3; any decoder, then
;; port 0 or port 1.
(define_insn_reservation "hsw_sse_addcmp" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "sseadd1,ssecmp,ssecomi")))
  "hsw_decodern,hsw_p01")
|
|
466
|
|
;; sseadd1/ssecmp/ssecomi with a memory source: latency 3; any decoder,
;; then a load port together with port 0 or port 1.
(define_insn_reservation "hsw_sse_addcmp_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "sseadd1,ssecmp,ssecomi")))
  "hsw_decodern,hsw_p23+hsw_p01")
|
|
472
|
|
;; sselog/sselog1, register-only: latency 1; any decoder, then one of
;; ports 0, 1, 5.
(define_insn_reservation "hsw_sse_logic" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "sselog,sselog1")))
  "hsw_decodern,hsw_p015")
|
|
478
|
|
;; sselog/sselog1 with a memory source: latency 2; any decoder, then
;; one of ports 0, 1, 5 together with a load port.
(define_insn_reservation "hsw_sse_logic_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "sselog,sselog1")))
  "hsw_decodern,hsw_p015+hsw_p23")
|
|
484
|
|
;; sseadd, register-only: latency 3; any decoder, then port 1 or
;; port 5.
(define_insn_reservation "hsw_sse_add" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "sseadd")))
  "hsw_decodern,hsw_p1|hsw_p5")
|
|
490
|
|
;; sseadd with a memory source: latency 3; any decoder, then port 1 or
;; port 5 together with a load port.
(define_insn_reservation "hsw_sse_add_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "sseadd")))
  "hsw_decodern,(hsw_p1|hsw_p5)+hsw_p23")
|
|
496
|
|
;; ssemul, register-only: latency 5; any decoder, then port 0.
(define_insn_reservation "hsw_sse_mul" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssemul")))
  "hsw_decodern,hsw_p0")
|
|
502
|
|
;; ssemul with a memory source: latency 5; any decoder, then port 0
;; together with a load port.
(define_insn_reservation "hsw_sse_mul_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssemul")))
  "hsw_decodern,hsw_p0+hsw_p23")
|
|
508 ;; Use skylake pipeline.
|
|
;; ssemuladd, register-only: latency 5; any decoder, then port 0 or
;; port 1.
(define_insn_reservation "hsw_sse_muladd" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssemuladd")))
  "hsw_decodern,hsw_p01")
|
|
514
|
|
;; ssemuladd with a memory source: latency 5; any decoder, then port 0
;; or port 1 together with a load port.
(define_insn_reservation "hsw_sse_muladd_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssemuladd")))
  "hsw_decodern,hsw_p01+hsw_p23")
|
|
520
|
|
;; ssediv in SF, V4SF or V8SF mode, register-only: latency 18; any
;; decoder, one cycle on port 0, then the SSE divider for 14 cycles.
(define_insn_reservation "hsw_sse_div_SF" 18
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF,V4SF,V8SF")
                 (eq_attr "type" "ssediv"))))
  "hsw_decodern,hsw_p0,hsw_ssediv*14")
|
|
527
|
|
;; ssediv in SF, V4SF or V8SF mode with a memory source: latency 18;
;; any decoder, one cycle on a load port together with port 0, then the
;; SSE divider for 14 cycles.
;; The memory test must be "load": with "none" this condition would
;; duplicate hsw_sse_div_SF and this reservation would never be used.
(define_insn_reservation "hsw_sse_div_SF_load" 18
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF,V4SF,V8SF")
                 (eq_attr "type" "ssediv"))))
  "hsw_decodern,(hsw_p23+hsw_p0),hsw_ssediv*14")
|
|
534
|
|
;; ssediv in DF, V2DF or V4DF mode, register-only: latency 28; any
;; decoder, one cycle on port 0, then the SSE divider for 20 cycles.
(define_insn_reservation "hsw_sse_div_DF" 28
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF,V2DF,V4DF")
                 (eq_attr "type" "ssediv"))))
  "hsw_decodern,hsw_p0,hsw_ssediv*20")
|
|
541
|
|
;; ssediv in DF, V2DF or V4DF mode with a memory source: latency 28;
;; any decoder, one cycle on a load port together with port 0, then the
;; SSE divider for 20 cycles.
;; The memory test must be "load": with "none" this condition would
;; duplicate hsw_sse_div_DF and this reservation would never be used.
(define_insn_reservation "hsw_sse_div_DF_load" 28
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF,V2DF,V4DF")
                 (eq_attr "type" "ssediv"))))
  "hsw_decodern,(hsw_p23+hsw_p0),hsw_ssediv*20")
|
|
548
|
|
;; sseicvt in any mode except SI, register-only: latency 4; any
;; decoder, then port 1.
;; SImode is excluded here because reservations are matched in
;; definition order: without the mode test this entry would also match
;; SImode insns and the dedicated hsw_sse_icvt_SI reservation (latency
;; 3), defined later, could never be selected.
(define_insn_reservation "hsw_sse_icvt" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "!SI")
                 (eq_attr "type" "sseicvt"))))
  "hsw_decodern,hsw_p1")
|
|
554
|
|
;; sseicvt in any mode except SI with a memory operand: latency 4; any
;; decoder, then a load port together with port 1.
;; SImode is excluded here because reservations are matched in
;; definition order: without the mode test this entry would also match
;; SImode insns and the dedicated hsw_sse_icvt_SI_load reservation
;; (latency 3), defined later, could never be selected.
(define_insn_reservation "hsw_sse_icvt_load" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "!SI")
                 (eq_attr "type" "sseicvt"))))
  "hsw_decodern,hsw_p23+hsw_p1")
|
|
560
|
|
561
|
|
;; SImode sseicvt, register-only: latency 3; any decoder, then port 1.
(define_insn_reservation "hsw_sse_icvt_SI" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "sseicvt"))))
  "hsw_decodern,hsw_p1")
|
|
568
|
|
;; SImode sseicvt with a memory operand: latency 3; any decoder, then a
;; load port together with port 1.
(define_insn_reservation "hsw_sse_icvt_SI_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "sseicvt"))))
  "hsw_decodern,hsw_p23+hsw_p1")
|
|
575
|
|
;; ssemov, register-only: latency 1; any decoder, then one of ports
;; 0, 1, 5.
(define_insn_reservation "hsw_sse_mov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p015")
|
|
581
|
|
;; ssemov load: latency 2; any decoder, then one of the load ports.
(define_insn_reservation "hsw_sse_mov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p23")
|
|
587
|
|
;; ssemov store: latency 1; any decoder, then ports 4 and 7 together.
(define_insn_reservation "hsw_sse_mov_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p4p7")
|
|
593
|
|
;; The listed simple types with memory attribute none or unknown:
;; latency 1; any decoder, then any one of ports 0, 1, 5, 6.
(define_insn_reservation "hsw_insn" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none,unknown")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p0156")
|
|
599
|
|
;; The listed simple types (here including pop) with a memory source:
;; latency 1; any decoder, then a load port together with a core port.
(define_insn_reservation "hsw_insn_load" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p23+hsw_p0156")
|
|
605
|
|
;; The listed simple types with a memory destination: latency 1; any
;; decoder, then a core port together with ports 4 and 7.
(define_insn_reservation "hsw_insn_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p0156+hsw_p4p7")
|
|
611
|
|
;; Read-modify-store instructions produce 4 uops.  Latency 4; after
;; decode they take a load port, a core port and ports 4 and 7 in the
;; same cycle.
;; NOTE(review): the original comment says these "have to be decoded on
;; hsw_decoder0 as well", but the reservation accepts any decoder
;; (hsw_decodern) -- confirm which of the two is intended.
(define_insn_reservation "hsw_insn_both" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p23+hsw_p0156+hsw_p4p7")
|