comparison gcc/config/i386/bdver1.md @ 68:561a7518be6b

update gcc-4.6
author Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
date Sun, 21 Aug 2011 07:07:55 +0900
parents
children 04ced10e8804
comparison
equal deleted inserted replaced
67:f6334be47118 68:561a7518be6b
1 ;; Copyright (C) 2010, Free Software Foundation, Inc.
2 ;;
3 ;; This file is part of GCC.
4 ;;
5 ;; GCC is free software; you can redistribute it and/or modify
6 ;; it under the terms of the GNU General Public License as published by
7 ;; the Free Software Foundation; either version 3, or (at your option)
8 ;; any later version.
9 ;;
10 ;; GCC is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;; GNU General Public License for more details.
14 ;;
15 ;; You should have received a copy of the GNU General Public License
16 ;; along with GCC; see the file COPYING3. If not see
17 ;; <http://www.gnu.org/licenses/>.
18 ;;
19 ;; AMD bdver1 Scheduling
20 ;;
21 ;; The bdver1 contains four pipelined FP units, two integer units and
22 ;; two address generation units.
23 ;;
24 ;; The predecode logic determines instruction boundaries within the 64-byte
25 ;; cache line, so the cache-line straddling problem of the K6 might be an
26 ;; issue here as well, but it is not noted in the documentation.
27 ;;
28 ;; Three DirectPath instruction decoders and only one VectorPath decoder
29 ;; are available. They can decode three DirectPath instructions or one
30 ;; VectorPath instruction per cycle.
31 ;;
32 ;; The load/store queue unit is not attached to the schedulers but
33 ;; communicates with all the execution units separately instead.
34
35
36 (define_attr "bdver1_decode" "direct,vector,double" ; decode class of an insn; tested by the integer and fcomi reservations below
37 (const_string "direct")) ; constant default value: "direct"
38
39 (define_automaton "bdver1,bdver1_int,bdver1_load,bdver1_mult,bdver1_fp") ; five automata; every cpu unit below names the one it belongs to
40
41 (define_cpu_unit "bdver1-decode0" "bdver1") ; direct decoder 0
42 (define_cpu_unit "bdver1-decode1" "bdver1") ; direct decoder 1
43 (define_cpu_unit "bdver1-decode2" "bdver1") ; direct decoder 2
44 (define_cpu_unit "bdver1-decodev" "bdver1") ; separate unit used only by the bdver1-vector reservation
45
46 ;; Model the fact that a double-decoded instruction may take 2 cycles
47 ;; to decode when decoder2 and, in the next cycle, decoder0
48 ;; are used (this is needed to allow a throughput of 1.5 double-decoded
49 ;; instructions per cycle).
50 ;;
51 ;; In order to avoid a dependence between the reservation of the decoder
52 ;; and other units, we model the decoder as a two-stage fully pipelined unit
53 ;; where only a double-decoded instruction may occupy the unit in the first cycle.
54 ;; With this scheme, however, two double instructions can be issued in cycle 0.
55 ;;
56 ;; Avoid this by using a presence set requiring decoder0 to be allocated
57 ;; too. Vector-decoded instructions then can't be issued when modeled
58 ;; as consuming decoder0+decoder1+decoder2.
59 ;; We solve that with a specialized vector decoder unit and an exclusion set.
60 (presence_set "bdver1-decode2" "bdver1-decode0") ; decode2 may be reserved only when decode0 is reserved as well
61 (exclusion_set "bdver1-decodev" "bdver1-decode0,bdver1-decode1,bdver1-decode2") ; decodev cannot be reserved together with any direct decoder
62
63 (define_reservation "bdver1-vector" "nothing,bdver1-decodev") ; idle first cycle, then the vector decoder
64 (define_reservation "bdver1-direct1" "nothing,bdver1-decode1") ; idle first cycle, then specifically decoder 1
65 (define_reservation "bdver1-direct" "nothing,
66 (bdver1-decode0 | bdver1-decode1
67 | bdver1-decode2)") ; idle first cycle, then any one of the three direct decoders
68 ;; Double instructions behave like two direct instructions.
69 (define_reservation "bdver1-double" "((bdver1-decode2,bdver1-decode0)
70 | (nothing,(bdver1-decode0 + bdver1-decode1))
71 | (nothing,(bdver1-decode1 + bdver1-decode2)))") ; decode2 then decode0 on the next cycle, or two adjacent decoders together in the second cycle
72
73
74 (define_cpu_unit "bdver1-ieu0" "bdver1_int")
75 (define_cpu_unit "bdver1-ieu1" "bdver1_int")
76 (define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)") ; either of the two integer units
77
78 (define_cpu_unit "bdver1-agu0" "bdver1_int")
79 (define_cpu_unit "bdver1-agu1" "bdver1_int")
80 (define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)") ; either of the two address generation units
81
82 (define_cpu_unit "bdver1-mult" "bdver1_mult") ; multiplier unit, the only unit in the bdver1_mult automaton
83
84 (define_cpu_unit "bdver1-load0" "bdver1_load")
85 (define_cpu_unit "bdver1-load1" "bdver1_load")
86 (define_reservation "bdver1-load" "bdver1-agu,
87 (bdver1-load0 | bdver1-load1),nothing") ; an AGU cycle, then one load unit, then one idle cycle
88 ;; 128bit SSE instructions issue two loads at once.
89 (define_reservation "bdver1-load2" "bdver1-agu,
90 (bdver1-load0 + bdver1-load1),nothing") ; an AGU cycle, then both load units in the same cycle, then one idle cycle
91
92 (define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)") ; stores reserve one of the load units; no separate store unit is defined here
93 ;; 128bit SSE instructions issue two stores at once.
94 (define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)") ; both load units in the same cycle
95
96 ;; The FP operations start to execute at stage 12 in the pipeline, while
97 ;; integer operations start to execute at stage 9 for athlon and 11 for K8.
98 ;; Compensate for the difference for athlon because it results in significantly
99 ;; smaller automata.
100 ;; NOTE: the above information was just copied from athlon.md, and was not
101 ;; actually verified for bdver1.
102 (define_reservation "bdver1-fpsched" "nothing,nothing,nothing") ; three cycles with no unit reserved
103 ;; The floating point loads.
104 (define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)") ; the three-cycle FP delay overlapped with a single load
105 (define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)") ; the same, overlapped with the two-unit load
106
107 ;; Four FP units.
108 (define_cpu_unit "bdver1-ffma0" "bdver1_fp") ; the four FP units all live in the bdver1_fp automaton
109 (define_cpu_unit "bdver1-ffma1" "bdver1_fp")
110 (define_cpu_unit "bdver1-fmal0" "bdver1_fp")
111 (define_cpu_unit "bdver1-fmal1" "bdver1_fp")
112
113 (define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)") ; either FFMA unit
114 (define_reservation "bdver1-fcvt" "bdver1-ffma0") ; alias for ffma0, used by the conversion reservations
115 (define_reservation "bdver1-fmma" "bdver1-ffma0") ; alias for ffma0, used by the sseimul reservations
116 (define_reservation "bdver1-fxbar" "bdver1-ffma1") ; alias for ffma1
117 (define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)") ; either FMAL unit
118 (define_reservation "bdver1-fsto" "bdver1-fmal1") ; alias for fmal1
119
120 ;; Vector operations usually consume many of the pipes.
121 (define_reservation "bdver1-fvector" "(bdver1-ffma0 + bdver1-ffma1
122 + bdver1-fmal0 + bdver1-fmal1)") ; all four FP units in the same cycle
123
124 ;; Jump instructions are executed in the branch unit, completely transparently to us.
125 (define_insn_reservation "bdver1_call" 0 ; calls are modeled with latency 0
126 (and (eq_attr "cpu" "bdver1")
127 (eq_attr "type" "call,callv"))
128 "bdver1-double,bdver1-agu,bdver1-ieu") ; double decode, then an AGU, then an IEU
129 ;; PUSH mem is double path.
130 (define_insn_reservation "bdver1_push" 1 ; matches every push regardless of operand, latency 1
131 (and (eq_attr "cpu" "bdver1")
132 (eq_attr "type" "push"))
133 "bdver1-direct,bdver1-agu,bdver1-store") ; NOTE(review): the comment above says double path, but the direct decoder is reserved -- confirm
134 ;; POP r16/mem are double path.
135 (define_insn_reservation "bdver1_pop" 1 ; matches every pop regardless of operand, latency 1
136 (and (eq_attr "cpu" "bdver1")
137 (eq_attr "type" "pop"))
138 "bdver1-direct,(bdver1-ieu+bdver1-load)") ; NOTE(review): the comment above says double path, but the direct decoder is reserved -- confirm
139 ;; LEAVE no latency info so far, assume same with amdfam10.
140 (define_insn_reservation "bdver1_leave" 3 ; latency 3
141 (and (eq_attr "cpu" "bdver1")
142 (eq_attr "type" "leave"))
143 "bdver1-vector,(bdver1-ieu+bdver1-load)") ; vector decode, then an IEU started together with a load
144 ;; LEA executes in AGU unit with 1 cycle latency on BDVER1.
145 (define_insn_reservation "bdver1_lea" 1 ; latency 1
146 (and (eq_attr "cpu" "bdver1")
147 (eq_attr "type" "lea"))
148 "bdver1-direct,bdver1-agu,nothing") ; direct decode, one AGU cycle, then one idle cycle; no IEU is reserved
149
150 ;; MUL executes in special multiplier unit attached to IEU1.
151 (define_insn_reservation "bdver1_imul_DI" 6 ; DI-mode multiply without a memory operand, latency 6
152 (and (eq_attr "cpu" "bdver1")
153 (and (eq_attr "type" "imul")
154 (and (eq_attr "mode" "DI")
155 (eq_attr "memory" "none,unknown"))))
156 "bdver1-direct1,bdver1-ieu1,bdver1-mult,nothing,bdver1-ieu1") ; decoder 1, ieu1, multiplier, one idle cycle, ieu1 again
157 (define_insn_reservation "bdver1_imul" 4 ; multiply without a memory operand, no mode test, latency 4
158 (and (eq_attr "cpu" "bdver1")
159 (and (eq_attr "type" "imul")
160 (eq_attr "memory" "none,unknown")))
161 "bdver1-direct1,bdver1-ieu1,bdver1-mult,bdver1-ieu1") ; same as the DI form without the idle cycle
162 (define_insn_reservation "bdver1_imul_mem_DI" 10 ; DI-mode multiply with a memory source, latency 10
163 (and (eq_attr "cpu" "bdver1")
164 (and (eq_attr "type" "imul")
165 (and (eq_attr "mode" "DI")
166 (eq_attr "memory" "load,both"))))
167 "bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,nothing,bdver1-ieu") ; NOTE(review): reserves either IEU while the register forms pin ieu1 -- confirm this is intended
168 (define_insn_reservation "bdver1_imul_mem" 8 ; multiply with a memory source, no mode test, latency 8
169 (and (eq_attr "cpu" "bdver1")
170 (and (eq_attr "type" "imul")
171 (eq_attr "memory" "load,both")))
172 "bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,bdver1-ieu") ; NOTE(review): same either-IEU question as bdver1_imul_mem_DI
173
174 ;; IDIV cannot execute in parallel with other instructions. Dealing with it
175 ;; as with a short-latency vector instruction is a good approximation that keeps
176 ;; the scheduler from trying too hard to hide its latency by overlapping it with
177 ;; other instructions.
178 ;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
179 ;; of the other code.
180 (define_insn_reservation "bdver1_idiv" 6 ; divide without a memory operand, latency 6
181 (and (eq_attr "cpu" "bdver1")
182 (and (eq_attr "type" "idiv")
183 (eq_attr "memory" "none,unknown")))
184 "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))") ; holds ieu0 for 6 cycles; in parallel, after the FP delay, takes all four FP units for a cycle
185
186 (define_insn_reservation "bdver1_idiv_mem" 10 ; divide with a memory source, latency 10
187 (and (eq_attr "cpu" "bdver1")
188 (and (eq_attr "type" "idiv")
189 (eq_attr "memory" "load,both")))
190 "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))") ; a load followed by 6 cycles of ieu0; the FP reservation runs in parallel as in bdver1_idiv
191
192 ;; The parallelism of string instructions is not documented. Model it the same way
193 ;; as IDIV to create smaller automata. This probably does not matter much.
194 ;; Using the same heuristics for bdver1 as amdfam10 and K8 with IDIV.
195 (define_insn_reservation "bdver1_str" 6 ; string insns that touch memory, latency 6
196 (and (eq_attr "cpu" "bdver1")
197 (and (eq_attr "type" "str")
198 (eq_attr "memory" "load,both,store")))
199 "bdver1-vector,bdver1-load,bdver1-ieu0*6") ; vector decode, a load, then ieu0 for 6 cycles; unlike idiv, no FP units are reserved
200
201 ;; Integer instructions.
202 (define_insn_reservation "bdver1_idirect" 1 ; direct-decoded integer insn without a memory operand, latency 1
203 (and (eq_attr "cpu" "bdver1")
204 (and (eq_attr "bdver1_decode" "direct")
205 (and (eq_attr "unit" "integer,unknown")
206 (eq_attr "memory" "none,unknown"))))
207 "bdver1-direct,bdver1-ieu")
208 (define_insn_reservation "bdver1_ivector" 2 ; vector-decoded integer insn without a memory operand, latency 2
209 (and (eq_attr "cpu" "bdver1")
210 (and (eq_attr "bdver1_decode" "vector")
211 (and (eq_attr "unit" "integer,unknown")
212 (eq_attr "memory" "none,unknown"))))
213 "bdver1-vector,bdver1-ieu,bdver1-ieu") ; reserves an IEU in each of two consecutive cycles
214 (define_insn_reservation "bdver1_idirect_loadmov" 4 ; integer move from memory, latency 4; no IEU is reserved
215 (and (eq_attr "cpu" "bdver1")
216 (and (eq_attr "type" "imov")
217 (eq_attr "memory" "load")))
218 "bdver1-direct,bdver1-load")
219 (define_insn_reservation "bdver1_idirect_load" 5 ; direct-decoded integer insn with a memory source, latency 5
220 (and (eq_attr "cpu" "bdver1")
221 (and (eq_attr "bdver1_decode" "direct")
222 (and (eq_attr "unit" "integer,unknown")
223 (eq_attr "memory" "load"))))
224 "bdver1-direct,bdver1-load,bdver1-ieu")
225 (define_insn_reservation "bdver1_ivector_load" 6 ; vector-decoded integer insn with a memory source, latency 6
226 (and (eq_attr "cpu" "bdver1")
227 (and (eq_attr "bdver1_decode" "vector")
228 (and (eq_attr "unit" "integer,unknown")
229 (eq_attr "memory" "load"))))
230 "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu") ; a load, then an IEU in each of two consecutive cycles
231 (define_insn_reservation "bdver1_idirect_movstore" 4 ; integer move to memory, latency 4; no IEU is reserved
232 (and (eq_attr "cpu" "bdver1")
233 (and (eq_attr "type" "imov")
234 (eq_attr "memory" "store")))
235 "bdver1-direct,bdver1-agu,bdver1-store")
236 (define_insn_reservation "bdver1_idirect_both" 4 ; direct-decoded integer insn that both loads and stores, latency 4
237 (and (eq_attr "cpu" "bdver1")
238 (and (eq_attr "bdver1_decode" "direct")
239 (and (eq_attr "unit" "integer,unknown")
240 (eq_attr "memory" "both"))))
241 "bdver1-direct,bdver1-load,
242 bdver1-ieu,bdver1-store,
243 bdver1-store") ; load, IEU, then the store reservation in two consecutive cycles
244 (define_insn_reservation "bdver1_ivector_both" 5 ; vector-decoded integer insn that both loads and stores, latency 5
245 (and (eq_attr "cpu" "bdver1")
246 (and (eq_attr "bdver1_decode" "vector")
247 (and (eq_attr "unit" "integer,unknown")
248 (eq_attr "memory" "both"))))
249 "bdver1-vector,bdver1-load,
250 bdver1-ieu,
251 bdver1-ieu,
252 bdver1-store") ; load, two IEU cycles, then a single store cycle
253 (define_insn_reservation "bdver1_idirect_store" 4 ; direct-decoded integer insn with a memory destination, latency 4
254 (and (eq_attr "cpu" "bdver1")
255 (and (eq_attr "bdver1_decode" "direct")
256 (and (eq_attr "unit" "integer,unknown")
257 (eq_attr "memory" "store"))))
258 "bdver1-direct,(bdver1-ieu+bdver1-agu),
259 bdver1-store") ; an IEU and an AGU in the same cycle, then the store
260 (define_insn_reservation "bdver1_ivector_store" 5 ; vector-decoded integer insn with a memory destination, latency 5
261 (and (eq_attr "cpu" "bdver1")
262 (and (eq_attr "bdver1_decode" "vector")
263 (and (eq_attr "unit" "integer,unknown")
264 (eq_attr "memory" "store"))))
265 "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,
266 bdver1-store") ; an IEU and an AGU together, one more IEU cycle, then the store
267
268 ;; BDVER1 floating point units.
269 (define_insn_reservation "bdver1_fldxf" 13 ; XF-mode x87 load, latency 13
270 (and (eq_attr "cpu" "bdver1")
271 (and (eq_attr "type" "fmov")
272 (and (eq_attr "memory" "load")
273 (eq_attr "mode" "XF"))))
274 "bdver1-vector,bdver1-fpload2,bdver1-fvector*9") ; two-unit load, then all four FP units for 9 cycles
275 (define_insn_reservation "bdver1_fld" 5 ; x87 load with no mode test, latency 5
276 (and (eq_attr "cpu" "bdver1")
277 (and (eq_attr "type" "fmov")
278 (eq_attr "memory" "load")))
279 "bdver1-direct,bdver1-fpload,bdver1-ffma")
280 (define_insn_reservation "bdver1_fstxf" 8 ; XF-mode x87 store, latency 8
281 (and (eq_attr "cpu" "bdver1")
282 (and (eq_attr "type" "fmov")
283 (and (eq_attr "memory" "store,both")
284 (eq_attr "mode" "XF"))))
285 "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))") ; both load units for the store while all FP units are held for 6 cycles
286 (define_insn_reservation "bdver1_fst" 2 ; x87 store with no mode test, latency 2
287 (and (eq_attr "cpu" "bdver1")
288 (and (eq_attr "type" "fmov")
289 (eq_attr "memory" "store,both")))
290 "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
291 (define_insn_reservation "bdver1_fist" 2 ; fistp/fisttp; same reservation string as bdver1_fst
292 (and (eq_attr "cpu" "bdver1")
293 (eq_attr "type" "fistp,fisttp"))
294 "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
295 (define_insn_reservation "bdver1_fmov_bdver1" 2 ; fmov with no memory test; the load/store forms have their own reservations above
296 (and (eq_attr "cpu" "bdver1")
297 (eq_attr "type" "fmov"))
298 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
299 (define_insn_reservation "bdver1_fadd_load" 10 ; type fop with a memory source, latency 10
300 (and (eq_attr "cpu" "bdver1")
301 (and (eq_attr "type" "fop")
302 (eq_attr "memory" "load")))
303 "bdver1-direct,bdver1-fpload,bdver1-ffma")
304 (define_insn_reservation "bdver1_fadd" 6 ; type fop with no memory test, latency 6
305 (and (eq_attr "cpu" "bdver1")
306 (eq_attr "type" "fop"))
307 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
308 (define_insn_reservation "bdver1_fmul_load" 10 ; x87 multiply with a memory source, latency 10
309 (and (eq_attr "cpu" "bdver1")
310 (and (eq_attr "type" "fmul")
311 (eq_attr "memory" "load")))
312 "bdver1-double,bdver1-fpload,bdver1-ffma") ; NOTE(review): double decode here, while bdver1_fmul and bdver1_fadd_load use direct -- confirm
313 (define_insn_reservation "bdver1_fmul" 6 ; x87 multiply with no memory test, latency 6
314 (and (eq_attr "cpu" "bdver1")
315 (eq_attr "type" "fmul"))
316 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
317 (define_insn_reservation "bdver1_fsgn" 2 ; type fsgn, latency 2
318 (and (eq_attr "cpu" "bdver1")
319 (eq_attr "type" "fsgn"))
320 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
321 (define_insn_reservation "bdver1_fdiv_load" 46 ; x87 divide with a memory source, latency 46
322 (and (eq_attr "cpu" "bdver1")
323 (and (eq_attr "type" "fdiv")
324 (eq_attr "memory" "load")))
325 "bdver1-direct,bdver1-fpload,bdver1-ffma") ; an FFMA unit is reserved for one cycle only despite the long latency
326 (define_insn_reservation "bdver1_fdiv" 42 ; x87 divide with no memory test, latency 42
327 (and (eq_attr "cpu" "bdver1")
328 (eq_attr "type" "fdiv"))
329 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
330 (define_insn_reservation "bdver1_fpspc_load" 103 ; type fpspc with a memory source, latency 103
331 (and (eq_attr "cpu" "bdver1")
332 (and (eq_attr "type" "fpspc")
333 (eq_attr "memory" "load")))
334 "bdver1-vector,bdver1-fpload,bdver1-fvector") ; vector decode, FP load, then all four FP units for a cycle
335 ;; fpspc without a load: no memory test and the FP scheduler delay instead of a load, as in bdver1_fcmov.
336 (define_insn_reservation "bdver1_fpspc" 100
337 (and (eq_attr "cpu" "bdver1")
338 (eq_attr "type" "fpspc"))
339 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
340 (define_insn_reservation "bdver1_fcmov_load" 17 ; fcmov with a memory source, latency 17
341 (and (eq_attr "cpu" "bdver1")
342 (and (eq_attr "type" "fcmov")
343 (eq_attr "memory" "load")))
344 "bdver1-vector,bdver1-fpload,bdver1-fvector")
345 (define_insn_reservation "bdver1_fcmov" 15 ; fcmov with no memory test, latency 15
346 (and (eq_attr "cpu" "bdver1")
347 (eq_attr "type" "fcmov"))
348 "bdver1-vector,bdver1-fpsched,bdver1-fvector") ; FP scheduler delay in place of the load
349 (define_insn_reservation "bdver1_fcomi_load" 6 ; double-decoded x87 compare with a memory source, latency 6
350 (and (eq_attr "cpu" "bdver1")
351 (and (eq_attr "type" "fcmp")
352 (and (eq_attr "bdver1_decode" "double")
353 (eq_attr "memory" "load"))))
354 "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
355 (define_insn_reservation "bdver1_fcomi" 2 ; double-decoded x87 compare with no memory test, latency 2
356 (and (eq_attr "cpu" "bdver1")
357 (and (eq_attr "bdver1_decode" "double")
358 (eq_attr "type" "fcmp")))
359 "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)") ; an FFMA unit or fmal1
360 (define_insn_reservation "bdver1_fcom_load" 6 ; x87 compare with a memory source and no decode test, latency 6
361 (and (eq_attr "cpu" "bdver1")
362 (and (eq_attr "type" "fcmp")
363 (eq_attr "memory" "load")))
364 "bdver1-direct,bdver1-fpload,bdver1-ffma")
365 (define_insn_reservation "bdver1_fcom" 2 ; x87 compare with no further tests, latency 2
366 (and (eq_attr "cpu" "bdver1")
367 (eq_attr "type" "fcmp"))
368 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
369 (define_insn_reservation "bdver1_fxch" 2 ; type fxch, latency 2
370 (and (eq_attr "cpu" "bdver1")
371 (eq_attr "type" "fxch"))
372 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
373
374 ;; SSE loads.
375 (define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4 ; vex-prefixed movu load in V4SF/V2DF mode, latency 4
376 (and (eq_attr "cpu" "bdver1")
377 (and (eq_attr "type" "ssemov")
378 (and (eq_attr "prefix" "vex")
379 (and (eq_attr "movu" "1")
380 (and (eq_attr "mode" "V4SF,V2DF")
381 (eq_attr "memory" "load"))))))
382 "bdver1-direct,bdver1-fpload") ; load only; no FP unit is reserved
383 (define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5 ; movu load in V8SF/V4DF mode, latency 5
384 (and (eq_attr "cpu" "bdver1")
385 (and (eq_attr "type" "ssemov")
386 (and (eq_attr "movu" "1")
387 (and (eq_attr "mode" "V8SF,V4DF")
388 (eq_attr "memory" "load")))))
389 "bdver1-double,bdver1-fpload")
390 (define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4 ; movu load in V4SF/V2DF mode with no prefix test, latency 4
391 (and (eq_attr "cpu" "bdver1")
392 (and (eq_attr "type" "ssemov")
393 (and (eq_attr "movu" "1")
394 (and (eq_attr "mode" "V4SF,V2DF")
395 (eq_attr "memory" "load")))))
396 "bdver1-direct,bdver1-fpload,bdver1-fmal") ; additionally reserves an FMAL unit after the load
397 (define_insn_reservation "bdver1_ssevector_avx128_load" 4 ; vex-prefixed load in V4SF/V2DF/TI mode, latency 4
398 (and (eq_attr "cpu" "bdver1")
399 (and (eq_attr "type" "ssemov")
400 (and (eq_attr "prefix" "vex")
401 (and (eq_attr "mode" "V4SF,V2DF,TI")
402 (eq_attr "memory" "load")))))
403 "bdver1-direct,bdver1-fpload,bdver1-fmal")
404 (define_insn_reservation "bdver1_ssevector_avx256_load" 5 ; load in V8SF/V4DF/OI mode, latency 5
405 (and (eq_attr "cpu" "bdver1")
406 (and (eq_attr "type" "ssemov")
407 (and (eq_attr "mode" "V8SF,V4DF,OI")
408 (eq_attr "memory" "load"))))
409 "bdver1-double,bdver1-fpload,bdver1-fmal")
410 (define_insn_reservation "bdver1_ssevector_sse128_load" 4 ; load in V4SF/V2DF/TI mode with no prefix test, latency 4
411 (and (eq_attr "cpu" "bdver1")
412 (and (eq_attr "type" "ssemov")
413 (and (eq_attr "mode" "V4SF,V2DF,TI")
414 (eq_attr "memory" "load"))))
415 "bdver1-direct,bdver1-fpload") ; load only; no FP unit is reserved
416 (define_insn_reservation "bdver1_ssescalar_movq_load" 4 ; DI-mode ssemov load, latency 4
417 (and (eq_attr "cpu" "bdver1")
418 (and (eq_attr "type" "ssemov")
419 (and (eq_attr "mode" "DI")
420 (eq_attr "memory" "load"))))
421 "bdver1-direct,bdver1-fpload,bdver1-fmal")
422 (define_insn_reservation "bdver1_ssescalar_vmovss_load" 4 ; vex-prefixed SF-mode load, latency 4
423 (and (eq_attr "cpu" "bdver1")
424 (and (eq_attr "type" "ssemov")
425 (and (eq_attr "prefix" "vex")
426 (and (eq_attr "mode" "SF")
427 (eq_attr "memory" "load")))))
428 "bdver1-direct,bdver1-fpload") ; load only; no FP unit is reserved
429 (define_insn_reservation "bdver1_ssescalar_sse128_load" 4 ; SF/DF-mode load with no prefix test, latency 4
430 (and (eq_attr "cpu" "bdver1")
431 (and (eq_attr "type" "ssemov")
432 (and (eq_attr "mode" "SF,DF")
433 (eq_attr "memory" "load"))))
434 "bdver1-direct,bdver1-fpload, bdver1-ffma")
435 (define_insn_reservation "bdver1_mmxsse_load" 4 ; any mmxmov/ssemov load with no mode test, latency 4
436 (and (eq_attr "cpu" "bdver1")
437 (and (eq_attr "type" "mmxmov,ssemov")
438 (eq_attr "memory" "load")))
439 "bdver1-direct,bdver1-fpload, bdver1-fmal")
440
441 ;; SSE stores.
442 (define_insn_reservation "bdver1_sse_store_avx256" 5 ; store in V8SF/V4DF/OI mode, latency 5
443 (and (eq_attr "cpu" "bdver1")
444 (and (eq_attr "type" "ssemov")
445 (and (eq_attr "mode" "V8SF,V4DF,OI")
446 (eq_attr "memory" "store,both"))))
447 "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)") ; fmal1 plus a store for two consecutive cycles
448 (define_insn_reservation "bdver1_sse_store" 4 ; store in V4SF/V2DF/TI mode, latency 4
449 (and (eq_attr "cpu" "bdver1")
450 (and (eq_attr "type" "ssemov")
451 (and (eq_attr "mode" "V4SF,V2DF,TI")
452 (eq_attr "memory" "store,both"))))
453 "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)") ; same unit usage as the 256-bit form, direct decoded
454 (define_insn_reservation "bdver1_mmxsse_store_short" 4 ; any mmxmov/ssemov store with no mode test, latency 4
455 (and (eq_attr "cpu" "bdver1")
456 (and (eq_attr "type" "mmxmov,ssemov")
457 (eq_attr "memory" "store,both")))
458 "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)") ; a single fmal1-plus-store cycle
459
460 ;; Register moves.
461 (define_insn_reservation "bdver1_ssevector_avx256" 3 ; register move in V8SF/V4DF/OI mode, latency 3
462 (and (eq_attr "cpu" "bdver1")
463 (and (eq_attr "type" "ssemov")
464 (and (eq_attr "mode" "V8SF,V4DF,OI")
465 (eq_attr "memory" "none"))))
466 "bdver1-double,bdver1-fpsched,bdver1-fmal")
467 (define_insn_reservation "bdver1_movss_movsd" 2 ; register move in SF/DF mode, latency 2
468 (and (eq_attr "cpu" "bdver1")
469 (and (eq_attr "type" "ssemov")
470 (and (eq_attr "mode" "SF,DF")
471 (eq_attr "memory" "none"))))
472 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
473 (define_insn_reservation "bdver1_mmxssemov" 2 ; any mmxmov/ssemov register move with no mode test, latency 2
474 (and (eq_attr "cpu" "bdver1")
475 (and (eq_attr "type" "mmxmov,ssemov")
476 (eq_attr "memory" "none")))
477 "bdver1-direct,bdver1-fpsched,bdver1-fmal")
478 ;; SSE logs.
479 (define_insn_reservation "bdver1_sselog_load_256" 7 ; sselog/sselog1 in V8SF mode with a memory source, latency 7
480 (and (eq_attr "cpu" "bdver1")
481 (and (eq_attr "type" "sselog,sselog1")
482 (and (eq_attr "mode" "V8SF")
483 (eq_attr "memory" "load"))))
484 "bdver1-double,bdver1-fpload,bdver1-fmal")
485 (define_insn_reservation "bdver1_sselog_256" 3 ; sselog/sselog1 in V8SF mode with no memory test, latency 3
486 (and (eq_attr "cpu" "bdver1")
487 (and (eq_attr "type" "sselog,sselog1")
488 (eq_attr "mode" "V8SF")))
489 "bdver1-double,bdver1-fpsched,bdver1-fmal")
490 (define_insn_reservation "bdver1_sselog_load" 6 ; sselog/sselog1 with a memory source and no mode test, latency 6
491 (and (eq_attr "cpu" "bdver1")
492 (and (eq_attr "type" "sselog,sselog1")
493 (eq_attr "memory" "load")))
494 "bdver1-direct,bdver1-fpload,bdver1-fxbar") ; fxbar is ffma1, unlike the V8SF forms which reserve an FMAL unit
495 (define_insn_reservation "bdver1_sselog" 2 ; sselog/sselog1 with no further tests, latency 2
496 (and (eq_attr "cpu" "bdver1")
497 (eq_attr "type" "sselog,sselog1"))
498 "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
499
500 ;; PCMP actually executes in FMAL.
501 (define_insn_reservation "bdver1_ssecmp_load" 6 ; NOTE(review): the comment above says FMAL, but bdver1-ffma is reserved -- confirm which is intended
502 (and (eq_attr "cpu" "bdver1")
503 (and (eq_attr "type" "ssecmp")
504 (eq_attr "memory" "load")))
505 "bdver1-direct,bdver1-fpload,bdver1-ffma")
506 (define_insn_reservation "bdver1_ssecmp" 2 ; ssecmp with no memory test, latency 2
507 (and (eq_attr "cpu" "bdver1")
508 (eq_attr "type" "ssecmp"))
509 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
510 (define_insn_reservation "bdver1_ssecomi_load" 6 ; ssecomi with a memory source, latency 6
511 (and (eq_attr "cpu" "bdver1")
512 (and (eq_attr "type" "ssecomi")
513 (eq_attr "memory" "load")))
514 "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
515 (define_insn_reservation "bdver1_ssecomi" 2 ; ssecomi with no memory test, latency 2
516 (and (eq_attr "cpu" "bdver1")
517 (eq_attr "type" "ssecomi"))
518 "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)") ; an FFMA unit or fmal1
519
520 ;; Conversions behave very irregularly and the scheduling is critical here.
521 ;; Take each instruction separately.
522
523 ;; 256 bit conversion.
524 (define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8 ; ssecvt with a memory source where operand 0 or operand 1 is a 256-bit vector, latency 8
525 (and (eq_attr "cpu" "bdver1")
526 (and (eq_attr "type" "ssecvt")
527 (and (eq_attr "memory" "load")
528 (ior (ior (match_operand:V4DF 0 "register_operand")
529 (ior (match_operand:V8SF 0 "register_operand")
530 (match_operand:V8SI 0 "register_operand")))
531 (ior (match_operand:V4DF 1 "nonimmediate_operand")
532 (ior (match_operand:V8SF 1 "nonimmediate_operand")
533 (match_operand:V8SI 1 "nonimmediate_operand")))))))
534 "bdver1-vector,bdver1-fpload,bdver1-fvector")
535 (define_insn_reservation "bdver1_vcvtX2Y_avx256" 4 ; same operand test without a memory operand, latency 4
536 (and (eq_attr "cpu" "bdver1")
537 (and (eq_attr "type" "ssecvt")
538 (and (eq_attr "memory" "none")
539 (ior (ior (match_operand:V4DF 0 "register_operand")
540 (ior (match_operand:V8SF 0 "register_operand")
541 (match_operand:V8SI 0 "register_operand")))
542 (ior (match_operand:V4DF 1 "nonimmediate_operand")
543 (ior (match_operand:V8SF 1 "nonimmediate_operand")
544 (match_operand:V8SI 1 "nonimmediate_operand")))))))
545 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
546 ;; CVTSS2SD, CVTSD2SS.
547 (define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8 ; SF/DF-mode ssecvt with a memory source, latency 8
548 (and (eq_attr "cpu" "bdver1")
549 (and (eq_attr "type" "ssecvt")
550 (and (eq_attr "mode" "SF,DF")
551 (eq_attr "memory" "load"))))
552 "bdver1-direct,bdver1-fpload,bdver1-fcvt")
553 (define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4 ; SF/DF-mode ssecvt without a memory operand, latency 4
554 (and (eq_attr "cpu" "bdver1")
555 (and (eq_attr "type" "ssecvt")
556 (and (eq_attr "mode" "SF,DF")
557 (eq_attr "memory" "none"))))
558 "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
559 ;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
560 (define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8 ; SF/DF-mode sseicvt with a memory source, latency 8
561 (and (eq_attr "cpu" "bdver1")
562 (and (eq_attr "type" "sseicvt")
563 (and (eq_attr "mode" "SF,DF")
564 (eq_attr "memory" "load"))))
565 "bdver1-direct,bdver1-fpload,bdver1-fcvt")
566 (define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4 ; SF/DF-mode sseicvt without a memory operand, latency 4
567 (and (eq_attr "cpu" "bdver1")
568 (and (eq_attr "type" "sseicvt")
569 (and (eq_attr "mode" "SF,DF")
570 (eq_attr "memory" "none"))))
571 "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)") ; NOTE(review): the "nothing" alternative lets this issue without reserving fcvt -- confirm it is intended
572 ;; CVTPD2PS.
573 (define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8 ; V2DF operand 1 to V4SF operand 0 with a memory source, latency 8
574 (and (eq_attr "cpu" "bdver1")
575 (and (eq_attr "type" "ssecvt")
576 (and (eq_attr "memory" "load")
577 (and (match_operand:V4SF 0 "register_operand")
578 (match_operand:V2DF 1 "nonimmediate_operand")))))
579 "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
580 (define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4 ; same operand test without a memory operand, latency 4
581 (and (eq_attr "cpu" "bdver1")
582 (and (eq_attr "type" "ssecvt")
583 (and (eq_attr "memory" "none")
584 (and (match_operand:V4SF 0 "register_operand")
585 (match_operand:V2DF 1 "nonimmediate_operand")))))
586 "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)") ; ffma1 or ffma0
587 ;; CVTPI2PS, CVTDQ2PS.
588 (define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8 ; V2SI or V4SI operand 1 to V4SF operand 0 with a memory source, latency 8
589 (and (eq_attr "cpu" "bdver1")
590 (and (eq_attr "type" "ssecvt")
591 (and (eq_attr "memory" "load")
592 (and (match_operand:V4SF 0 "register_operand")
593 (ior (match_operand:V2SI 1 "nonimmediate_operand")
594 (match_operand:V4SI 1 "nonimmediate_operand"))))))
595 "bdver1-direct,bdver1-fpload,bdver1-fcvt")
596 (define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4 ; same operand test without a memory operand, latency 4
597 (and (eq_attr "cpu" "bdver1")
598 (and (eq_attr "type" "ssecvt")
599 (and (eq_attr "memory" "none")
600 (and (match_operand:V4SF 0 "register_operand")
601 (ior (match_operand:V2SI 1 "nonimmediate_operand")
602 (match_operand:V4SI 1 "nonimmediate_operand"))))))
603 "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
604 ;; CVTDQ2PD.
605 (define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8 ; V4SI operand 1 to V2DF operand 0 with a memory source, latency 8
606 (and (eq_attr "cpu" "bdver1")
607 (and (eq_attr "type" "ssecvt")
608 (and (eq_attr "memory" "load")
609 (and (match_operand:V2DF 0 "register_operand")
610 (match_operand:V4SI 1 "nonimmediate_operand")))))
611 "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
612 (define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4 ; same operand test without a memory operand, latency 4
613 (and (eq_attr "cpu" "bdver1")
614 (and (eq_attr "type" "ssecvt")
615 (and (eq_attr "memory" "none")
616 (and (match_operand:V2DF 0 "register_operand")
617 (match_operand:V4SI 1 "nonimmediate_operand")))))
618 "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
619 ;; CVTPS2PD, CVTPI2PD.
620 (define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6 ; V2SI or V4SF operand 1 to V2DF operand 0 with a memory source, latency 6
621 (and (eq_attr "cpu" "bdver1")
622 (and (eq_attr "type" "ssecvt")
623 (and (eq_attr "memory" "load")
624 (and (match_operand:V2DF 0 "register_operand")
625 (ior (match_operand:V2SI 1 "nonimmediate_operand")
626 (match_operand:V4SF 1 "nonimmediate_operand"))))))
627 "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
628 (define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2 ; V2SI or V4SF operand 1 to V2DF operand 0 without a memory operand, latency 2
629 (and (eq_attr "cpu" "bdver1")
630 (and (eq_attr "type" "ssecvt")
631 (and (eq_attr "memory" "none") ; memory sources are covered by bdver1_ssecvt_cvtps2pd_load
632 (and (match_operand:V2DF 0 "register_operand")
633 (ior (match_operand:V2SI 1 "nonimmediate_operand")
634 (match_operand:V4SF 1 "nonimmediate_operand"))))))
635 "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)") ; FP scheduler delay, then ffma1 or ffma0
636 ;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
637 (define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8 ; SI/DI-mode sseicvt with a memory source, latency 8
638 (and (eq_attr "cpu" "bdver1")
639 (and (eq_attr "type" "sseicvt")
640 (and (eq_attr "mode" "SI,DI")
641 (eq_attr "memory" "load"))))
642 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
643 (define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4 ; SI/DI-mode sseicvt without a memory operand, latency 4
644 (and (eq_attr "cpu" "bdver1")
645 (and (eq_attr "type" "sseicvt")
646 (and (eq_attr "mode" "SI,DI")
647 (eq_attr "memory" "none"))))
648 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)") ; ffma0 or fmal1
649 ;; CVTPD2PI, CVTTPD2PI.
650 (define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8 ; V2DF operand 1 to V2SI operand 0 with a memory source, latency 8
651 (and (eq_attr "cpu" "bdver1")
652 (and (eq_attr "type" "ssecvt")
653 (and (eq_attr "memory" "load")
654 (and (match_operand:V2DF 1 "nonimmediate_operand")
655 (match_operand:V2SI 0 "register_operand")))))
656 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
657 (define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4 ; same operand test without a memory operand, latency 4
658 (and (eq_attr "cpu" "bdver1")
659 (and (eq_attr "type" "ssecvt")
660 (and (eq_attr "memory" "none")
661 (and (match_operand:V2DF 1 "nonimmediate_operand")
662 (match_operand:V2SI 0 "register_operand")))))
663 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
664 ;; CVTPD2DQ, CVTTPD2DQ.
665 (define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6 ; V2DF operand 1 to V4SI operand 0 with a memory source, latency 6
666 (and (eq_attr "cpu" "bdver1")
667 (and (eq_attr "type" "ssecvt")
668 (and (eq_attr "memory" "load")
669 (and (match_operand:V2DF 1 "nonimmediate_operand")
670 (match_operand:V4SI 0 "register_operand")))))
671 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
672 (define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2 ; same operand test without a memory operand, latency 2
673 (and (eq_attr "cpu" "bdver1")
674 (and (eq_attr "type" "ssecvt")
675 (and (eq_attr "memory" "none")
676 (and (match_operand:V2DF 1 "nonimmediate_operand")
677 (match_operand:V4SI 0 "register_operand")))))
678 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
679 ;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
680 (define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8 ; V4SF operand 1 to V2SI or V4SI operand 0 with a memory source, latency 8
681 (and (eq_attr "cpu" "bdver1")
682 (and (eq_attr "type" "ssecvt")
683 (and (eq_attr "memory" "load")
684 (and (match_operand:V4SF 1 "nonimmediate_operand")
685 (ior (match_operand:V2SI 0 "register_operand")
686 (match_operand:V4SI 0 "register_operand"))))))
687 "bdver1-direct,bdver1-fpload,bdver1-fcvt") ; direct decode, FP load, then ffma0
688 (define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4 ; V4SF operand 1 to V2SI or V4SI operand 0 without a memory operand, latency 4
689 (and (eq_attr "cpu" "bdver1")
690 (and (eq_attr "type" "ssecvt")
691 (and (eq_attr "memory" "none")
692 (and (match_operand:V4SF 1 "nonimmediate_operand")
693 (ior (match_operand:V2SI 0 "register_operand")
694 (match_operand:V4SI 0 "register_operand"))))))
695 "bdver1-direct,bdver1-fpsched,bdver1-fcvt") ; direct decode, FP scheduler delay, then ffma0
696
697 ;; SSE MUL, ADD, and MULADD.
698 (define_insn_reservation "bdver1_ssemuladd_load_256" 11 ; ssemul/sseadd/ssemuladd in V8SF/V4DF mode with a memory source, latency 11
699 (and (eq_attr "cpu" "bdver1")
700 (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
701 (and (eq_attr "mode" "V8SF,V4DF")
702 (eq_attr "memory" "load"))))
703 "bdver1-double,bdver1-fpload,bdver1-ffma")
704 (define_insn_reservation "bdver1_ssemuladd_256" 7 ; the same types and modes without a memory operand, latency 7
705 (and (eq_attr "cpu" "bdver1")
706 (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
707 (and (eq_attr "mode" "V8SF,V4DF")
708 (eq_attr "memory" "none"))))
709 "bdver1-double,bdver1-fpsched,bdver1-ffma")
710 (define_insn_reservation "bdver1_ssemuladd_load" 10 ; ssemul/sseadd/ssemuladd with a memory source and no mode test, latency 10
711 (and (eq_attr "cpu" "bdver1")
712 (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
713 (eq_attr "memory" "load")))
714 "bdver1-direct,bdver1-fpload,bdver1-ffma")
715 (define_insn_reservation "bdver1_ssemuladd" 6 ; the same types without a memory operand, latency 6
716 (and (eq_attr "cpu" "bdver1")
717 (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
718 (eq_attr "memory" "none")))
719 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
720 (define_insn_reservation "bdver1_sseimul_load" 8 ; sseimul with a memory source, latency 8
721 (and (eq_attr "cpu" "bdver1")
722 (and (eq_attr "type" "sseimul")
723 (eq_attr "memory" "load")))
724 "bdver1-direct,bdver1-fpload,bdver1-fmma") ; fmma is ffma0
725 (define_insn_reservation "bdver1_sseimul" 4 ; sseimul without a memory operand, latency 4
726 (and (eq_attr "cpu" "bdver1")
727 (and (eq_attr "type" "sseimul")
728 (eq_attr "memory" "none")))
729 "bdver1-direct,bdver1-fpsched,bdver1-fmma")
730 (define_insn_reservation "bdver1_sseiadd_load" 6 ; sseiadd with a memory source, latency 6
731 (and (eq_attr "cpu" "bdver1")
732 (and (eq_attr "type" "sseiadd")
733 (eq_attr "memory" "load")))
734 "bdver1-direct,bdver1-fpload,bdver1-fmal")
735 (define_insn_reservation "bdver1_sseiadd" 2 ; sseiadd without a memory operand, latency 2
736 (and (eq_attr "cpu" "bdver1")
737 (and (eq_attr "type" "sseiadd")
738 (eq_attr "memory" "none")))
739 "bdver1-direct,bdver1-fpsched,bdver1-fmal") ; either FMAL unit
740
741 ;; SSE DIV: no throughput information (assume same as amdfam10).
742 (define_insn_reservation "bdver1_ssediv_double_load_256" 31 ; V4DF divide with a memory source, latency 31
743 (and (eq_attr "cpu" "bdver1")
744 (and (eq_attr "type" "ssediv")
745 (and (eq_attr "mode" "V4DF")
746 (eq_attr "memory" "load"))))
747 "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)") ; holds one chosen FFMA unit for 17 cycles
748 (define_insn_reservation "bdver1_ssediv_double_256" 27 ; V4DF divide without a memory operand, latency 27
749 (and (eq_attr "cpu" "bdver1")
750 (and (eq_attr "type" "ssediv")
751 (and (eq_attr "mode" "V4DF")
752 (eq_attr "memory" "none"))))
753 "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
754 (define_insn_reservation "bdver1_ssediv_single_load_256" 28 ; V8SF divide with a memory source, latency 28
755 (and (eq_attr "cpu" "bdver1")
756 (and (eq_attr "type" "ssediv")
757 (and (eq_attr "mode" "V8SF")
758 (eq_attr "memory" "load"))))
759 "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
760 (define_insn_reservation "bdver1_ssediv_single_256" 24 ; V8SF divide without a memory operand, latency 24
761 (and (eq_attr "cpu" "bdver1")
762 (and (eq_attr "type" "ssediv")
763 (and (eq_attr "mode" "V8SF")
764 (eq_attr "memory" "none"))))
765 "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
766 (define_insn_reservation "bdver1_ssediv_double_load" 31 ; DF/V2DF divide with a memory source, latency 31
767 (and (eq_attr "cpu" "bdver1")
768 (and (eq_attr "type" "ssediv")
769 (and (eq_attr "mode" "DF,V2DF")
770 (eq_attr "memory" "load"))))
771 "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)") ; same unit usage as the 256-bit forms, but direct decoded
772 (define_insn_reservation "bdver1_ssediv_double" 27 ; DF/V2DF divide without a memory operand, latency 27
773 (and (eq_attr "cpu" "bdver1")
774 (and (eq_attr "type" "ssediv")
775 (and (eq_attr "mode" "DF,V2DF")
776 (eq_attr "memory" "none"))))
777 "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
778 (define_insn_reservation "bdver1_ssediv_single_load" 28 ; SF/V4SF divide with a memory source, latency 28
779 (and (eq_attr "cpu" "bdver1")
780 (and (eq_attr "type" "ssediv")
781 (and (eq_attr "mode" "SF,V4SF")
782 (eq_attr "memory" "load"))))
783 "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
784 (define_insn_reservation "bdver1_ssediv_single" 24 ; SF/V4SF divide without a memory operand, latency 24
785 (and (eq_attr "cpu" "bdver1")
786 (and (eq_attr "type" "ssediv")
787 (and (eq_attr "mode" "SF,V4SF")
788 (eq_attr "memory" "none"))))
789 "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
790
791 (define_insn_reservation "bdver1_sseins" 3 ; sseins in TI mode only, with no memory test; latency 3
792 (and (eq_attr "cpu" "bdver1")
793 (and (eq_attr "type" "sseins")
794 (eq_attr "mode" "TI")))
795 "bdver1-direct,bdver1-fpsched,bdver1-fxbar") ; fxbar is ffma1
796