comparison gcc/config/i386/x86-tune-costs.h @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
comparison
equal deleted inserted replaced
131:84e7813d76e9 145:1830386684a0
1 /* Costs of operations of individual x86 CPUs. 1 /* Costs of operations of individual x86 CPUs.
2 Copyright (C) 1988-2018 Free Software Foundation, Inc. 2 Copyright (C) 1988-2020 Free Software Foundation, Inc.
3 3
4 This file is part of GCC. 4 This file is part of GCC.
5 5
6 GCC is free software; you can redistribute it and/or modify 6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by 7 it under the terms of the GNU General Public License as published by
34 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, 34 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
35 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; 35 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
36 36
37 const 37 const
38 struct processor_costs ix86_size_cost = {/* costs for tuning for size */ 38 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
39 {
40 /* Start of register allocator costs. integer->integer move cost is 2. */
41 2, /* cost for loading QImode using movzbl */
42 {2, 2, 2}, /* cost of loading integer registers
43 in QImode, HImode and SImode.
44 Relative to reg-reg move (2). */
45 {2, 2, 2}, /* cost of storing integer registers */
46 2, /* cost of reg,reg fld/fst */
47 {2, 2, 2}, /* cost of loading fp registers
48 in SFmode, DFmode and XFmode */
49 {2, 2, 2}, /* cost of storing fp registers
50 in SFmode, DFmode and XFmode */
51 3, /* cost of moving MMX register */
52 {3, 3}, /* cost of loading MMX registers
53 in SImode and DImode */
54 {3, 3}, /* cost of storing MMX registers
55 in SImode and DImode */
56 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
57 {3, 3, 3, 3, 3}, /* cost of loading SSE registers
58 in 32,64,128,256 and 512-bit */
59 {3, 3, 3, 3, 3}, /* cost of storing SSE registers
60 in 32,64,128,256 and 512-bit */
61 3, 3, /* SSE->integer and integer->SSE moves */
62 /* End of register allocator costs. */
63 },
64
39 COSTS_N_BYTES (2), /* cost of an add instruction */ 65 COSTS_N_BYTES (2), /* cost of an add instruction */
40 COSTS_N_BYTES (3), /* cost of a lea instruction */ 66 COSTS_N_BYTES (3), /* cost of a lea instruction */
41 COSTS_N_BYTES (2), /* variable shift costs */ 67 COSTS_N_BYTES (2), /* variable shift costs */
42 COSTS_N_BYTES (3), /* constant shift costs */ 68 COSTS_N_BYTES (3), /* constant shift costs */
43 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ 69 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
53 COSTS_N_BYTES (5)}, /* other */ 79 COSTS_N_BYTES (5)}, /* other */
54 COSTS_N_BYTES (3), /* cost of movsx */ 80 COSTS_N_BYTES (3), /* cost of movsx */
55 COSTS_N_BYTES (3), /* cost of movzx */ 81 COSTS_N_BYTES (3), /* cost of movzx */
56 0, /* "large" insn */ 82 0, /* "large" insn */
57 2, /* MOVE_RATIO */ 83 2, /* MOVE_RATIO */
58 84 2, /* CLEAR_RATIO */
59 /* All move costs are relative to integer->integer move times 2. */
60 2, /* cost for loading QImode using movzbl */
61 {2, 2, 2}, /* cost of loading integer registers 85 {2, 2, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode. 86 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */ 87 Relative to reg-reg move (2). */
64 {2, 2, 2}, /* cost of storing integer registers */ 88 {2, 2, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */ 89 {3, 3, 3, 3, 3}, /* cost of loading SSE register
66 {2, 2, 2}, /* cost of loading fp registers 90 in 32bit, 64bit, 128bit, 256bit and 512bit */
67 in SFmode, DFmode and XFmode */ 91 {3, 3, 3, 3, 3}, /* cost of storing SSE register
68 {2, 2, 2}, /* cost of storing fp registers 92 in 32bit, 64bit, 128bit, 256bit and 512bit */
69 in SFmode, DFmode and XFmode */
70 3, /* cost of moving MMX register */
71 {3, 3}, /* cost of loading MMX registers
72 in SImode and DImode */
73 {3, 3}, /* cost of storing MMX registers
74 in SImode and DImode */
75 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
76 {3, 3, 3, 3, 3}, /* cost of loading SSE registers
77 in 32,64,128,256 and 512-bit */
78 {3, 3, 3, 3, 3}, /* cost of unaligned SSE load 93 {3, 3, 3, 3, 3}, /* cost of unaligned SSE load
79 in 128bit, 256bit and 512bit */ 94 in 128bit, 256bit and 512bit */
80 {3, 3, 3, 3, 3}, /* cost of storing SSE registers 95 {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
81 in 32,64,128,256 and 512-bit */
82 {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
83 in 128bit, 256bit and 512bit */ 96 in 128bit, 256bit and 512bit */
84 3, 3, /* SSE->integer and integer->SSE moves */ 97 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
98 3, /* cost of moving SSE register to integer. */
85 5, 0, /* Gather load static, per_elt. */ 99 5, 0, /* Gather load static, per_elt. */
86 5, 0, /* Gather store static, per_elt. */ 100 5, 0, /* Gather store static, per_elt. */
87 0, /* size of l1 cache */ 101 0, /* size of l1 cache */
88 0, /* size of l2 cache */ 102 0, /* size of l2 cache */
89 0, /* size of prefetch block */ 103 0, /* size of prefetch block */
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, 139 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
126 DUMMY_STRINGOP_ALGS}; 140 DUMMY_STRINGOP_ALGS};
127 141
128 static const 142 static const
129 struct processor_costs i386_cost = { /* 386 specific costs */ 143 struct processor_costs i386_cost = { /* 386 specific costs */
144 {
145 /* Start of register allocator costs. integer->integer move cost is 2. */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
162 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
163 in 32,64,128,256 and 512-bit */
164 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
165 in 32,64,128,256 and 512-bit */
166 3, 3, /* SSE->integer and integer->SSE moves */
167 /* End of register allocator costs. */
168 },
169
130 COSTS_N_INSNS (1), /* cost of an add instruction */ 170 COSTS_N_INSNS (1), /* cost of an add instruction */
131 COSTS_N_INSNS (1), /* cost of a lea instruction */ 171 COSTS_N_INSNS (1), /* cost of a lea instruction */
132 COSTS_N_INSNS (3), /* variable shift costs */ 172 COSTS_N_INSNS (3), /* variable shift costs */
133 COSTS_N_INSNS (2), /* constant shift costs */ 173 COSTS_N_INSNS (2), /* constant shift costs */
134 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ 174 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
144 COSTS_N_INSNS (23)}, /* other */ 184 COSTS_N_INSNS (23)}, /* other */
145 COSTS_N_INSNS (3), /* cost of movsx */ 185 COSTS_N_INSNS (3), /* cost of movsx */
146 COSTS_N_INSNS (2), /* cost of movzx */ 186 COSTS_N_INSNS (2), /* cost of movzx */
147 15, /* "large" insn */ 187 15, /* "large" insn */
148 3, /* MOVE_RATIO */ 188 3, /* MOVE_RATIO */
149 189 3, /* CLEAR_RATIO */
150 /* All move costs are relative to integer->integer move times 2 and thus
151 they are latency*2. */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers 190 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode. 191 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */ 192 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */ 193 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */ 194 {4, 8, 16, 32, 64}, /* cost of loading SSE register
158 {8, 8, 8}, /* cost of loading fp registers 195 in 32bit, 64bit, 128bit, 256bit and 512bit */
159 in SFmode, DFmode and XFmode */ 196 {4, 8, 16, 32, 64}, /* cost of storing SSE register
160 {8, 8, 8}, /* cost of storing fp registers 197 in 32bit, 64bit, 128bit, 256bit and 512bit */
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
168 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
169 in 32,64,128,256 and 512-bit */
170 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ 198 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
171 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
172 in 32,64,128,256 and 512-bit */
173 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 199 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
174 3, 3, /* SSE->integer and integer->SSE moves */ 200 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
201 3, /* cost of moving SSE register to integer. */
175 4, 4, /* Gather load static, per_elt. */ 202 4, 4, /* Gather load static, per_elt. */
176 4, 4, /* Gather store static, per_elt. */ 203 4, 4, /* Gather store static, per_elt. */
177 0, /* size of l1 cache */ 204 0, /* size of l1 cache */
178 0, /* size of l2 cache */ 205 0, /* size of l2 cache */
179 0, /* size of prefetch block */ 206 0, /* size of prefetch block */
214 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, 241 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
215 DUMMY_STRINGOP_ALGS}; 242 DUMMY_STRINGOP_ALGS};
216 243
217 static const 244 static const
218 struct processor_costs i486_cost = { /* 486 specific costs */ 245 struct processor_costs i486_cost = { /* 486 specific costs */
246 {
247 /* Start of register allocator costs. integer->integer move cost is 2. */
248 4, /* cost for loading QImode using movzbl */
249 {2, 4, 2}, /* cost of loading integer registers
250 in QImode, HImode and SImode.
251 Relative to reg-reg move (2). */
252 {2, 4, 2}, /* cost of storing integer registers */
253 2, /* cost of reg,reg fld/fst */
254 {8, 8, 8}, /* cost of loading fp registers
255 in SFmode, DFmode and XFmode */
256 {8, 8, 8}, /* cost of storing fp registers
257 in SFmode, DFmode and XFmode */
258 2, /* cost of moving MMX register */
259 {4, 8}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {4, 8}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
264 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
265 in 32,64,128,256 and 512-bit */
266 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
267 in 32,64,128,256 and 512-bit */
268 3, 3, /* SSE->integer and integer->SSE moves */
269 /* End of register allocator costs. */
270 },
271
219 COSTS_N_INSNS (1), /* cost of an add instruction */ 272 COSTS_N_INSNS (1), /* cost of an add instruction */
220 COSTS_N_INSNS (1), /* cost of a lea instruction */ 273 COSTS_N_INSNS (1), /* cost of a lea instruction */
221 COSTS_N_INSNS (3), /* variable shift costs */ 274 COSTS_N_INSNS (3), /* variable shift costs */
222 COSTS_N_INSNS (2), /* constant shift costs */ 275 COSTS_N_INSNS (2), /* constant shift costs */
223 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ 276 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
233 COSTS_N_INSNS (40)}, /* other */ 286 COSTS_N_INSNS (40)}, /* other */
234 COSTS_N_INSNS (3), /* cost of movsx */ 287 COSTS_N_INSNS (3), /* cost of movsx */
235 COSTS_N_INSNS (2), /* cost of movzx */ 288 COSTS_N_INSNS (2), /* cost of movzx */
236 15, /* "large" insn */ 289 15, /* "large" insn */
237 3, /* MOVE_RATIO */ 290 3, /* MOVE_RATIO */
238 291 3, /* CLEAR_RATIO */
239 /* All move costs are relative to integer->integer move times 2 and thus
240 they are latency*2. */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers 292 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode. 293 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */ 294 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */ 295 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */ 296 {4, 8, 16, 32, 64}, /* cost of loading SSE register
247 {8, 8, 8}, /* cost of loading fp registers 297 in 32bit, 64bit, 128bit, 256bit and 512bit */
248 in SFmode, DFmode and XFmode */ 298 {4, 8, 16, 32, 64}, /* cost of storing SSE register
249 {8, 8, 8}, /* cost of storing fp registers 299 in 32bit, 64bit, 128bit, 256bit and 512bit */
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
257 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
258 in 32,64,128,256 and 512-bit */
259 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ 300 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
260 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
261 in 32,64,128,256 and 512-bit */
262 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 301 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
263 3, 3, /* SSE->integer and integer->SSE moves */ 302 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
303 3, /* cost of moving SSE register to integer. */
264 4, 4, /* Gather load static, per_elt. */ 304 4, 4, /* Gather load static, per_elt. */
265 4, 4, /* Gather store static, per_elt. */ 305 4, 4, /* Gather store static, per_elt. */
266 4, /* size of l1 cache. 486 has 8kB cache 306 4, /* size of l1 cache. 486 has 8kB cache
267 shared for code and data, so 4kB is 307 shared for code and data, so 4kB is
268 not really precise. */ 308 not really precise. */
305 {libcall, {{-1, rep_prefix_4_byte, false}}}, 345 {libcall, {{-1, rep_prefix_4_byte, false}}},
306 DUMMY_STRINGOP_ALGS}; 346 DUMMY_STRINGOP_ALGS};
307 347
308 static const 348 static const
309 struct processor_costs pentium_cost = { 349 struct processor_costs pentium_cost = {
350 {
351 /* Start of register allocator costs. integer->integer move cost is 2. */
352 6, /* cost for loading QImode using movzbl */
353 {2, 4, 2}, /* cost of loading integer registers
354 in QImode, HImode and SImode.
355 Relative to reg-reg move (2). */
356 {2, 4, 2}, /* cost of storing integer registers */
357 2, /* cost of reg,reg fld/fst */
358 {2, 2, 6}, /* cost of loading fp registers
359 in SFmode, DFmode and XFmode */
360 {4, 4, 6}, /* cost of storing fp registers
361 in SFmode, DFmode and XFmode */
362 8, /* cost of moving MMX register */
363 {8, 8}, /* cost of loading MMX registers
364 in SImode and DImode */
365 {8, 8}, /* cost of storing MMX registers
366 in SImode and DImode */
367 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
368 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
369 in 32,64,128,256 and 512-bit */
370 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
371 in 32,64,128,256 and 512-bit */
372 3, 3, /* SSE->integer and integer->SSE moves */
373 /* End of register allocator costs. */
374 },
375
310 COSTS_N_INSNS (1), /* cost of an add instruction */ 376 COSTS_N_INSNS (1), /* cost of an add instruction */
311 COSTS_N_INSNS (1), /* cost of a lea instruction */ 377 COSTS_N_INSNS (1), /* cost of a lea instruction */
312 COSTS_N_INSNS (4), /* variable shift costs */ 378 COSTS_N_INSNS (4), /* variable shift costs */
313 COSTS_N_INSNS (1), /* constant shift costs */ 379 COSTS_N_INSNS (1), /* constant shift costs */
314 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ 380 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
324 COSTS_N_INSNS (25)}, /* other */ 390 COSTS_N_INSNS (25)}, /* other */
325 COSTS_N_INSNS (3), /* cost of movsx */ 391 COSTS_N_INSNS (3), /* cost of movsx */
326 COSTS_N_INSNS (2), /* cost of movzx */ 392 COSTS_N_INSNS (2), /* cost of movzx */
327 8, /* "large" insn */ 393 8, /* "large" insn */
328 6, /* MOVE_RATIO */ 394 6, /* MOVE_RATIO */
329 395 6, /* CLEAR_RATIO */
330 /* All move costs are relative to integer->integer move times 2 and thus
331 they are latency*2. */
332 6, /* cost for loading QImode using movzbl */
333 {2, 4, 2}, /* cost of loading integer registers 396 {2, 4, 2}, /* cost of loading integer registers
334 in QImode, HImode and SImode. 397 in QImode, HImode and SImode.
335 Relative to reg-reg move (2). */ 398 Relative to reg-reg move (2). */
336 {2, 4, 2}, /* cost of storing integer registers */ 399 {2, 4, 2}, /* cost of storing integer registers */
337 2, /* cost of reg,reg fld/fst */ 400 {4, 8, 16, 32, 64}, /* cost of loading SSE register
338 {2, 2, 6}, /* cost of loading fp registers 401 in 32bit, 64bit, 128bit, 256bit and 512bit */
339 in SFmode, DFmode and XFmode */ 402 {4, 8, 16, 32, 64}, /* cost of storing SSE register
340 {4, 4, 6}, /* cost of storing fp registers 403 in 32bit, 64bit, 128bit, 256bit and 512bit */
341 in SFmode, DFmode and XFmode */
342 8, /* cost of moving MMX register */
343 {8, 8}, /* cost of loading MMX registers
344 in SImode and DImode */
345 {8, 8}, /* cost of storing MMX registers
346 in SImode and DImode */
347 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
348 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
349 in 32,64,128,256 and 512-bit */
350 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ 404 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
351 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
352 in 32,64,128,256 and 512-bit */
353 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 405 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
354 3, 3, /* SSE->integer and integer->SSE moves */ 406 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
407 3, /* cost of moving SSE register to integer. */
355 4, 4, /* Gather load static, per_elt. */ 408 4, 4, /* Gather load static, per_elt. */
356 4, 4, /* Gather store static, per_elt. */ 409 4, 4, /* Gather store static, per_elt. */
357 8, /* size of l1 cache. */ 410 8, /* size of l1 cache. */
358 8, /* size of l2 cache */ 411 8, /* size of l2 cache */
359 0, /* size of prefetch block */ 412 0, /* size of prefetch block */
387 "16", /* Func alignment. */ 440 "16", /* Func alignment. */
388 }; 441 };
389 442
390 static const 443 static const
391 struct processor_costs lakemont_cost = { 444 struct processor_costs lakemont_cost = {
445 {
446 /* Start of register allocator costs. integer->integer move cost is 2. */
447 6, /* cost for loading QImode using movzbl */
448 {2, 4, 2}, /* cost of loading integer registers
449 in QImode, HImode and SImode.
450 Relative to reg-reg move (2). */
451 {2, 4, 2}, /* cost of storing integer registers */
452 2, /* cost of reg,reg fld/fst */
453 {2, 2, 6}, /* cost of loading fp registers
454 in SFmode, DFmode and XFmode */
455 {4, 4, 6}, /* cost of storing fp registers
456 in SFmode, DFmode and XFmode */
457 8, /* cost of moving MMX register */
458 {8, 8}, /* cost of loading MMX registers
459 in SImode and DImode */
460 {8, 8}, /* cost of storing MMX registers
461 in SImode and DImode */
462 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
463 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
464 in 32,64,128,256 and 512-bit */
465 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
466 in 32,64,128,256 and 512-bit */
467 3, 3, /* SSE->integer and integer->SSE moves */
468 /* End of register allocator costs. */
469 },
470
392 COSTS_N_INSNS (1), /* cost of an add instruction */ 471 COSTS_N_INSNS (1), /* cost of an add instruction */
393 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 472 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
394 COSTS_N_INSNS (1), /* variable shift costs */ 473 COSTS_N_INSNS (1), /* variable shift costs */
395 COSTS_N_INSNS (1), /* constant shift costs */ 474 COSTS_N_INSNS (1), /* constant shift costs */
396 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ 475 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
406 COSTS_N_INSNS (25)}, /* other */ 485 COSTS_N_INSNS (25)}, /* other */
407 COSTS_N_INSNS (3), /* cost of movsx */ 486 COSTS_N_INSNS (3), /* cost of movsx */
408 COSTS_N_INSNS (2), /* cost of movzx */ 487 COSTS_N_INSNS (2), /* cost of movzx */
409 8, /* "large" insn */ 488 8, /* "large" insn */
410 17, /* MOVE_RATIO */ 489 17, /* MOVE_RATIO */
411 490 6, /* CLEAR_RATIO */
412 /* All move costs are relative to integer->integer move times 2 and thus
413 they are latency*2. */
414 6, /* cost for loading QImode using movzbl */
415 {2, 4, 2}, /* cost of loading integer registers 491 {2, 4, 2}, /* cost of loading integer registers
416 in QImode, HImode and SImode. 492 in QImode, HImode and SImode.
417 Relative to reg-reg move (2). */ 493 Relative to reg-reg move (2). */
418 {2, 4, 2}, /* cost of storing integer registers */ 494 {2, 4, 2}, /* cost of storing integer registers */
419 2, /* cost of reg,reg fld/fst */ 495 {4, 8, 16, 32, 64}, /* cost of loading SSE register
420 {2, 2, 6}, /* cost of loading fp registers 496 in 32bit, 64bit, 128bit, 256bit and 512bit */
421 in SFmode, DFmode and XFmode */ 497 {4, 8, 16, 32, 64}, /* cost of storing SSE register
422 {4, 4, 6}, /* cost of storing fp registers 498 in 32bit, 64bit, 128bit, 256bit and 512bit */
423 in SFmode, DFmode and XFmode */
424 8, /* cost of moving MMX register */
425 {8, 8}, /* cost of loading MMX registers
426 in SImode and DImode */
427 {8, 8}, /* cost of storing MMX registers
428 in SImode and DImode */
429 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
430 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
431 in 32,64,128,256 and 512-bit */
432 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ 499 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
433 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
434 in 32,64,128,256 and 512-bit */
435 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 500 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
436 3, 3, /* SSE->integer and integer->SSE moves */ 501 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
502 3, /* cost of moving SSE register to integer. */
437 4, 4, /* Gather load static, per_elt. */ 503 4, 4, /* Gather load static, per_elt. */
438 4, 4, /* Gather store static, per_elt. */ 504 4, 4, /* Gather store static, per_elt. */
439 8, /* size of l1 cache. */ 505 8, /* size of l1 cache. */
440 8, /* size of l2 cache */ 506 8, /* size of l2 cache */
441 0, /* size of prefetch block */ 507 0, /* size of prefetch block */
484 {8192, rep_prefix_4_byte, false}, 550 {8192, rep_prefix_4_byte, false},
485 {-1, libcall, false}}}, 551 {-1, libcall, false}}},
486 DUMMY_STRINGOP_ALGS}; 552 DUMMY_STRINGOP_ALGS};
487 static const 553 static const
488 struct processor_costs pentiumpro_cost = { 554 struct processor_costs pentiumpro_cost = {
555 {
556 /* Start of register allocator costs. integer->integer move cost is 2. */
557 2, /* cost for loading QImode using movzbl */
558 {4, 4, 4}, /* cost of loading integer registers
559 in QImode, HImode and SImode.
560 Relative to reg-reg move (2). */
561 {2, 2, 2}, /* cost of storing integer registers */
562 2, /* cost of reg,reg fld/fst */
563 {2, 2, 6}, /* cost of loading fp registers
564 in SFmode, DFmode and XFmode */
565 {4, 4, 6}, /* cost of storing fp registers
566 in SFmode, DFmode and XFmode */
567 2, /* cost of moving MMX register */
568 {2, 2}, /* cost of loading MMX registers
569 in SImode and DImode */
570 {2, 2}, /* cost of storing MMX registers
571 in SImode and DImode */
572 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
573 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
574 in 32,64,128,256 and 512-bit */
575 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
576 in 32,64,128,256 and 512-bit */
577 3, 3, /* SSE->integer and integer->SSE moves */
578 /* End of register allocator costs. */
579 },
580
489 COSTS_N_INSNS (1), /* cost of an add instruction */ 581 COSTS_N_INSNS (1), /* cost of an add instruction */
490 COSTS_N_INSNS (1), /* cost of a lea instruction */ 582 COSTS_N_INSNS (1), /* cost of a lea instruction */
491 COSTS_N_INSNS (1), /* variable shift costs */ 583 COSTS_N_INSNS (1), /* variable shift costs */
492 COSTS_N_INSNS (1), /* constant shift costs */ 584 COSTS_N_INSNS (1), /* constant shift costs */
493 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ 585 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
503 COSTS_N_INSNS (17)}, /* other */ 595 COSTS_N_INSNS (17)}, /* other */
504 COSTS_N_INSNS (1), /* cost of movsx */ 596 COSTS_N_INSNS (1), /* cost of movsx */
505 COSTS_N_INSNS (1), /* cost of movzx */ 597 COSTS_N_INSNS (1), /* cost of movzx */
506 8, /* "large" insn */ 598 8, /* "large" insn */
507 6, /* MOVE_RATIO */ 599 6, /* MOVE_RATIO */
508 600 6, /* CLEAR_RATIO */
509 /* All move costs are relative to integer->integer move times 2 and thus
510 they are latency*2. */
511 2, /* cost for loading QImode using movzbl */
512 {4, 4, 4}, /* cost of loading integer registers 601 {4, 4, 4}, /* cost of loading integer registers
513 in QImode, HImode and SImode. 602 in QImode, HImode and SImode.
514 Relative to reg-reg move (2). */ 603 Relative to reg-reg move (2). */
515 {2, 2, 2}, /* cost of storing integer registers */ 604 {2, 2, 2}, /* cost of storing integer registers */
516 2, /* cost of reg,reg fld/fst */ 605 {4, 8, 16, 32, 64}, /* cost of loading SSE register
517 {2, 2, 6}, /* cost of loading fp registers 606 in 32bit, 64bit, 128bit, 256bit and 512bit */
518 in SFmode, DFmode and XFmode */ 607 {4, 8, 16, 32, 64}, /* cost of storing SSE register
519 {4, 4, 6}, /* cost of storing fp registers 608 in 32bit, 64bit, 128bit, 256bit and 512bit */
520 in SFmode, DFmode and XFmode */
521 2, /* cost of moving MMX register */
522 {2, 2}, /* cost of loading MMX registers
523 in SImode and DImode */
524 {2, 2}, /* cost of storing MMX registers
525 in SImode and DImode */
526 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
527 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
528 in 32,64,128,256 and 512-bit */
529 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ 609 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
530 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
531 in 32,64,128,256 and 512-bit */
532 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 610 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
533 3, 3, /* SSE->integer and integer->SSE moves */ 611 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
612 3, /* cost of moving SSE register to integer. */
534 4, 4, /* Gather load static, per_elt. */ 613 4, 4, /* Gather load static, per_elt. */
535 4, 4, /* Gather store static, per_elt. */ 614 4, 4, /* Gather store static, per_elt. */
536 8, /* size of l1 cache. */ 615 8, /* size of l1 cache. */
537 256, /* size of l2 cache */ 616 256, /* size of l2 cache */
538 32, /* size of prefetch block */ 617 32, /* size of prefetch block */
572 static stringop_algs geode_memset[2] = { 651 static stringop_algs geode_memset[2] = {
573 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 652 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
574 DUMMY_STRINGOP_ALGS}; 653 DUMMY_STRINGOP_ALGS};
575 static const 654 static const
576 struct processor_costs geode_cost = { 655 struct processor_costs geode_cost = {
656 {
657 /* Start of register allocator costs. integer->integer move cost is 2. */
658 2, /* cost for loading QImode using movzbl */
659 {2, 2, 2}, /* cost of loading integer registers
660 in QImode, HImode and SImode.
661 Relative to reg-reg move (2). */
662 {2, 2, 2}, /* cost of storing integer registers */
663 2, /* cost of reg,reg fld/fst */
664 {2, 2, 2}, /* cost of loading fp registers
665 in SFmode, DFmode and XFmode */
666 {4, 6, 6}, /* cost of storing fp registers
667 in SFmode, DFmode and XFmode */
668 2, /* cost of moving MMX register */
669 {2, 2}, /* cost of loading MMX registers
670 in SImode and DImode */
671 {2, 2}, /* cost of storing MMX registers
672 in SImode and DImode */
673 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
674 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
675 in 32,64,128,256 and 512-bit */
676 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
677 in 32,64,128,256 and 512-bit */
678 6, 6, /* SSE->integer and integer->SSE moves */
679 /* End of register allocator costs. */
680 },
681
577 COSTS_N_INSNS (1), /* cost of an add instruction */ 682 COSTS_N_INSNS (1), /* cost of an add instruction */
578 COSTS_N_INSNS (1), /* cost of a lea instruction */ 683 COSTS_N_INSNS (1), /* cost of a lea instruction */
579 COSTS_N_INSNS (2), /* variable shift costs */ 684 COSTS_N_INSNS (2), /* variable shift costs */
580 COSTS_N_INSNS (1), /* constant shift costs */ 685 COSTS_N_INSNS (1), /* constant shift costs */
581 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 686 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
591 COSTS_N_INSNS (39)}, /* other */ 696 COSTS_N_INSNS (39)}, /* other */
592 COSTS_N_INSNS (1), /* cost of movsx */ 697 COSTS_N_INSNS (1), /* cost of movsx */
593 COSTS_N_INSNS (1), /* cost of movzx */ 698 COSTS_N_INSNS (1), /* cost of movzx */
594 8, /* "large" insn */ 699 8, /* "large" insn */
595 4, /* MOVE_RATIO */ 700 4, /* MOVE_RATIO */
596 701 4, /* CLEAR_RATIO */
597 /* All move costs are relative to integer->integer move times 2 and thus
598 they are latency*2. */
599 2, /* cost for loading QImode using movzbl */
600 {2, 2, 2}, /* cost of loading integer registers 702 {2, 2, 2}, /* cost of loading integer registers
601 in QImode, HImode and SImode. 703 in QImode, HImode and SImode.
602 Relative to reg-reg move (2). */ 704 Relative to reg-reg move (2). */
603 {2, 2, 2}, /* cost of storing integer registers */ 705 {2, 2, 2}, /* cost of storing integer registers */
604 2, /* cost of reg,reg fld/fst */ 706 {2, 2, 8, 16, 32}, /* cost of loading SSE register
605 {2, 2, 2}, /* cost of loading fp registers 707 in 32bit, 64bit, 128bit, 256bit and 512bit */
606 in SFmode, DFmode and XFmode */ 708 {2, 2, 8, 16, 32}, /* cost of storing SSE register
607 {4, 6, 6}, /* cost of storing fp registers 709 in 32bit, 64bit, 128bit, 256bit and 512bit */
608 in SFmode, DFmode and XFmode */
609
610 2, /* cost of moving MMX register */
611 {2, 2}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {2, 2}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
616 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
617 in 32,64,128,256 and 512-bit */
618 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */ 710 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
619 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
620 in 32,64,128,256 and 512-bit */
621 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 711 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
622 6, 6, /* SSE->integer and integer->SSE moves */ 712 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
713 6, /* cost of moving SSE register to integer. */
623 2, 2, /* Gather load static, per_elt. */ 714 2, 2, /* Gather load static, per_elt. */
624 2, 2, /* Gather store static, per_elt. */ 715 2, 2, /* Gather store static, per_elt. */
625 64, /* size of l1 cache. */ 716 64, /* size of l1 cache. */
626 128, /* size of l2 cache. */ 717 128, /* size of l2 cache. */
627 32, /* size of prefetch block */ 718 32, /* size of prefetch block */
661 static stringop_algs k6_memset[2] = { 752 static stringop_algs k6_memset[2] = {
662 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 753 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
663 DUMMY_STRINGOP_ALGS}; 754 DUMMY_STRINGOP_ALGS};
664 static const 755 static const
665 struct processor_costs k6_cost = { 756 struct processor_costs k6_cost = {
757 {
758 /* Start of register allocator costs. integer->integer move cost is 2. */
759 3, /* cost for loading QImode using movzbl */
760 {4, 5, 4}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {2, 3, 2}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {6, 6, 6}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {4, 4, 4}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {2, 2}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {2, 2}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
775 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
776 in 32,64,128,256 and 512-bit */
777 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
778 in 32,64,128,256 and 512-bit */
779 6, 6, /* SSE->integer and integer->SSE moves */
780 /* End of register allocator costs. */
781 },
782
666 COSTS_N_INSNS (1), /* cost of an add instruction */ 783 COSTS_N_INSNS (1), /* cost of an add instruction */
667 COSTS_N_INSNS (2), /* cost of a lea instruction */ 784 COSTS_N_INSNS (2), /* cost of a lea instruction */
668 COSTS_N_INSNS (1), /* variable shift costs */ 785 COSTS_N_INSNS (1), /* variable shift costs */
669 COSTS_N_INSNS (1), /* constant shift costs */ 786 COSTS_N_INSNS (1), /* constant shift costs */
670 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 787 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
680 COSTS_N_INSNS (18)}, /* other */ 797 COSTS_N_INSNS (18)}, /* other */
681 COSTS_N_INSNS (2), /* cost of movsx */ 798 COSTS_N_INSNS (2), /* cost of movsx */
682 COSTS_N_INSNS (2), /* cost of movzx */ 799 COSTS_N_INSNS (2), /* cost of movzx */
683 8, /* "large" insn */ 800 8, /* "large" insn */
684 4, /* MOVE_RATIO */ 801 4, /* MOVE_RATIO */
685 802 4, /* CLEAR_RATIO */
686 /* All move costs are relative to integer->integer move times 2 and thus
687 they are latency*2. */
688 3, /* cost for loading QImode using movzbl */
689 {4, 5, 4}, /* cost of loading integer registers 803 {4, 5, 4}, /* cost of loading integer registers
690 in QImode, HImode and SImode. 804 in QImode, HImode and SImode.
691 Relative to reg-reg move (2). */ 805 Relative to reg-reg move (2). */
692 {2, 3, 2}, /* cost of storing integer registers */ 806 {2, 3, 2}, /* cost of storing integer registers */
693 4, /* cost of reg,reg fld/fst */ 807 {2, 2, 8, 16, 32}, /* cost of loading SSE register
694 {6, 6, 6}, /* cost of loading fp registers 808 in 32bit, 64bit, 128bit, 256bit and 512bit */
695 in SFmode, DFmode and XFmode */ 809 {2, 2, 8, 16, 32}, /* cost of storing SSE register
696 {4, 4, 4}, /* cost of storing fp registers 810 in 32bit, 64bit, 128bit, 256bit and 512bit */
697 in SFmode, DFmode and XFmode */
698 2, /* cost of moving MMX register */
699 {2, 2}, /* cost of loading MMX registers
700 in SImode and DImode */
701 {2, 2}, /* cost of storing MMX registers
702 in SImode and DImode */
703 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
704 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
705 in 32,64,128,256 and 512-bit */
706 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */ 811 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
707 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
708 in 32,64,128,256 and 512-bit */
709 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 812 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
710 6, 6, /* SSE->integer and integer->SSE moves */ 813 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
814 6, /* cost of moving SSE register to integer. */
711 2, 2, /* Gather load static, per_elt. */ 815 2, 2, /* Gather load static, per_elt. */
712 2, 2, /* Gather store static, per_elt. */ 816 2, 2, /* Gather store static, per_elt. */
713 32, /* size of l1 cache. */ 817 32, /* size of l1 cache. */
714 32, /* size of l2 cache. Some models 818 32, /* size of l2 cache. Some models
715 have integrated l2 cache, but 819 have integrated l2 cache, but
755 static stringop_algs athlon_memset[2] = { 859 static stringop_algs athlon_memset[2] = {
756 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 860 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
757 DUMMY_STRINGOP_ALGS}; 861 DUMMY_STRINGOP_ALGS};
758 static const 862 static const
759 struct processor_costs athlon_cost = { 863 struct processor_costs athlon_cost = {
864 {
865 /* Start of register allocator costs. integer->integer move cost is 2. */
866 4, /* cost for loading QImode using movzbl */
867 {3, 4, 3}, /* cost of loading integer registers
868 in QImode, HImode and SImode.
869 Relative to reg-reg move (2). */
870 {3, 4, 3}, /* cost of storing integer registers */
871 4, /* cost of reg,reg fld/fst */
872 {4, 4, 12}, /* cost of loading fp registers
873 in SFmode, DFmode and XFmode */
874 {6, 6, 8}, /* cost of storing fp registers
875 in SFmode, DFmode and XFmode */
876 2, /* cost of moving MMX register */
877 {4, 4}, /* cost of loading MMX registers
878 in SImode and DImode */
879 {4, 4}, /* cost of storing MMX registers
880 in SImode and DImode */
881 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
882 {4, 4, 12, 12, 24}, /* cost of loading SSE registers
883 in 32,64,128,256 and 512-bit */
884 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
885 in 32,64,128,256 and 512-bit */
886 5, 5, /* SSE->integer and integer->SSE moves */
887 /* End of register allocator costs. */
888 },
889
760 COSTS_N_INSNS (1), /* cost of an add instruction */ 890 COSTS_N_INSNS (1), /* cost of an add instruction */
761 COSTS_N_INSNS (2), /* cost of a lea instruction */ 891 COSTS_N_INSNS (2), /* cost of a lea instruction */
762 COSTS_N_INSNS (1), /* variable shift costs */ 892 COSTS_N_INSNS (1), /* variable shift costs */
763 COSTS_N_INSNS (1), /* constant shift costs */ 893 COSTS_N_INSNS (1), /* constant shift costs */
764 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ 894 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
774 COSTS_N_INSNS (74)}, /* other */ 904 COSTS_N_INSNS (74)}, /* other */
775 COSTS_N_INSNS (1), /* cost of movsx */ 905 COSTS_N_INSNS (1), /* cost of movsx */
776 COSTS_N_INSNS (1), /* cost of movzx */ 906 COSTS_N_INSNS (1), /* cost of movzx */
777 8, /* "large" insn */ 907 8, /* "large" insn */
778 9, /* MOVE_RATIO */ 908 9, /* MOVE_RATIO */
779 909 6, /* CLEAR_RATIO */
780 /* All move costs are relative to integer->integer move times 2 and thus
781 they are latency*2. */
782 4, /* cost for loading QImode using movzbl */
783 {3, 4, 3}, /* cost of loading integer registers 910 {3, 4, 3}, /* cost of loading integer registers
784 in QImode, HImode and SImode. 911 in QImode, HImode and SImode.
785 Relative to reg-reg move (2). */ 912 Relative to reg-reg move (2). */
786 {3, 4, 3}, /* cost of storing integer registers */ 913 {3, 4, 3}, /* cost of storing integer registers */
787 4, /* cost of reg,reg fld/fst */ 914 {4, 4, 12, 12, 24}, /* cost of loading SSE register
788 {4, 4, 12}, /* cost of loading fp registers 915 in 32bit, 64bit, 128bit, 256bit and 512bit */
789 in SFmode, DFmode and XFmode */ 916 {4, 4, 10, 10, 20}, /* cost of storing SSE register
790 {6, 6, 8}, /* cost of storing fp registers 917 in 32bit, 64bit, 128bit, 256bit and 512bit */
791 in SFmode, DFmode and XFmode */
792 2, /* cost of moving MMX register */
793 {4, 4}, /* cost of loading MMX registers
794 in SImode and DImode */
795 {4, 4}, /* cost of storing MMX registers
796 in SImode and DImode */
797 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
798 {4, 4, 12, 12, 24}, /* cost of loading SSE registers
799 in 32,64,128,256 and 512-bit */
800 {4, 4, 12, 12, 24}, /* cost of unaligned loads. */ 918 {4, 4, 12, 12, 24}, /* cost of unaligned loads. */
801 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
802 in 32,64,128,256 and 512-bit */
803 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 919 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
804 5, 5, /* SSE->integer and integer->SSE moves */ 920 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
921 5, /* cost of moving SSE register to integer. */
805 4, 4, /* Gather load static, per_elt. */ 922 4, 4, /* Gather load static, per_elt. */
806 4, 4, /* Gather store static, per_elt. */ 923 4, 4, /* Gather store static, per_elt. */
807 64, /* size of l1 cache. */ 924 64, /* size of l1 cache. */
808 256, /* size of l2 cache. */ 925 256, /* size of l2 cache. */
809 64, /* size of prefetch block */ 926 64, /* size of prefetch block */
851 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 968 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
852 {libcall, {{48, unrolled_loop, false}, 969 {libcall, {{48, unrolled_loop, false},
853 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; 970 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
854 static const 971 static const
855 struct processor_costs k8_cost = { 972 struct processor_costs k8_cost = {
973 {
974 /* Start of register allocator costs. integer->integer move cost is 2. */
975 4, /* cost for loading QImode using movzbl */
976 {3, 4, 3}, /* cost of loading integer registers
977 in QImode, HImode and SImode.
978 Relative to reg-reg move (2). */
979 {3, 4, 3}, /* cost of storing integer registers */
980 4, /* cost of reg,reg fld/fst */
981 {4, 4, 12}, /* cost of loading fp registers
982 in SFmode, DFmode and XFmode */
983 {6, 6, 8}, /* cost of storing fp registers
984 in SFmode, DFmode and XFmode */
985 2, /* cost of moving MMX register */
986 {3, 3}, /* cost of loading MMX registers
987 in SImode and DImode */
988 {4, 4}, /* cost of storing MMX registers
989 in SImode and DImode */
990 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
991 {4, 3, 12, 12, 24}, /* cost of loading SSE registers
992 in 32,64,128,256 and 512-bit */
993 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
994 in 32,64,128,256 and 512-bit */
995 5, 5, /* SSE->integer and integer->SSE moves */
996 /* End of register allocator costs. */
997 },
998
856 COSTS_N_INSNS (1), /* cost of an add instruction */ 999 COSTS_N_INSNS (1), /* cost of an add instruction */
857 COSTS_N_INSNS (2), /* cost of a lea instruction */ 1000 COSTS_N_INSNS (2), /* cost of a lea instruction */
858 COSTS_N_INSNS (1), /* variable shift costs */ 1001 COSTS_N_INSNS (1), /* variable shift costs */
859 COSTS_N_INSNS (1), /* constant shift costs */ 1002 COSTS_N_INSNS (1), /* constant shift costs */
860 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 1003 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (74)}, /* other */ 1013 COSTS_N_INSNS (74)}, /* other */
871 COSTS_N_INSNS (1), /* cost of movsx */ 1014 COSTS_N_INSNS (1), /* cost of movsx */
872 COSTS_N_INSNS (1), /* cost of movzx */ 1015 COSTS_N_INSNS (1), /* cost of movzx */
873 8, /* "large" insn */ 1016 8, /* "large" insn */
874 9, /* MOVE_RATIO */ 1017 9, /* MOVE_RATIO */
875 1018 6, /* CLEAR_RATIO */
876 /* All move costs are relative to integer->integer move times 2 and thus
877 they are latency*2. */
878 4, /* cost for loading QImode using movzbl */
879 {3, 4, 3}, /* cost of loading integer registers 1019 {3, 4, 3}, /* cost of loading integer registers
880 in QImode, HImode and SImode. 1020 in QImode, HImode and SImode.
881 Relative to reg-reg move (2). */ 1021 Relative to reg-reg move (2). */
882 {3, 4, 3}, /* cost of storing integer registers */ 1022 {3, 4, 3}, /* cost of storing integer registers */
883 4, /* cost of reg,reg fld/fst */ 1023 {4, 3, 12, 12, 24}, /* cost of loading SSE register
884 {4, 4, 12}, /* cost of loading fp registers 1024 in 32bit, 64bit, 128bit, 256bit and 512bit */
885 in SFmode, DFmode and XFmode */ 1025 {4, 4, 10, 10, 20}, /* cost of storing SSE register
886 {6, 6, 8}, /* cost of storing fp registers 1026 in 32bit, 64bit, 128bit, 256bit and 512bit */
887 in SFmode, DFmode and XFmode */
888 2, /* cost of moving MMX register */
889 {3, 3}, /* cost of loading MMX registers
890 in SImode and DImode */
891 {4, 4}, /* cost of storing MMX registers
892 in SImode and DImode */
893 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
894 {4, 3, 12, 12, 24}, /* cost of loading SSE registers
895 in 32,64,128,256 and 512-bit */
896 {4, 3, 12, 12, 24}, /* cost of unaligned loads. */ 1027 {4, 3, 12, 12, 24}, /* cost of unaligned loads. */
897 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
898 in 32,64,128,256 and 512-bit */
899 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 1028 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
900 5, 5, /* SSE->integer and integer->SSE moves */ 1029 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1030 5, /* cost of moving SSE register to integer. */
901 4, 4, /* Gather load static, per_elt. */ 1031 4, 4, /* Gather load static, per_elt. */
902 4, 4, /* Gather store static, per_elt. */ 1032 4, 4, /* Gather store static, per_elt. */
903 64, /* size of l1 cache. */ 1033 64, /* size of l1 cache. */
904 512, /* size of l2 cache. */ 1034 512, /* size of l2 cache. */
905 64, /* size of prefetch block */ 1035 64, /* size of prefetch block */
951 {libcall, {{8, loop, false}, {24, unrolled_loop, false}, 1081 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
952 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 1082 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
953 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, 1083 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
954 {-1, libcall, false}}}}; 1084 {-1, libcall, false}}}};
955 struct processor_costs amdfam10_cost = { 1085 struct processor_costs amdfam10_cost = {
1086 {
1087 /* Start of register allocator costs. integer->integer move cost is 2. */
1088 4, /* cost for loading QImode using movzbl */
1089 {3, 4, 3}, /* cost of loading integer registers
1090 in QImode, HImode and SImode.
1091 Relative to reg-reg move (2). */
1092 {3, 4, 3}, /* cost of storing integer registers */
1093 4, /* cost of reg,reg fld/fst */
1094 {4, 4, 12}, /* cost of loading fp registers
1095 in SFmode, DFmode and XFmode */
1096 {6, 6, 8}, /* cost of storing fp registers
1097 in SFmode, DFmode and XFmode */
1098 2, /* cost of moving MMX register */
1099 {3, 3}, /* cost of loading MMX registers
1100 in SImode and DImode */
1101 {4, 4}, /* cost of storing MMX registers
1102 in SImode and DImode */
1103 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1104 {4, 4, 3, 6, 12}, /* cost of loading SSE registers
1105 in 32,64,128,256 and 512-bit */
1106 {4, 4, 5, 10, 20}, /* cost of storing SSE registers
1107 in 32,64,128,256 and 512-bit */
1108 3, 3, /* SSE->integer and integer->SSE moves */
1109
1110 /* On K8:
1111 MOVD reg64, xmmreg Double FSTORE 4
1112 MOVD reg32, xmmreg Double FSTORE 4
1113 On AMDFAM10:
1114 MOVD reg64, xmmreg Double FADD 3
1115 1/1 1/1
1116 MOVD reg32, xmmreg Double FADD 3
1117 1/1 1/1 */
1118 /* End of register allocator costs. */
1119 },
1120
956 COSTS_N_INSNS (1), /* cost of an add instruction */ 1121 COSTS_N_INSNS (1), /* cost of an add instruction */
957 COSTS_N_INSNS (2), /* cost of a lea instruction */ 1122 COSTS_N_INSNS (2), /* cost of a lea instruction */
958 COSTS_N_INSNS (1), /* variable shift costs */ 1123 COSTS_N_INSNS (1), /* variable shift costs */
959 COSTS_N_INSNS (1), /* constant shift costs */ 1124 COSTS_N_INSNS (1), /* constant shift costs */
960 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 1125 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
970 COSTS_N_INSNS (83)}, /* other */ 1135 COSTS_N_INSNS (83)}, /* other */
971 COSTS_N_INSNS (1), /* cost of movsx */ 1136 COSTS_N_INSNS (1), /* cost of movsx */
972 COSTS_N_INSNS (1), /* cost of movzx */ 1137 COSTS_N_INSNS (1), /* cost of movzx */
973 8, /* "large" insn */ 1138 8, /* "large" insn */
974 9, /* MOVE_RATIO */ 1139 9, /* MOVE_RATIO */
975 1140 6, /* CLEAR_RATIO */
976 /* All move costs are relative to integer->integer move times 2 and thus
977 they are latency*2. */
978 4, /* cost for loading QImode using movzbl */
979 {3, 4, 3}, /* cost of loading integer registers 1141 {3, 4, 3}, /* cost of loading integer registers
980 in QImode, HImode and SImode. 1142 in QImode, HImode and SImode.
981 Relative to reg-reg move (2). */ 1143 Relative to reg-reg move (2). */
982 {3, 4, 3}, /* cost of storing integer registers */ 1144 {3, 4, 3}, /* cost of storing integer registers */
983 4, /* cost of reg,reg fld/fst */ 1145 {4, 4, 3, 6, 12}, /* cost of loading SSE register
984 {4, 4, 12}, /* cost of loading fp registers 1146 in 32bit, 64bit, 128bit, 256bit and 512bit */
985 in SFmode, DFmode and XFmode */ 1147 {4, 4, 5, 10, 20}, /* cost of storing SSE register
986 {6, 6, 8}, /* cost of storing fp registers 1148 in 32bit, 64bit, 128bit, 256bit and 512bit */
987 in SFmode, DFmode and XFmode */
988 2, /* cost of moving MMX register */
989 {3, 3}, /* cost of loading MMX registers
990 in SImode and DImode */
991 {4, 4}, /* cost of storing MMX registers
992 in SImode and DImode */
993 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
994 {4, 4, 3, 6, 12}, /* cost of loading SSE registers
995 in 32,64,128,256 and 512-bit */
996 {4, 4, 3, 7, 12}, /* cost of unaligned loads. */ 1149 {4, 4, 3, 7, 12}, /* cost of unaligned loads. */
997 {4, 4, 5, 10, 20}, /* cost of storing SSE registers
998 in 32,64,128,256 and 512-bit */
999 {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 1150 {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
1000 3, 3, /* SSE->integer and integer->SSE moves */ 1151 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1001 /* On K8: 1152 3, /* cost of moving SSE register to integer. */
1002 MOVD reg64, xmmreg Double FSTORE 4
1003 MOVD reg32, xmmreg Double FSTORE 4
1004 On AMDFAM10:
1005 MOVD reg64, xmmreg Double FADD 3
1006 1/1 1/1
1007 MOVD reg32, xmmreg Double FADD 3
1008 1/1 1/1 */
1009 4, 4, /* Gather load static, per_elt. */ 1153 4, 4, /* Gather load static, per_elt. */
1010 4, 4, /* Gather store static, per_elt. */ 1154 4, 4, /* Gather store static, per_elt. */
1011 64, /* size of l1 cache. */ 1155 64, /* size of l1 cache. */
1012 512, /* size of l2 cache. */ 1156 512, /* size of l2 cache. */
1013 64, /* size of prefetch block */ 1157 64, /* size of prefetch block */
1060 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 1204 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1061 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, 1205 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1062 {-1, libcall, false}}}}; 1206 {-1, libcall, false}}}};
1063 1207
1064 const struct processor_costs bdver_cost = { 1208 const struct processor_costs bdver_cost = {
1209 {
1210 /* Start of register allocator costs. integer->integer move cost is 2. */
1211 8, /* cost for loading QImode using movzbl */
1212 {8, 8, 8}, /* cost of loading integer registers
1213 in QImode, HImode and SImode.
1214 Relative to reg-reg move (2). */
1215 {8, 8, 8}, /* cost of storing integer registers */
1216 4, /* cost of reg,reg fld/fst */
1217 {12, 12, 28}, /* cost of loading fp registers
1218 in SFmode, DFmode and XFmode */
1219 {10, 10, 18}, /* cost of storing fp registers
1220 in SFmode, DFmode and XFmode */
1221 4, /* cost of moving MMX register */
1222 {12, 12}, /* cost of loading MMX registers
1223 in SImode and DImode */
1224 {10, 10}, /* cost of storing MMX registers
1225 in SImode and DImode */
1226 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1227 {12, 12, 10, 40, 60}, /* cost of loading SSE registers
1228 in 32,64,128,256 and 512-bit */
1229 {10, 10, 10, 40, 60}, /* cost of storing SSE registers
1230 in 32,64,128,256 and 512-bit */
1231 16, 20, /* SSE->integer and integer->SSE moves */
1232 /* End of register allocator costs. */
1233 },
1234
1065 COSTS_N_INSNS (1), /* cost of an add instruction */ 1235 COSTS_N_INSNS (1), /* cost of an add instruction */
1066 COSTS_N_INSNS (1), /* cost of a lea instruction */ 1236 COSTS_N_INSNS (1), /* cost of a lea instruction */
1067 COSTS_N_INSNS (1), /* variable shift costs */ 1237 COSTS_N_INSNS (1), /* variable shift costs */
1068 COSTS_N_INSNS (1), /* constant shift costs */ 1238 COSTS_N_INSNS (1), /* constant shift costs */
1069 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ 1239 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1079 COSTS_N_INSNS (83)}, /* other */ 1249 COSTS_N_INSNS (83)}, /* other */
1080 COSTS_N_INSNS (1), /* cost of movsx */ 1250 COSTS_N_INSNS (1), /* cost of movsx */
1081 COSTS_N_INSNS (1), /* cost of movzx */ 1251 COSTS_N_INSNS (1), /* cost of movzx */
1082 8, /* "large" insn */ 1252 8, /* "large" insn */
1083 9, /* MOVE_RATIO */ 1253 9, /* MOVE_RATIO */
1084 1254 6, /* CLEAR_RATIO */
1085 /* All move costs are relative to integer->integer move times 2 and thus
1086 they are latency*2. */
1087 8, /* cost for loading QImode using movzbl */
1088 {8, 8, 8}, /* cost of loading integer registers 1255 {8, 8, 8}, /* cost of loading integer registers
1089 in QImode, HImode and SImode. 1256 in QImode, HImode and SImode.
1090 Relative to reg-reg move (2). */ 1257 Relative to reg-reg move (2). */
1091 {8, 8, 8}, /* cost of storing integer registers */ 1258 {8, 8, 8}, /* cost of storing integer registers */
1092 4, /* cost of reg,reg fld/fst */ 1259 {12, 12, 10, 40, 60}, /* cost of loading SSE register
1093 {12, 12, 28}, /* cost of loading fp registers 1260 in 32bit, 64bit, 128bit, 256bit and 512bit */
1094 in SFmode, DFmode and XFmode */ 1261 {10, 10, 10, 40, 60}, /* cost of storing SSE register
1095 {10, 10, 18}, /* cost of storing fp registers 1262 in 32bit, 64bit, 128bit, 256bit and 512bit */
1096 in SFmode, DFmode and XFmode */
1097 4, /* cost of moving MMX register */
1098 {12, 12}, /* cost of loading MMX registers
1099 in SImode and DImode */
1100 {10, 10}, /* cost of storing MMX registers
1101 in SImode and DImode */
1102 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1103 {12, 12, 10, 40, 60}, /* cost of loading SSE registers
1104 in 32,64,128,256 and 512-bit */
1105 {12, 12, 10, 40, 60}, /* cost of unaligned loads. */ 1263 {12, 12, 10, 40, 60}, /* cost of unaligned loads. */
1106 {10, 10, 10, 40, 60}, /* cost of storing SSE registers
1107 in 32,64,128,256 and 512-bit */
1108 {10, 10, 10, 40, 60}, /* cost of unaligned stores. */ 1264 {10, 10, 10, 40, 60}, /* cost of unaligned stores. */
1109 16, 20, /* SSE->integer and integer->SSE moves */ 1265 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1266 16, /* cost of moving SSE register to integer. */
1110 12, 12, /* Gather load static, per_elt. */ 1267 12, 12, /* Gather load static, per_elt. */
1111 10, 10, /* Gather store static, per_elt. */ 1268 10, 10, /* Gather store static, per_elt. */
1112 16, /* size of l1 cache. */ 1269 16, /* size of l1 cache. */
1113 2048, /* size of l2 cache. */ 1270 2048, /* size of l2 cache. */
1114 64, /* size of prefetch block */ 1271 64, /* size of prefetch block */
1162 {libcall, {{8, loop, false}, {24, unrolled_loop, false}, 1319 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1163 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 1320 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1164 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, 1321 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1165 {-1, libcall, false}}}}; 1322 {-1, libcall, false}}}};
1166 struct processor_costs znver1_cost = { 1323 struct processor_costs znver1_cost = {
1324 {
1325 /* Start of register allocator costs. integer->integer move cost is 2. */
1326
1327 /* reg-reg moves are done by renaming and thus they are even cheaper than
1328 1 cycle. Because reg-reg move cost is 2 and the following tables correspond
1329 to doubles of latencies, we do not model this correctly. It does not
1330 seem to make practical difference to bump prices up even more. */
1331 6, /* cost for loading QImode using
1332 movzbl. */
1333 {6, 6, 6}, /* cost of loading integer registers
1334 in QImode, HImode and SImode.
1335 Relative to reg-reg move (2). */
1336 {8, 8, 8}, /* cost of storing integer
1337 registers. */
1338 2, /* cost of reg,reg fld/fst. */
1339 {6, 6, 16}, /* cost of loading fp registers
1340 in SFmode, DFmode and XFmode. */
1341 {8, 8, 16}, /* cost of storing fp registers
1342 in SFmode, DFmode and XFmode. */
1343 2, /* cost of moving MMX register. */
1344 {6, 6}, /* cost of loading MMX registers
1345 in SImode and DImode. */
1346 {8, 8}, /* cost of storing MMX registers
1347 in SImode and DImode. */
1348 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
1349 {6, 6, 6, 12, 24}, /* cost of loading SSE registers
1350 in 32,64,128,256 and 512-bit. */
1351 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
1352 in 32,64,128,256 and 512-bit. */
1353 6, 6, /* SSE->integer and integer->SSE moves. */
1354 /* End of register allocator costs. */
1355 },
1356
1167 COSTS_N_INSNS (1), /* cost of an add instruction. */ 1357 COSTS_N_INSNS (1), /* cost of an add instruction. */
1168 COSTS_N_INSNS (1), /* cost of a lea instruction. */ 1358 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1169 COSTS_N_INSNS (1), /* variable shift costs. */ 1359 COSTS_N_INSNS (1), /* variable shift costs. */
1170 COSTS_N_INSNS (1), /* constant shift costs. */ 1360 COSTS_N_INSNS (1), /* constant shift costs. */
1171 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ 1361 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1184 COSTS_N_INSNS (45)}, /* other. */ 1374 COSTS_N_INSNS (45)}, /* other. */
1185 COSTS_N_INSNS (1), /* cost of movsx. */ 1375 COSTS_N_INSNS (1), /* cost of movsx. */
1186 COSTS_N_INSNS (1), /* cost of movzx. */ 1376 COSTS_N_INSNS (1), /* cost of movzx. */
1187 8, /* "large" insn. */ 1377 8, /* "large" insn. */
1188 9, /* MOVE_RATIO. */ 1378 9, /* MOVE_RATIO. */
1189 1379 6, /* CLEAR_RATIO */
1190 /* All move costs are relative to integer->integer move times 2 and thus
1191 they are latency*2. */
1192
1193 /* reg-reg moves are done by renaming and thus they are even cheaper than
1194 1 cycle. Because reg-reg move cost is 2 and the following tables correspond
1195 to doubles of latencies, we do not model this correctly. It does not
1196 seem to make practical difference to bump prices up even more. */
1197 6, /* cost for loading QImode using
1198 movzbl. */
1199 {6, 6, 6}, /* cost of loading integer registers 1380 {6, 6, 6}, /* cost of loading integer registers
1200 in QImode, HImode and SImode. 1381 in QImode, HImode and SImode.
1201 Relative to reg-reg move (2). */ 1382 Relative to reg-reg move (2). */
1202 {8, 8, 8}, /* cost of storing integer 1383 {8, 8, 8}, /* cost of storing integer
1203 registers. */ 1384 registers. */
1204 2, /* cost of reg,reg fld/fst. */ 1385 {6, 6, 6, 12, 24}, /* cost of loading SSE register
1205 {6, 6, 16}, /* cost of loading fp registers 1386 in 32bit, 64bit, 128bit, 256bit and 512bit */
1206 in SFmode, DFmode and XFmode. */ 1387 {8, 8, 8, 16, 32}, /* cost of storing SSE register
1207 {8, 8, 16}, /* cost of storing fp registers 1388 in 32bit, 64bit, 128bit, 256bit and 512bit */
1208 in SFmode, DFmode and XFmode. */ 1389 {6, 6, 6, 12, 24}, /* cost of unaligned loads. */
1209 2, /* cost of moving MMX register. */ 1390 {8, 8, 8, 16, 32}, /* cost of unaligned stores. */
1210 {6, 6}, /* cost of loading MMX registers
1211 in SImode and DImode. */
1212 {8, 8}, /* cost of storing MMX registers
1213 in SImode and DImode. */
1214 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ 1391 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
1215 {6, 6, 6, 12, 24}, /* cost of loading SSE registers 1392 6, /* cost of moving SSE register to integer. */
1216 in 32,64,128,256 and 512-bit. */
1217 {6, 6, 6, 12, 24}, /* cost of unaligned loads. */
1218 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
1219 in 32,64,128,256 and 512-bit. */
1220 {8, 8, 8, 16, 32}, /* cost of unaligned stores. */
1221 6, 6, /* SSE->integer and integer->SSE moves. */
1222 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, 1393 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
1223 throughput 12. Approx 9 uops do not depend on vector size and every load 1394 throughput 12. Approx 9 uops do not depend on vector size and every load
1224 is 7 uops. */ 1395 is 7 uops. */
1225 18, 8, /* Gather load static, per_elt. */ 1396 18, 8, /* Gather load static, per_elt. */
1226 18, 10, /* Gather store static, per_elt. */ 1397 18, 10, /* Gather store static, per_elt. */
1271 "16", /* Jump alignment. */ 1442 "16", /* Jump alignment. */
1272 "0:0:8", /* Label alignment. */ 1443 "0:0:8", /* Label alignment. */
1273 "16", /* Func alignment. */ 1444 "16", /* Func alignment. */
1274 }; 1445 };
1275 1446
1447 /* ZNVER2 has an optimized REP instruction for medium-sized blocks, but for
1448 very small blocks it is better to use a loop. For large blocks, a libcall
1449 can do non-temporal accesses and beat inline code considerably. */
1450 static stringop_algs znver2_memcpy[2] = { /* two entries, each {alg, {{bound, alg, flag}, ...}}; NOTE(review): bound is presumably the largest block size the row covers, -1 meaning no limit -- confirm against the stringop_algs declaration */
1451 {libcall, {{6, loop, false}, {14, unrolled_loop, false}, /* entry 0: loop, then unrolled_loop, ... */
1452 {-1, rep_prefix_4_byte, false}}}, /* ... then rep_prefix_4_byte on the closing -1 row */
1453 {libcall, {{16, loop, false}, {64, rep_prefix_4_byte, false}, /* entry 1: loop, then rep_prefix_4_byte, ... */
1454 {-1, libcall, false}}}}; /* ... then libcall on the closing -1 row */
1455 static stringop_algs znver2_memset[2] = { /* memset counterpart of znver2_memcpy: two entries with the same {alg, {{bound, alg, flag}, ...}} layout */
1456 {libcall, {{8, loop, false}, {24, unrolled_loop, false}, /* entry 0: loop, then unrolled_loop, ... */
1457 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, /* ... then rep_prefix_4_byte, then libcall on the closing -1 row */
1458 {libcall, {{24, rep_prefix_4_byte, false}, {128, rep_prefix_8_byte, false}, /* entry 1: rep_prefix_4_byte, then rep_prefix_8_byte, ... */
1459 {-1, libcall, false}}}}; /* ... then libcall on the closing -1 row */
1460
1461 struct processor_costs znver2_cost = {
1462 {
1463 /* Start of register allocator costs. integer->integer move cost is 2. */
1464
1465 /* reg-reg moves are done by renaming and thus they are even cheaper than
1466 1 cycle. Because reg-reg move cost is 2 and following tables correspond
1467 to doubles of latencies, we do not model this correctly. It does not
1468 seem to make practical difference to bump prices up even more. */
1469 6, /* cost for loading QImode using
1470 movzbl. */
1471 {6, 6, 6}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {8, 8, 8}, /* cost of storing integer
1475 registers. */
1476 2, /* cost of reg,reg fld/fst. */
1477 {6, 6, 16}, /* cost of loading fp registers
1478 in SFmode, DFmode and XFmode. */
1479 {8, 8, 16}, /* cost of storing fp registers
1480 in SFmode, DFmode and XFmode. */
1481 2, /* cost of moving MMX register. */
1482 {6, 6}, /* cost of loading MMX registers
1483 in SImode and DImode. */
1484 {8, 8}, /* cost of storing MMX registers
1485 in SImode and DImode. */
1486 2, 2, 3, /* cost of moving XMM,YMM,ZMM
1487 register. */
1488 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
1489 in 32,64,128,256 and 512-bit. */
1490 {8, 8, 8, 8, 16}, /* cost of storing SSE registers
1491 in 32,64,128,256 and 512-bit. */
1492 6, 6, /* SSE->integer and integer->SSE
1493 moves. */
1494 /* End of register allocator costs. */
1495 },
1496
1497 COSTS_N_INSNS (1), /* cost of an add instruction. */
1498 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1499 COSTS_N_INSNS (1), /* variable shift costs. */
1500 COSTS_N_INSNS (1), /* constant shift costs. */
1501 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1502 COSTS_N_INSNS (3), /* HI. */
1503 COSTS_N_INSNS (3), /* SI. */
1504 COSTS_N_INSNS (3), /* DI. */
1505 COSTS_N_INSNS (3)}, /* other. */
1506 0, /* cost of multiply per each bit
1507 set. */
1508 /* Depending on parameters, idiv can get faster on ryzen. This is the upper
1509 bound. */
1510 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
1511 COSTS_N_INSNS (22), /* HI. */
1512 COSTS_N_INSNS (30), /* SI. */
1513 COSTS_N_INSNS (45), /* DI. */
1514 COSTS_N_INSNS (45)}, /* other. */
1515 COSTS_N_INSNS (1), /* cost of movsx. */
1516 COSTS_N_INSNS (1), /* cost of movzx. */
1517 8, /* "large" insn. */
1518 9, /* MOVE_RATIO. */
1519 6, /* CLEAR_RATIO */
1520 {6, 6, 6}, /* cost of loading integer registers
1521 in QImode, HImode and SImode.
1522 Relative to reg-reg move (2). */
1523 {8, 8, 8}, /* cost of storing integer
1524 registers. */
1525 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
1526 in 32bit, 64bit, 128bit, 256bit and 512bit */
1527 {8, 8, 8, 8, 16}, /* cost of storing SSE register
1528 in 32bit, 64bit, 128bit, 256bit and 512bit */
1529 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
1530 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
1531 2, 2, 3, /* cost of moving XMM,YMM,ZMM
1532 register. */
1533 6, /* cost of moving SSE register to integer. */
1534 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
1535 throughput 12. Approx 9 uops do not depend on vector size and every load
1536 is 7 uops. */
1537 18, 8, /* Gather load static, per_elt. */
1538 18, 10, /* Gather store static, per_elt. */
1539 32, /* size of l1 cache. */
1540 512, /* size of l2 cache. */
1541 64, /* size of prefetch block. */
1542 /* New AMD processors never drop prefetches; if they cannot be performed
1543 immediately, they are queued. We set number of simultaneous prefetches
1544 to a large constant to reflect this (it probably is not a good idea not
1545 to limit number of prefetches at all, as their execution also takes some
1546 time). */
1547 100, /* number of parallel prefetches. */
1548 3, /* Branch cost. */
1549 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1550 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1551 /* Latency of fdiv is 8-15. */
1552 COSTS_N_INSNS (15), /* cost of FDIV instruction. */
1553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1555 /* Latency of fsqrt is 4-10. */
1556 COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
1557
1558 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1559 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1560 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
1561 COSTS_N_INSNS (3), /* cost of MULSD instruction. */
1562 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1563 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
1564 COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
1565 /* 9-13. */
1566 COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
1567 COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
1568 COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
1569 /* Zen can execute 4 integer operations per cycle. FP operations
1570 take 3 cycles and it can execute 2 integer additions and 2
1571 multiplications thus reassociation may make sense up to width of 6.
1572 SPEC2k6 benchmarks suggest
1573 that 4 works better than 6 probably due to register pressure.
1574
1575 Integer vector operations are taken by FP unit and execute 3 vector
1576 plus/minus operations per cycle but only one multiply. This is adjusted
1577 in ix86_reassociation_width. */
1578 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
1579 znver2_memcpy,
1580 znver2_memset,
1581 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1582 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
1583 "16", /* Loop alignment. */
1584 "16", /* Jump alignment. */
1585 "0:0:8", /* Label alignment. */
1586 "16", /* Func alignment. */
1587 };
1588
1276 /* skylake_cost should produce code tuned for Skylake family of CPUs. */ 1589 /* skylake_cost should produce code tuned for Skylake family of CPUs. */
1277 static stringop_algs skylake_memcpy[2] = { 1590 static stringop_algs skylake_memcpy[2] = {
1278 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, 1591 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1279 {libcall, {{16, loop, false}, {512, unrolled_loop, false}, 1592 {libcall, {{16, loop, false}, {512, unrolled_loop, false},
1280 {-1, libcall, false}}}}; 1593 {-1, libcall, false}}}};
1287 {libcall, {{24, loop, true}, {512, unrolled_loop, false}, 1600 {libcall, {{24, loop, true}, {512, unrolled_loop, false},
1288 {-1, libcall, false}}}}; 1601 {-1, libcall, false}}}};
1289 1602
1290 static const 1603 static const
1291 struct processor_costs skylake_cost = { 1604 struct processor_costs skylake_cost = {
1605 {
1606 /* Start of register allocator costs. integer->integer move cost is 2. */
1607 6, /* cost for loading QImode using movzbl */
1608 {4, 4, 4}, /* cost of loading integer registers
1609 in QImode, HImode and SImode.
1610 Relative to reg-reg move (2). */
1611 {6, 6, 6}, /* cost of storing integer registers */
1612 2, /* cost of reg,reg fld/fst */
1613 {6, 6, 8}, /* cost of loading fp registers
1614 in SFmode, DFmode and XFmode */
1615 {6, 6, 10}, /* cost of storing fp registers
1616 in SFmode, DFmode and XFmode */
1617 2, /* cost of moving MMX register */
1618 {6, 6}, /* cost of loading MMX registers
1619 in SImode and DImode */
1620 {6, 6}, /* cost of storing MMX registers
1621 in SImode and DImode */
1622 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
1623 {6, 6, 6, 10, 20}, /* cost of loading SSE registers
1624 in 32,64,128,256 and 512-bit */
1625 {8, 8, 8, 12, 24}, /* cost of storing SSE registers
1626 in 32,64,128,256 and 512-bit */
1627 6, 6, /* SSE->integer and integer->SSE moves */
1628 /* End of register allocator costs. */
1629 },
1630
1292 COSTS_N_INSNS (1), /* cost of an add instruction */ 1631 COSTS_N_INSNS (1), /* cost of an add instruction */
1293 COSTS_N_INSNS (1)+1, /* cost of a lea instruction */ 1632 COSTS_N_INSNS (1)+1, /* cost of a lea instruction */
1294 COSTS_N_INSNS (1), /* variable shift costs */ 1633 COSTS_N_INSNS (1), /* variable shift costs */
1295 COSTS_N_INSNS (1), /* constant shift costs */ 1634 COSTS_N_INSNS (1), /* constant shift costs */
1296 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 1635 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1308 COSTS_N_INSNS (76)}, /* other */ 1647 COSTS_N_INSNS (76)}, /* other */
1309 COSTS_N_INSNS (1), /* cost of movsx */ 1648 COSTS_N_INSNS (1), /* cost of movsx */
1310 COSTS_N_INSNS (0), /* cost of movzx */ 1649 COSTS_N_INSNS (0), /* cost of movzx */
1311 8, /* "large" insn */ 1650 8, /* "large" insn */
1312 17, /* MOVE_RATIO */ 1651 17, /* MOVE_RATIO */
1313 1652 6, /* CLEAR_RATIO */
1314 6, /* cost for loading QImode using movzbl */
1315 {4, 4, 4}, /* cost of loading integer registers 1653 {4, 4, 4}, /* cost of loading integer registers
1316 in QImode, HImode and SImode. 1654 in QImode, HImode and SImode.
1317 Relative to reg-reg move (2). */ 1655 Relative to reg-reg move (2). */
1318 {6, 6, 3}, /* cost of storing integer registers */ 1656 {6, 6, 6}, /* cost of storing integer registers */
1319 2, /* cost of reg,reg fld/fst */ 1657 {6, 6, 6, 10, 20}, /* cost of loading SSE register
1320 {6, 6, 8}, /* cost of loading fp registers 1658 in 32bit, 64bit, 128bit, 256bit and 512bit */
1321 in SFmode, DFmode and XFmode */ 1659 {8, 8, 8, 12, 24}, /* cost of storing SSE register
1322 {6, 6, 10}, /* cost of storing fp registers 1660 in 32bit, 64bit, 128bit, 256bit and 512bit */
1323 in SFmode, DFmode and XFmode */ 1661 {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
1324 2, /* cost of moving MMX register */ 1662 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
1325 {6, 6}, /* cost of loading MMX registers
1326 in SImode and DImode */
1327 {6, 6}, /* cost of storing MMX registers
1328 in SImode and DImode */
1329 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 1663 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
1330 {6, 6, 6, 10, 20}, /* cost of loading SSE registers 1664 2, /* cost of moving SSE register to integer. */
1331 in 32,64,128,256 and 512-bit */
1332 {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
1333 {8, 8, 8, 12, 24}, /* cost of storing SSE registers
1334 in 32,64,128,256 and 512-bit */
1335 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
1336 2, 2, /* SSE->integer and integer->SSE moves */
1337 20, 8, /* Gather load static, per_elt. */ 1665 20, 8, /* Gather load static, per_elt. */
1338 22, 10, /* Gather store static, per_elt. */ 1666 22, 10, /* Gather store static, per_elt. */
1339 64, /* size of l1 cache. */ 1667 64, /* size of l1 cache. */
1340 512, /* size of l2 cache. */ 1668 512, /* size of l2 cache. */
1341 64, /* size of prefetch block */ 1669 64, /* size of prefetch block */
1380 {libcall, {{8, loop, false}, {24, unrolled_loop, false}, 1708 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1381 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 1709 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1382 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, 1710 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1383 {-1, libcall, false}}}}; 1711 {-1, libcall, false}}}};
1384 const struct processor_costs btver1_cost = { 1712 const struct processor_costs btver1_cost = {
1713 {
1714 /* Start of register allocator costs. integer->integer move cost is 2. */
1715 8, /* cost for loading QImode using movzbl */
1716 {6, 8, 6}, /* cost of loading integer registers
1717 in QImode, HImode and SImode.
1718 Relative to reg-reg move (2). */
1719 {6, 8, 6}, /* cost of storing integer registers */
1720 4, /* cost of reg,reg fld/fst */
1721 {12, 12, 28}, /* cost of loading fp registers
1722 in SFmode, DFmode and XFmode */
1723 {12, 12, 38}, /* cost of storing fp registers
1724 in SFmode, DFmode and XFmode */
1725 4, /* cost of moving MMX register */
1726 {10, 10}, /* cost of loading MMX registers
1727 in SImode and DImode */
1728 {12, 12}, /* cost of storing MMX registers
1729 in SImode and DImode */
1730 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1731 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
1732 in 32,64,128,256 and 512-bit */
1733 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
1734 in 32,64,128,256 and 512-bit */
1735 14, 14, /* SSE->integer and integer->SSE moves */
1736 /* End of register allocator costs. */
1737 },
1738
1385 COSTS_N_INSNS (1), /* cost of an add instruction */ 1739 COSTS_N_INSNS (1), /* cost of an add instruction */
1386 COSTS_N_INSNS (2), /* cost of a lea instruction */ 1740 COSTS_N_INSNS (2), /* cost of a lea instruction */
1387 COSTS_N_INSNS (1), /* variable shift costs */ 1741 COSTS_N_INSNS (1), /* variable shift costs */
1388 COSTS_N_INSNS (1), /* constant shift costs */ 1742 COSTS_N_INSNS (1), /* constant shift costs */
1389 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 1743 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1399 COSTS_N_INSNS (83)}, /* other */ 1753 COSTS_N_INSNS (83)}, /* other */
1400 COSTS_N_INSNS (1), /* cost of movsx */ 1754 COSTS_N_INSNS (1), /* cost of movsx */
1401 COSTS_N_INSNS (1), /* cost of movzx */ 1755 COSTS_N_INSNS (1), /* cost of movzx */
1402 8, /* "large" insn */ 1756 8, /* "large" insn */
1403 9, /* MOVE_RATIO */ 1757 9, /* MOVE_RATIO */
1404 1758 6, /* CLEAR_RATIO */
1405 /* All move costs are relative to integer->integer move times 2 and thus
1406 they are latency*2. */
1407 8, /* cost for loading QImode using movzbl */
1408 {6, 8, 6}, /* cost of loading integer registers 1759 {6, 8, 6}, /* cost of loading integer registers
1409 in QImode, HImode and SImode. 1760 in QImode, HImode and SImode.
1410 Relative to reg-reg move (2). */ 1761 Relative to reg-reg move (2). */
1411 {6, 8, 6}, /* cost of storing integer registers */ 1762 {6, 8, 6}, /* cost of storing integer registers */
1412 4, /* cost of reg,reg fld/fst */ 1763 {10, 10, 12, 48, 96}, /* cost of loading SSE register
1413 {12, 12, 28}, /* cost of loading fp registers 1764 in 32bit, 64bit, 128bit, 256bit and 512bit */
1414 in SFmode, DFmode and XFmode */ 1765 {10, 10, 12, 48, 96}, /* cost of storing SSE register
1415 {12, 12, 38}, /* cost of storing fp registers 1766 in 32bit, 64bit, 128bit, 256bit and 512bit */
1416 in SFmode, DFmode and XFmode */
1417 4, /* cost of moving MMX register */
1418 {10, 10}, /* cost of loading MMX registers
1419 in SImode and DImode */
1420 {12, 12}, /* cost of storing MMX registers
1421 in SImode and DImode */
1422 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1423 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
1424 in 32,64,128,256 and 512-bit */
1425 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ 1767 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
1426 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
1427 in 32,64,128,256 and 512-bit */
1428 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 1768 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
1429 14, 14, /* SSE->integer and integer->SSE moves */ 1769 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1770 14, /* cost of moving SSE register to integer. */
1430 10, 10, /* Gather load static, per_elt. */ 1771 10, 10, /* Gather load static, per_elt. */
1431 10, 10, /* Gather store static, per_elt. */ 1772 10, 10, /* Gather store static, per_elt. */
1432 32, /* size of l1 cache. */ 1773 32, /* size of l1 cache. */
1433 512, /* size of l2 cache. */ 1774 512, /* size of l2 cache. */
1434 64, /* size of prefetch block */ 1775 64, /* size of prefetch block */
1471 {libcall, {{8, loop, false}, {24, unrolled_loop, false}, 1812 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1472 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 1813 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1473 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, 1814 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1474 {-1, libcall, false}}}}; 1815 {-1, libcall, false}}}};
1475 const struct processor_costs btver2_cost = { 1816 const struct processor_costs btver2_cost = {
1817 {
1818 /* Start of register allocator costs. integer->integer move cost is 2. */
1819 8, /* cost for loading QImode using movzbl */
1820 {8, 8, 6}, /* cost of loading integer registers
1821 in QImode, HImode and SImode.
1822 Relative to reg-reg move (2). */
1823 {8, 8, 6}, /* cost of storing integer registers */
1824 4, /* cost of reg,reg fld/fst */
1825 {12, 12, 28}, /* cost of loading fp registers
1826 in SFmode, DFmode and XFmode */
1827 {12, 12, 38}, /* cost of storing fp registers
1828 in SFmode, DFmode and XFmode */
1829 4, /* cost of moving MMX register */
1830 {10, 10}, /* cost of loading MMX registers
1831 in SImode and DImode */
1832 {12, 12}, /* cost of storing MMX registers
1833 in SImode and DImode */
1834 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1835 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
1836 in 32,64,128,256 and 512-bit */
1837 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
1838 in 32,64,128,256 and 512-bit */
1839 14, 14, /* SSE->integer and integer->SSE moves */
1840 /* End of register allocator costs. */
1841 },
1842
1476 COSTS_N_INSNS (1), /* cost of an add instruction */ 1843 COSTS_N_INSNS (1), /* cost of an add instruction */
1477 COSTS_N_INSNS (2), /* cost of a lea instruction */ 1844 COSTS_N_INSNS (2), /* cost of a lea instruction */
1478 COSTS_N_INSNS (1), /* variable shift costs */ 1845 COSTS_N_INSNS (1), /* variable shift costs */
1479 COSTS_N_INSNS (1), /* constant shift costs */ 1846 COSTS_N_INSNS (1), /* constant shift costs */
1480 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 1847 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (83)}, /* other */ 1857 COSTS_N_INSNS (83)}, /* other */
1491 COSTS_N_INSNS (1), /* cost of movsx */ 1858 COSTS_N_INSNS (1), /* cost of movsx */
1492 COSTS_N_INSNS (1), /* cost of movzx */ 1859 COSTS_N_INSNS (1), /* cost of movzx */
1493 8, /* "large" insn */ 1860 8, /* "large" insn */
1494 9, /* MOVE_RATIO */ 1861 9, /* MOVE_RATIO */
1495 1862 6, /* CLEAR_RATIO */
1496 /* All move costs are relative to integer->integer move times 2 and thus
1497 they are latency*2. */
1498 8, /* cost for loading QImode using movzbl */
1499 {8, 8, 6}, /* cost of loading integer registers 1863 {8, 8, 6}, /* cost of loading integer registers
1500 in QImode, HImode and SImode. 1864 in QImode, HImode and SImode.
1501 Relative to reg-reg move (2). */ 1865 Relative to reg-reg move (2). */
1502 {8, 8, 6}, /* cost of storing integer registers */ 1866 {8, 8, 6}, /* cost of storing integer registers */
1503 4, /* cost of reg,reg fld/fst */ 1867 {10, 10, 12, 48, 96}, /* cost of loading SSE register
1504 {12, 12, 28}, /* cost of loading fp registers 1868 in 32bit, 64bit, 128bit, 256bit and 512bit */
1505 in SFmode, DFmode and XFmode */ 1869 {10, 10, 12, 48, 96}, /* cost of storing SSE register
1506 {12, 12, 38}, /* cost of storing fp registers 1870 in 32bit, 64bit, 128bit, 256bit and 512bit */
1507 in SFmode, DFmode and XFmode */
1508 4, /* cost of moving MMX register */
1509 {10, 10}, /* cost of loading MMX registers
1510 in SImode and DImode */
1511 {12, 12}, /* cost of storing MMX registers
1512 in SImode and DImode */
1513 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1514 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
1515 in 32,64,128,256 and 512-bit */
1516 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ 1871 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
1517 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
1518 in 32,64,128,256 and 512-bit */
1519 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 1872 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
1520 14, 14, /* SSE->integer and integer->SSE moves */ 1873 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1874 14, /* cost of moving SSE register to integer. */
1521 10, 10, /* Gather load static, per_elt. */ 1875 10, 10, /* Gather load static, per_elt. */
1522 10, 10, /* Gather store static, per_elt. */ 1876 10, 10, /* Gather store static, per_elt. */
1523 32, /* size of l1 cache. */ 1877 32, /* size of l1 cache. */
1524 2048, /* size of l2 cache. */ 1878 2048, /* size of l2 cache. */
1525 64, /* size of prefetch block */ 1879 64, /* size of prefetch block */
1561 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 1915 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1562 DUMMY_STRINGOP_ALGS}; 1916 DUMMY_STRINGOP_ALGS};
1563 1917
1564 static const 1918 static const
1565 struct processor_costs pentium4_cost = { 1919 struct processor_costs pentium4_cost = {
1920 {
1921 /* Start of register allocator costs. integer->integer move cost is 2. */
1922 5, /* cost for loading QImode using movzbl */
1923 {4, 5, 4}, /* cost of loading integer registers
1924 in QImode, HImode and SImode.
1925 Relative to reg-reg move (2). */
1926 {2, 3, 2}, /* cost of storing integer registers */
1927 12, /* cost of reg,reg fld/fst */
1928 {14, 14, 14}, /* cost of loading fp registers
1929 in SFmode, DFmode and XFmode */
1930 {14, 14, 14}, /* cost of storing fp registers
1931 in SFmode, DFmode and XFmode */
1932 12, /* cost of moving MMX register */
1933 {16, 16}, /* cost of loading MMX registers
1934 in SImode and DImode */
1935 {16, 16}, /* cost of storing MMX registers
1936 in SImode and DImode */
1937 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
1938 {16, 16, 16, 32, 64}, /* cost of loading SSE registers
1939 in 32,64,128,256 and 512-bit */
1940 {16, 16, 16, 32, 64}, /* cost of storing SSE registers
1941 in 32,64,128,256 and 512-bit */
1942 20, 12, /* SSE->integer and integer->SSE moves */
1943 /* End of register allocator costs. */
1944 },
1945
1566 COSTS_N_INSNS (1), /* cost of an add instruction */ 1946 COSTS_N_INSNS (1), /* cost of an add instruction */
1567 COSTS_N_INSNS (3), /* cost of a lea instruction */ 1947 COSTS_N_INSNS (3), /* cost of a lea instruction */
1568 COSTS_N_INSNS (4), /* variable shift costs */ 1948 COSTS_N_INSNS (4), /* variable shift costs */
1569 COSTS_N_INSNS (4), /* constant shift costs */ 1949 COSTS_N_INSNS (4), /* constant shift costs */
1570 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ 1950 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1580 COSTS_N_INSNS (56)}, /* other */ 1960 COSTS_N_INSNS (56)}, /* other */
1581 COSTS_N_INSNS (1), /* cost of movsx */ 1961 COSTS_N_INSNS (1), /* cost of movsx */
1582 COSTS_N_INSNS (1), /* cost of movzx */ 1962 COSTS_N_INSNS (1), /* cost of movzx */
1583 16, /* "large" insn */ 1963 16, /* "large" insn */
1584 6, /* MOVE_RATIO */ 1964 6, /* MOVE_RATIO */
1585 1965 6, /* CLEAR_RATIO */
1586 /* All move costs are relative to integer->integer move times 2 and thus
1587 they are latency*2. */
1588 5, /* cost for loading QImode using movzbl */
1589 {4, 5, 4}, /* cost of loading integer registers 1966 {4, 5, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode. 1967 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */ 1968 Relative to reg-reg move (2). */
1592 {2, 3, 2}, /* cost of storing integer registers */ 1969 {2, 3, 2}, /* cost of storing integer registers */
1593 12, /* cost of reg,reg fld/fst */ 1970 {16, 16, 16, 32, 64}, /* cost of loading SSE register
1594 {14, 14, 14}, /* cost of loading fp registers 1971 in 32bit, 64bit, 128bit, 256bit and 512bit */
1595 in SFmode, DFmode and XFmode */ 1972 {16, 16, 16, 32, 64}, /* cost of storing SSE register
1596 {14, 14, 14}, /* cost of storing fp registers 1973 in 32bit, 64bit, 128bit, 256bit and 512bit */
1597 in SFmode, DFmode and XFmode */ 1974 {32, 32, 32, 64, 128}, /* cost of unaligned loads. */
1598 12, /* cost of moving MMX register */ 1975 {32, 32, 32, 64, 128}, /* cost of unaligned stores. */
1599 {16, 16}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {16, 16}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */ 1976 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
1604 {16, 16, 16, 32, 64}, /* cost of loading SSE registers 1977 20, /* cost of moving SSE register to integer. */
1605 in 32,64,128,256 and 512-bit */
1606 {32, 32, 32, 64, 128}, /* cost of unaligned loads. */
1607 {16, 16, 16, 32, 64}, /* cost of storing SSE registers
1608 in 32,64,128,256 and 512-bit */
1609 {32, 32, 32, 64, 128}, /* cost of unaligned stores. */
1610 20, 12, /* SSE->integer and integer->SSE moves */
1611 16, 16, /* Gather load static, per_elt. */ 1978 16, 16, /* Gather load static, per_elt. */
1612 16, 16, /* Gather store static, per_elt. */ 1979 16, 16, /* Gather store static, per_elt. */
1613 8, /* size of l1 cache. */ 1980 8, /* size of l1 cache. */
1614 256, /* size of l2 cache. */ 1981 256, /* size of l2 cache. */
1615 64, /* size of prefetch block */ 1982 64, /* size of prefetch block */
1654 {libcall, {{24, loop, false}, {64, unrolled_loop, false}, 2021 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1655 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; 2022 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1656 2023
1657 static const 2024 static const
1658 struct processor_costs nocona_cost = { 2025 struct processor_costs nocona_cost = {
2026 {
2027 /* Start of register allocator costs. integer->integer move cost is 2. */
2028 4, /* cost for loading QImode using movzbl */
2029 {4, 4, 4}, /* cost of loading integer registers
2030 in QImode, HImode and SImode.
2031 Relative to reg-reg move (2). */
2032 {4, 4, 4}, /* cost of storing integer registers */
2033 12, /* cost of reg,reg fld/fst */
2034 {14, 14, 14}, /* cost of loading fp registers
2035 in SFmode, DFmode and XFmode */
2036 {14, 14, 14}, /* cost of storing fp registers
2037 in SFmode, DFmode and XFmode */
2038 14, /* cost of moving MMX register */
2039 {12, 12}, /* cost of loading MMX registers
2040 in SImode and DImode */
2041 {12, 12}, /* cost of storing MMX registers
2042 in SImode and DImode */
2043 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
2044 {12, 12, 12, 24, 48}, /* cost of loading SSE registers
2045 in 32,64,128,256 and 512-bit */
2046 {12, 12, 12, 24, 48}, /* cost of storing SSE registers
2047 in 32,64,128,256 and 512-bit */
2048 20, 12, /* SSE->integer and integer->SSE moves */
2049 /* End of register allocator costs. */
2050 },
2051
1659 COSTS_N_INSNS (1), /* cost of an add instruction */ 2052 COSTS_N_INSNS (1), /* cost of an add instruction */
1660 COSTS_N_INSNS (1), /* cost of a lea instruction */ 2053 COSTS_N_INSNS (1), /* cost of a lea instruction */
1661 COSTS_N_INSNS (1), /* variable shift costs */ 2054 COSTS_N_INSNS (1), /* variable shift costs */
1662 COSTS_N_INSNS (1), /* constant shift costs */ 2055 COSTS_N_INSNS (1), /* constant shift costs */
1663 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ 2056 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1673 COSTS_N_INSNS (66)}, /* other */ 2066 COSTS_N_INSNS (66)}, /* other */
1674 COSTS_N_INSNS (1), /* cost of movsx */ 2067 COSTS_N_INSNS (1), /* cost of movsx */
1675 COSTS_N_INSNS (1), /* cost of movzx */ 2068 COSTS_N_INSNS (1), /* cost of movzx */
1676 16, /* "large" insn */ 2069 16, /* "large" insn */
1677 17, /* MOVE_RATIO */ 2070 17, /* MOVE_RATIO */
1678 2071 6, /* CLEAR_RATIO */
1679 /* All move costs are relative to integer->integer move times 2 and thus
1680 they are latency*2. */
1681 4, /* cost for loading QImode using movzbl */
1682 {4, 4, 4}, /* cost of loading integer registers 2072 {4, 4, 4}, /* cost of loading integer registers
1683 in QImode, HImode and SImode. 2073 in QImode, HImode and SImode.
1684 Relative to reg-reg move (2). */ 2074 Relative to reg-reg move (2). */
1685 {4, 4, 4}, /* cost of storing integer registers */ 2075 {4, 4, 4}, /* cost of storing integer registers */
1686 12, /* cost of reg,reg fld/fst */ 2076 {12, 12, 12, 24, 48}, /* cost of loading SSE register
1687 {14, 14, 14}, /* cost of loading fp registers 2077 in 32bit, 64bit, 128bit, 256bit and 512bit */
1688 in SFmode, DFmode and XFmode */ 2078 {12, 12, 12, 24, 48}, /* cost of storing SSE register
1689 {14, 14, 14}, /* cost of storing fp registers 2079 in 32bit, 64bit, 128bit, 256bit and 512bit */
1690 in SFmode, DFmode and XFmode */ 2080 {24, 24, 24, 48, 96}, /* cost of unaligned loads. */
1691 14, /* cost of moving MMX register */ 2081 {24, 24, 24, 48, 96}, /* cost of unaligned stores. */
1692 {12, 12}, /* cost of loading MMX registers
1693 in SImode and DImode */
1694 {12, 12}, /* cost of storing MMX registers
1695 in SImode and DImode */
1696 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */ 2082 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
1697 {12, 12, 12, 24, 48}, /* cost of loading SSE registers 2083 20, /* cost of moving SSE register to integer. */
1698 in 32,64,128,256 and 512-bit */
1699 {24, 24, 24, 48, 96}, /* cost of unaligned loads. */
1700 {12, 12, 12, 24, 48}, /* cost of storing SSE registers
1701 in 32,64,128,256 and 512-bit */
1702 {24, 24, 24, 48, 96}, /* cost of unaligned stores. */
1703 20, 12, /* SSE->integer and integer->SSE moves */
1704 12, 12, /* Gather load static, per_elt. */ 2084 12, 12, /* Gather load static, per_elt. */
1705 12, 12, /* Gather store static, per_elt. */ 2085 12, 12, /* Gather store static, per_elt. */
1706 8, /* size of l1 cache. */ 2086 8, /* size of l1 cache. */
1707 1024, /* size of l2 cache. */ 2087 1024, /* size of l2 cache. */
1708 64, /* size of prefetch block */ 2088 64, /* size of prefetch block */
1745 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 2125 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1746 {libcall, {{24, loop, false}, {32, unrolled_loop, false}, 2126 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1747 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; 2127 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1748 static const 2128 static const
1749 struct processor_costs atom_cost = { 2129 struct processor_costs atom_cost = {
2130 {
2131 /* Start of register allocator costs. integer->integer move cost is 2. */
2132 6, /* cost for loading QImode using movzbl */
2133 {6, 6, 6}, /* cost of loading integer registers
2134 in QImode, HImode and SImode.
2135 Relative to reg-reg move (2). */
2136 {6, 6, 6}, /* cost of storing integer registers */
2137 4, /* cost of reg,reg fld/fst */
2138 {6, 6, 18}, /* cost of loading fp registers
2139 in SFmode, DFmode and XFmode */
2140 {14, 14, 24}, /* cost of storing fp registers
2141 in SFmode, DFmode and XFmode */
2142 2, /* cost of moving MMX register */
2143 {8, 8}, /* cost of loading MMX registers
2144 in SImode and DImode */
2145 {10, 10}, /* cost of storing MMX registers
2146 in SImode and DImode */
2147 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2148 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
2149 in 32,64,128,256 and 512-bit */
2150 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
2151 in 32,64,128,256 and 512-bit */
2152 8, 6, /* SSE->integer and integer->SSE moves */
2153 /* End of register allocator costs. */
2154 },
2155
1750 COSTS_N_INSNS (1), /* cost of an add instruction */ 2156 COSTS_N_INSNS (1), /* cost of an add instruction */
1751 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 2157 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1752 COSTS_N_INSNS (1), /* variable shift costs */ 2158 COSTS_N_INSNS (1), /* variable shift costs */
1753 COSTS_N_INSNS (1), /* constant shift costs */ 2159 COSTS_N_INSNS (1), /* constant shift costs */
1754 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 2160 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1764 COSTS_N_INSNS (74)}, /* other */ 2170 COSTS_N_INSNS (74)}, /* other */
1765 COSTS_N_INSNS (1), /* cost of movsx */ 2171 COSTS_N_INSNS (1), /* cost of movsx */
1766 COSTS_N_INSNS (1), /* cost of movzx */ 2172 COSTS_N_INSNS (1), /* cost of movzx */
1767 8, /* "large" insn */ 2173 8, /* "large" insn */
1768 17, /* MOVE_RATIO */ 2174 17, /* MOVE_RATIO */
1769 2175 6, /* CLEAR_RATIO */
1770 /* All move costs are relative to integer->integer move times 2 and thus
1771 they are latency*2. */
1772 6, /* cost for loading QImode using movzbl */
1773 {6, 6, 6}, /* cost of loading integer registers 2176 {6, 6, 6}, /* cost of loading integer registers
1774 in QImode, HImode and SImode. 2177 in QImode, HImode and SImode.
1775 Relative to reg-reg move (2). */ 2178 Relative to reg-reg move (2). */
1776 {6, 6, 6}, /* cost of storing integer registers */ 2179 {6, 6, 6}, /* cost of storing integer registers */
1777 4, /* cost of reg,reg fld/fst */ 2180 {8, 8, 8, 16, 32}, /* cost of loading SSE register
1778 {6, 6, 18}, /* cost of loading fp registers 2181 in 32bit, 64bit, 128bit, 256bit and 512bit */
1779 in SFmode, DFmode and XFmode */ 2182 {8, 8, 8, 16, 32}, /* cost of storing SSE register
1780 {14, 14, 24}, /* cost of storing fp registers 2183 in 32bit, 64bit, 128bit, 256bit and 512bit */
1781 in SFmode, DFmode and XFmode */
1782 2, /* cost of moving MMX register */
1783 {8, 8}, /* cost of loading MMX registers
1784 in SImode and DImode */
1785 {10, 10}, /* cost of storing MMX registers
1786 in SImode and DImode */
1787 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1788 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
1789 in 32,64,128,256 and 512-bit */
1790 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */ 2184 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
1791 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
1792 in 32,64,128,256 and 512-bit */
1793 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2185 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
1794 8, 6, /* SSE->integer and integer->SSE moves */ 2186 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2187 8, /* cost of moving SSE register to integer. */
1795 8, 8, /* Gather load static, per_elt. */ 2188 8, 8, /* Gather load static, per_elt. */
1796 8, 8, /* Gather store static, per_elt. */ 2189 8, 8, /* Gather store static, per_elt. */
1797 32, /* size of l1 cache. */ 2190 32, /* size of l1 cache. */
1798 256, /* size of l2 cache. */ 2191 256, /* size of l2 cache. */
1799 64, /* size of prefetch block */ 2192 64, /* size of prefetch block */
1836 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 2229 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1837 {libcall, {{24, loop, false}, {32, unrolled_loop, false}, 2230 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1838 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; 2231 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1839 static const 2232 static const
1840 struct processor_costs slm_cost = { 2233 struct processor_costs slm_cost = {
2234 {
2235 /* Start of register allocator costs. integer->integer move cost is 2. */
2236 8, /* cost for loading QImode using movzbl */
2237 {8, 8, 8}, /* cost of loading integer registers
2238 in QImode, HImode and SImode.
2239 Relative to reg-reg move (2). */
2240 {6, 6, 6}, /* cost of storing integer registers */
2241 2, /* cost of reg,reg fld/fst */
2242 {8, 8, 18}, /* cost of loading fp registers
2243 in SFmode, DFmode and XFmode */
2244 {6, 6, 18}, /* cost of storing fp registers
2245 in SFmode, DFmode and XFmode */
2246 2, /* cost of moving MMX register */
2247 {8, 8}, /* cost of loading MMX registers
2248 in SImode and DImode */
2249 {6, 6}, /* cost of storing MMX registers
2250 in SImode and DImode */
2251 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2252 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
2253 in 32,64,128,256 and 512-bit */
2254 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
2255 in 32,64,128,256 and 512-bit */
2256 8, 6, /* SSE->integer and integer->SSE moves */
2257 /* End of register allocator costs. */
2258 },
2259
1841 COSTS_N_INSNS (1), /* cost of an add instruction */ 2260 COSTS_N_INSNS (1), /* cost of an add instruction */
1842 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 2261 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1843 COSTS_N_INSNS (1), /* variable shift costs */ 2262 COSTS_N_INSNS (1), /* variable shift costs */
1844 COSTS_N_INSNS (1), /* constant shift costs */ 2263 COSTS_N_INSNS (1), /* constant shift costs */
1845 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 2264 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1855 COSTS_N_INSNS (74)}, /* other */ 2274 COSTS_N_INSNS (74)}, /* other */
1856 COSTS_N_INSNS (1), /* cost of movsx */ 2275 COSTS_N_INSNS (1), /* cost of movsx */
1857 COSTS_N_INSNS (1), /* cost of movzx */ 2276 COSTS_N_INSNS (1), /* cost of movzx */
1858 8, /* "large" insn */ 2277 8, /* "large" insn */
1859 17, /* MOVE_RATIO */ 2278 17, /* MOVE_RATIO */
1860 2279 6, /* CLEAR_RATIO */
1861 /* All move costs are relative to integer->integer move times 2 and thus
1862 they are latency*2. */
1863 8, /* cost for loading QImode using movzbl */
1864 {8, 8, 8}, /* cost of loading integer registers 2280 {8, 8, 8}, /* cost of loading integer registers
1865 in QImode, HImode and SImode. 2281 in QImode, HImode and SImode.
1866 Relative to reg-reg move (2). */ 2282 Relative to reg-reg move (2). */
1867 {6, 6, 6}, /* cost of storing integer registers */ 2283 {6, 6, 6}, /* cost of storing integer registers */
1868 2, /* cost of reg,reg fld/fst */ 2284 {8, 8, 8, 16, 32}, /* cost of loading SSE register
1869 {8, 8, 18}, /* cost of loading fp registers 2285 in 32bit, 64bit, 128bit, 256bit and 512bit */
1870 in SFmode, DFmode and XFmode */ 2286 {8, 8, 8, 16, 32}, /* cost of storing SSE register
1871 {6, 6, 18}, /* cost of storing fp registers 2287 in 32bit, 64bit, 128bit, 256bit and 512bit */
1872 in SFmode, DFmode and XFmode */
1873 2, /* cost of moving MMX register */
1874 {8, 8}, /* cost of loading MMX registers
1875 in SImode and DImode */
1876 {6, 6}, /* cost of storing MMX registers
1877 in SImode and DImode */
1878 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1879 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
1880 in 32,64,128,256 and 512-bit */
1881 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */ 2288 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
1882 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
1883 in 32,64,128,256 and 512-bit */
1884 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2289 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
1885 8, 6, /* SSE->integer and integer->SSE moves */ 2290 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2291 8, /* cost of moving SSE register to integer. */
1886 8, 8, /* Gather load static, per_elt. */ 2292 8, 8, /* Gather load static, per_elt. */
1887 8, 8, /* Gather store static, per_elt. */ 2293 8, 8, /* Gather store static, per_elt. */
1888 32, /* size of l1 cache. */ 2294 32, /* size of l1 cache. */
1889 256, /* size of l2 cache. */ 2295 256, /* size of l2 cache. */
1890 64, /* size of prefetch block */ 2296 64, /* size of prefetch block */
1927 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, 2333 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1928 {libcall, {{24, loop, false}, {32, unrolled_loop, false}, 2334 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1929 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; 2335 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1930 static const 2336 static const
1931 struct processor_costs intel_cost = { 2337 struct processor_costs intel_cost = {
2338 {
2339 /* Start of register allocator costs. integer->integer move cost is 2. */
2340 6, /* cost for loading QImode using movzbl */
2341 {4, 4, 4}, /* cost of loading integer registers
2342 in QImode, HImode and SImode.
2343 Relative to reg-reg move (2). */
2344 {6, 6, 6}, /* cost of storing integer registers */
2345 2, /* cost of reg,reg fld/fst */
2346 {6, 6, 8}, /* cost of loading fp registers
2347 in SFmode, DFmode and XFmode */
2348 {6, 6, 10}, /* cost of storing fp registers
2349 in SFmode, DFmode and XFmode */
2350 2, /* cost of moving MMX register */
2351 {6, 6}, /* cost of loading MMX registers
2352 in SImode and DImode */
2353 {6, 6}, /* cost of storing MMX registers
2354 in SImode and DImode */
2355 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
2356 {6, 6, 6, 6, 6}, /* cost of loading SSE registers
2357 in 32,64,128,256 and 512-bit */
2358 {6, 6, 6, 6, 6}, /* cost of storing SSE registers
2359 in 32,64,128,256 and 512-bit */
2360 4, 4, /* SSE->integer and integer->SSE moves */
2361 /* End of register allocator costs. */
2362 },
2363
1932 COSTS_N_INSNS (1), /* cost of an add instruction */ 2364 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 2365 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1934 COSTS_N_INSNS (1), /* variable shift costs */ 2366 COSTS_N_INSNS (1), /* variable shift costs */
1935 COSTS_N_INSNS (1), /* constant shift costs */ 2367 COSTS_N_INSNS (1), /* constant shift costs */
1936 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 2368 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1946 COSTS_N_INSNS (74)}, /* other */ 2378 COSTS_N_INSNS (74)}, /* other */
1947 COSTS_N_INSNS (1), /* cost of movsx */ 2379 COSTS_N_INSNS (1), /* cost of movsx */
1948 COSTS_N_INSNS (1), /* cost of movzx */ 2380 COSTS_N_INSNS (1), /* cost of movzx */
1949 8, /* "large" insn */ 2381 8, /* "large" insn */
1950 17, /* MOVE_RATIO */ 2382 17, /* MOVE_RATIO */
1951 2383 6, /* CLEAR_RATIO */
1952 /* All move costs are relative to integer->integer move times 2 and thus
1953 they are latency*2. */
1954 6, /* cost for loading QImode using movzbl */
1955 {4, 4, 4}, /* cost of loading integer registers 2384 {4, 4, 4}, /* cost of loading integer registers
1956 in QImode, HImode and SImode. 2385 in QImode, HImode and SImode.
1957 Relative to reg-reg move (2). */ 2386 Relative to reg-reg move (2). */
1958 {6, 6, 6}, /* cost of storing integer registers */ 2387 {6, 6, 6}, /* cost of storing integer registers */
1959 2, /* cost of reg,reg fld/fst */ 2388 {6, 6, 6, 6, 6}, /* cost of loading SSE register
1960 {6, 6, 8}, /* cost of loading fp registers 2389 in 32bit, 64bit, 128bit, 256bit and 512bit */
1961 in SFmode, DFmode and XFmode */ 2390 {6, 6, 6, 6, 6}, /* cost of storing SSE register
1962 {6, 6, 10}, /* cost of storing fp registers 2391 in 32bit, 64bit, 128bit, 256bit and 512bit */
1963 in SFmode, DFmode and XFmode */ 2392 {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
1964 2, /* cost of moving MMX register */ 2393 {10, 10, 10, 10, 10}, /* cost of unaligned stores. */
1965 {6, 6}, /* cost of loading MMX registers
1966 in SImode and DImode */
1967 {6, 6}, /* cost of storing MMX registers
1968 in SImode and DImode */
1969 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ 2394 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
1970 {6, 6, 6, 6, 6}, /* cost of loading SSE registers 2395 4, /* cost of moving SSE register to integer. */
1971 in 32,64,128,256 and 512-bit */
1972 {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
1973 {6, 6, 6, 6, 6}, /* cost of storing SSE registers
1974 in 32,64,128,256 and 512-bit */
1975 {10, 10, 10, 10, 10}, /* cost of unaligned stores. */
1976 4, 4, /* SSE->integer and integer->SSE moves */
1977 6, 6, /* Gather load static, per_elt. */ 2396 6, 6, /* Gather load static, per_elt. */
1978 6, 6, /* Gather store static, per_elt. */ 2397 6, 6, /* Gather store static, per_elt. */
1979 32, /* size of l1 cache. */ 2398 32, /* size of l1 cache. */
1980 256, /* size of l2 cache. */ 2399 256, /* size of l2 cache. */
1981 64, /* size of prefetch block */ 2400 64, /* size of prefetch block */
1986 COSTS_N_INSNS (20), /* cost of FDIV instruction. */ 2405 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1987 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 2406 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1988 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 2407 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1989 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 2408 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1990 2409
1991 COSTS_N_INSNS (8), /* cost of cheap SSE instruction. */ 2410 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1992 COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ 2411 COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
1993 COSTS_N_INSNS (8), /* cost of MULSS instruction. */ 2412 COSTS_N_INSNS (8), /* cost of MULSS instruction. */
1994 COSTS_N_INSNS (8), /* cost of MULSD instruction. */ 2413 COSTS_N_INSNS (8), /* cost of MULSD instruction. */
1995 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ 2414 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
1996 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ 2415 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
2022 {-1, libcall, false}}}, 2441 {-1, libcall, false}}},
2023 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, 2442 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
2024 {-1, libcall, false}}}}; 2443 {-1, libcall, false}}}};
2025 static const 2444 static const
2026 struct processor_costs generic_cost = { 2445 struct processor_costs generic_cost = {
2446 {
2447 /* Start of register allocator costs. integer->integer move cost is 2. */
2448 6, /* cost for loading QImode using movzbl */
2449 {6, 6, 6}, /* cost of loading integer registers
2450 in QImode, HImode and SImode.
2451 Relative to reg-reg move (2). */
2452 {6, 6, 6}, /* cost of storing integer registers */
2453 4, /* cost of reg,reg fld/fst */
2454 {6, 6, 12}, /* cost of loading fp registers
2455 in SFmode, DFmode and XFmode */
2456 {6, 6, 12}, /* cost of storing fp registers
2457 in SFmode, DFmode and XFmode */
2458 2, /* cost of moving MMX register */
2459 {6, 6}, /* cost of loading MMX registers
2460 in SImode and DImode */
2461 {6, 6}, /* cost of storing MMX registers
2462 in SImode and DImode */
2463 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2464 {6, 6, 6, 10, 15}, /* cost of loading SSE registers
2465 in 32,64,128,256 and 512-bit */
2466 {6, 6, 6, 10, 15}, /* cost of storing SSE registers
2467 in 32,64,128,256 and 512-bit */
2468 6, 6, /* SSE->integer and integer->SSE moves */
2469 /* End of register allocator costs. */
2470 },
2471
2027 COSTS_N_INSNS (1), /* cost of an add instruction */ 2472 COSTS_N_INSNS (1), /* cost of an add instruction */
2028 /* Setting cost to 2 makes our current implementation of synth_mult result in 2473 /* Setting cost to 2 makes our current implementation of synth_mult result in
2029 use of unnecessary temporary registers causing regression on several 2474 use of unnecessary temporary registers causing regression on several
2030 SPECfp benchmarks. */ 2475 SPECfp benchmarks. */
2031 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 2476 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2044 COSTS_N_INSNS (74)}, /* other */ 2489 COSTS_N_INSNS (74)}, /* other */
2045 COSTS_N_INSNS (1), /* cost of movsx */ 2490 COSTS_N_INSNS (1), /* cost of movsx */
2046 COSTS_N_INSNS (1), /* cost of movzx */ 2491 COSTS_N_INSNS (1), /* cost of movzx */
2047 8, /* "large" insn */ 2492 8, /* "large" insn */
2048 17, /* MOVE_RATIO */ 2493 17, /* MOVE_RATIO */
2049 2494 6, /* CLEAR_RATIO */
2050 /* All move costs are relative to integer->integer move times 2 and thus
2051 they are latency*2. */
2052 6, /* cost for loading QImode using movzbl */
2053 {6, 6, 6}, /* cost of loading integer registers 2495 {6, 6, 6}, /* cost of loading integer registers
2054 in QImode, HImode and SImode. 2496 in QImode, HImode and SImode.
2055 Relative to reg-reg move (2). */ 2497 Relative to reg-reg move (2). */
2056 {6, 6, 6}, /* cost of storing integer registers */ 2498 {6, 6, 6}, /* cost of storing integer registers */
2057 4, /* cost of reg,reg fld/fst */ 2499 {6, 6, 6, 10, 15}, /* cost of loading SSE register
2058 {6, 6, 12}, /* cost of loading fp registers 2500 in 32bit, 64bit, 128bit, 256bit and 512bit */
2059 in SFmode, DFmode and XFmode */ 2501 {6, 6, 6, 10, 15}, /* cost of storing SSE register
2060 {6, 6, 12}, /* cost of storing fp registers 2502 in 32bit, 64bit, 128bit, 256bit and 512bit */
2061 in SFmode, DFmode and XFmode */ 2503 {6, 6, 6, 10, 15}, /* cost of unaligned loads. */
2062 2, /* cost of moving MMX register */ 2504 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2063 {6, 6}, /* cost of loading MMX registers
2064 in SImode and DImode */
2065 {6, 6}, /* cost of storing MMX registers
2066 in SImode and DImode */
2067 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 2505 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2068 {6, 6, 6, 10, 15}, /* cost of loading SSE registers 2506 6, /* cost of moving SSE register to integer. */
2069 in 32,64,128,256 and 512-bit */
2070 {6, 6, 6, 10, 15}, /* cost of unaligned loads. */
2071 {6, 6, 6, 10, 15}, /* cost of storing SSE registers
2072 in 32,64,128,256 and 512-bit */
2073 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2074 6, 6, /* SSE->integer and integer->SSE moves */
2075 18, 6, /* Gather load static, per_elt. */ 2507 18, 6, /* Gather load static, per_elt. */
2076 18, 6, /* Gather store static, per_elt. */ 2508 18, 6, /* Gather store static, per_elt. */
2077 32, /* size of l1 cache. */ 2509 32, /* size of l1 cache. */
2078 512, /* size of l2 cache. */ 2510 512, /* size of l2 cache. */
2079 64, /* size of prefetch block */ 2511 64, /* size of prefetch block */
2122 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true}, 2554 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2123 {-1, libcall, false}}}}; 2555 {-1, libcall, false}}}};
2124 2556
2125 static const 2557 static const
2126 struct processor_costs core_cost = { 2558 struct processor_costs core_cost = {
2559 {
2560 /* Start of register allocator costs. integer->integer move cost is 2. */
2561 6, /* cost for loading QImode using movzbl */
2562 {4, 4, 4}, /* cost of loading integer registers
2563 in QImode, HImode and SImode.
2564 Relative to reg-reg move (2). */
2565 {6, 6, 6}, /* cost of storing integer registers */
2566 2, /* cost of reg,reg fld/fst */
2567 {6, 6, 8}, /* cost of loading fp registers
2568 in SFmode, DFmode and XFmode */
2569 {6, 6, 10}, /* cost of storing fp registers
2570 in SFmode, DFmode and XFmode */
2571 2, /* cost of moving MMX register */
2572 {6, 6}, /* cost of loading MMX registers
2573 in SImode and DImode */
2574 {6, 6}, /* cost of storing MMX registers
2575 in SImode and DImode */
2576 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2577 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
2578 in 32,64,128,256 and 512-bit */
2579 {6, 6, 6, 6, 12}, /* cost of storing SSE registers
2580 in 32,64,128,256 and 512-bit */
2581 6, 6, /* SSE->integer and integer->SSE moves */
2582 /* End of register allocator costs. */
2583 },
2584
2127 COSTS_N_INSNS (1), /* cost of an add instruction */ 2585 COSTS_N_INSNS (1), /* cost of an add instruction */
2128 /* On all chips taken into consideration lea is 2 cycles and more. With 2586 /* On all chips taken into consideration lea is 2 cycles and more. With
2129 this cost however our current implementation of synth_mult results in 2587 this cost however our current implementation of synth_mult results in
2130 use of unnecessary temporary registers causing regression on several 2588 use of unnecessary temporary registers causing regression on several
2131 SPECfp benchmarks. */ 2589 SPECfp benchmarks. */
2148 COSTS_N_INSNS (81)}, /* other */ 2606 COSTS_N_INSNS (81)}, /* other */
2149 COSTS_N_INSNS (1), /* cost of movsx */ 2607 COSTS_N_INSNS (1), /* cost of movsx */
2150 COSTS_N_INSNS (1), /* cost of movzx */ 2608 COSTS_N_INSNS (1), /* cost of movzx */
2151 8, /* "large" insn */ 2609 8, /* "large" insn */
2152 17, /* MOVE_RATIO */ 2610 17, /* MOVE_RATIO */
2153 2611 6, /* CLEAR_RATIO */
2154 /* All move costs are relative to integer->integer move times 2 and thus
2155 they are latency*2. */
2156 6, /* cost for loading QImode using movzbl */
2157 {4, 4, 4}, /* cost of loading integer registers 2612 {4, 4, 4}, /* cost of loading integer registers
2158 in QImode, HImode and SImode. 2613 in QImode, HImode and SImode.
2159 Relative to reg-reg move (2). */ 2614 Relative to reg-reg move (2). */
2160 {6, 6, 6}, /* cost of storing integer registers */ 2615 {6, 6, 6}, /* cost of storing integer registers */
2161 2, /* cost of reg,reg fld/fst */ 2616 {6, 6, 6, 6, 12}, /* cost of loading SSE register
2162 {6, 6, 8}, /* cost of loading fp registers 2617 in 32bit, 64bit, 128bit, 256bit and 512bit */
2163 in SFmode, DFmode and XFmode */ 2618 {6, 6, 6, 6, 12}, /* cost of storing SSE register
2164 {6, 6, 10}, /* cost of storing fp registers 2619 in 32bit, 64bit, 128bit, 256bit and 512bit */
2165 in SFmode, DFmode and XFmode */ 2620 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
2166 2, /* cost of moving MMX register */ 2621 {6, 6, 6, 6, 12}, /* cost of unaligned stores. */
2167 {6, 6}, /* cost of loading MMX registers
2168 in SImode and DImode */
2169 {6, 6}, /* cost of storing MMX registers
2170 in SImode and DImode */
2171 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 2622 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2172 {6, 6, 6, 6, 12}, /* cost of loading SSE registers 2623 2, /* cost of moving SSE register to integer. */
2173 in 32,64,128,256 and 512-bit */
2174 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
2175 {6, 6, 6, 6, 12}, /* cost of storing SSE registers
2176 in 32,64,128,256 and 512-bit */
2177 {6, 6, 6, 6, 12}, /* cost of unaligned stores. */
2178 2, 2, /* SSE->integer and integer->SSE moves */
2179 /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops, 2624 /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
2180 rec. throughput 6. 2625 rec. throughput 6.
2181 So 5 uops statically and one uops per load. */ 2626 So 5 uops statically and one uops per load. */
2182 10, 6, /* Gather load static, per_elt. */ 2627 10, 6, /* Gather load static, per_elt. */
2183 10, 6, /* Gather store static, per_elt. */ 2628 10, 6, /* Gather store static, per_elt. */