Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/i386/x86-tune.def @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Definitions of x86 tunable features. | 1 /* Definitions of x86 tunable features. |
2 Copyright (C) 2013-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2013-2018 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify | 6 GCC is free software; you can redistribute it and/or modify |
7 it under the terms of the GNU General Public License as published by | 7 it under the terms of the GNU General Public License as published by |
39 /*****************************************************************************/ | 39 /*****************************************************************************/ |
40 | 40 |
41 /* X86_TUNE_SCHEDULE: Enable scheduling. */ | 41 /* X86_TUNE_SCHEDULE: Enable scheduling. */ |
42 DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", | 42 DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", |
43 m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT | 43 m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT |
44 | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) | 44 | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT |
45 | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC) | |
45 | 46 |
46 /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming | 47 /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming |
47 on modern chips. Prefer stores affecting whole integer register | 48 on modern chips. Prefer stores affecting whole integer register |
48 over partial stores. For example prefer MOVZBL or MOVQ to load 8bit | 49 over partial stores. For example prefer MOVZBL or MOVQ to load 8bit |
49 value over movb. */ | 50 value over movb. */ |
50 DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", | 51 DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", |
51 m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL | 52 m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 |
52 | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC) | 53 | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL |
54 | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | |
55 | m_GENERIC) | |
53 | 56 |
54 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store | 57 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store |
55 destinations to be 128bit to allow register renaming on 128bit SSE units, | 58 destinations to be 128bit to allow register renaming on 128bit SSE units, |
56 but usually results in one extra microop on 64bit SSE units. | 59 but usually results in one extra microop on 64bit SSE units. |
57 Experimental results shows that disabling this option on P4 brings over 20% | 60 Experimental results shows that disabling this option on P4 brings over 20% |
72 This is because Core2 resolves dependencies on whole flags register | 75 This is because Core2 resolves dependencies on whole flags register |
73 and such sequences introduce false dependency on previous instruction | 76 and such sequences introduce false dependency on previous instruction |
74 setting full flags. | 77 setting full flags. |
75 | 78 |
76 The flag does not affect generation of INC and DEC that is controlled | 79 The flag does not affect generation of INC and DEC that is controlled |
77 by X86_TUNE_USE_INCDEC. | 80 by X86_TUNE_USE_INCDEC. */ |
78 | 81 |
79 This flag may be dropped from generic once core2-corei5 machines are | |
80 rare enough. */ | |
81 DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", | 82 DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", |
82 m_CORE2 | m_GENERIC) | 83 m_CORE2) |
83 | 84 |
84 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid | 85 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid |
85 partial dependencies. */ | 86 partial dependencies. */ |
86 DEF_TUNE (X86_TUNE_MOVX, "movx", | 87 DEF_TUNE (X86_TUNE_MOVX, "movx", |
87 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | 88 m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE |
88 | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC) | 89 | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL |
90 | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | |
91 | m_CORE_AVX2 | m_TREMONT | m_GENERIC) | |
89 | 92 |
90 /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by | 93 /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by |
91 full sized loads. */ | 94 full sized loads. */ |
92 DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", | 95 DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", |
93 m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL | 96 m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL |
94 | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC) | 97 | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE |
98 | m_TREMONT | m_GENERIC) | |
95 | 99 |
96 /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent | 100 /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent |
97 conditional jump instruction for 32 bit TARGET. | 101 conditional jump instruction for 32 bit TARGET. */ |
98 FIXME: revisit for generic. */ | |
99 DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", | 102 DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", |
100 m_CORE_ALL | m_BDVER | m_ZNVER1) | 103 m_CORE_ALL | m_BDVER | m_ZNVER1 | m_GENERIC) |
101 | 104 |
102 /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent | 105 /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent |
103 conditional jump instruction for TARGET_64BIT. | 106 conditional jump instruction for TARGET_64BIT. */ |
104 FIXME: revisit for generic. */ | |
105 DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", | 107 DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", |
106 m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1) | 108 m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER | m_ZNVER1 | m_GENERIC) |
107 | 109 |
108 /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a | 110 /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a |
109 subsequent conditional jump instruction when the condition jump | 111 subsequent conditional jump instruction when the condition jump |
110 check sign flag (SF) or overflow flag (OF). */ | 112 check sign flag (SF) or overflow flag (OF). */ |
111 DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", | 113 DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", |
112 m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1) | 114 m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER | m_ZNVER1 | m_GENERIC) |
113 | 115 |
114 /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional | 116 /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional |
115 jump instruction when the alu instruction produces the CCFLAG consumed by | 117 jump instruction when the alu instruction produces the CCFLAG consumed by |
116 the conditional jump instruction. */ | 118 the conditional jump instruction. */ |
117 DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", | 119 DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", |
118 m_SANDYBRIDGE | m_HASWELL) | 120 m_SANDYBRIDGE | m_CORE_AVX2 | m_GENERIC) |
119 | 121 |
120 | 122 |
121 /*****************************************************************************/ | 123 /*****************************************************************************/ |
122 /* Function prologue, epilogue and function calling sequences. */ | 124 /* Function prologue, epilogue and function calling sequences. */ |
123 /*****************************************************************************/ | 125 /*****************************************************************************/ |
126 arguments in prologue/epilogue instead of separately for each call | 128 arguments in prologue/epilogue instead of separately for each call |
127 by push/pop instructions. | 129 by push/pop instructions. |
128 This increases code size by about 5% in 32bit mode, less so in 64bit mode | 130 This increases code size by about 5% in 32bit mode, less so in 64bit mode |
129 because parameters are passed in registers. It is considerable | 131 because parameters are passed in registers. It is considerable |
130 win for targets without stack engine that prevents multiple push operations | 132 win for targets without stack engine that prevents multiple push operations |
131 to happen in parallel. | 133 to happen in parallel. */ |
132 | 134 |
133 FIXME: the flags is incorrectly enabled for amdfam10, Bulldozer, | |
134 Bobcat and Generic. This is because disabling it causes large | |
135 regression on mgrid due to IRA limitation leading to unnecessary | |
136 use of the frame pointer in 32bit mode. */ | |
137 DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args", | 135 DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args", |
138 m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | 136 m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL |
139 | m_ATHLON_K8) | 137 | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ATHLON_K8) |
140 | 138 |
141 /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are | 139 /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are |
142 considered on critical path. */ | 140 considered on critical path. */ |
143 DEF_TUNE (X86_TUNE_PROLOGUE_USING_MOVE, "prologue_using_move", | 141 DEF_TUNE (X86_TUNE_PROLOGUE_USING_MOVE, "prologue_using_move", |
144 m_PPRO | m_ATHLON_K8) | 142 m_PPRO | m_ATHLON_K8) |
190 of conditional jump or directly preceded by other jump instruction. | 188 of conditional jump or directly preceded by other jump instruction. |
191 This is important for AMD K8-AMDFAM10 because the branch prediction | 189 This is important for AMD K8-AMDFAM10 because the branch prediction |
192 architecture expects at most one jump per 2 byte window. Failing to | 190 architecture expects at most one jump per 2 byte window. Failing to |
193 pad returns leads to misaligned return stack. */ | 191 pad returns leads to misaligned return stack. */ |
194 DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", | 192 DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", |
195 m_ATHLON_K8 | m_AMDFAM10 | m_GENERIC) | 193 m_ATHLON_K8 | m_AMDFAM10) |
196 | 194 |
197 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more | 195 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more |
198 than 4 branch instructions in the 16 byte window. */ | 196 than 4 branch instructions in the 16 byte window. */ |
199 DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", | 197 DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", |
200 m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | 198 m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM |
201 |m_INTEL | m_ATHLON_K8 | m_AMDFAM10) | 199 | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL | m_ATHLON_K8 |
200 | m_AMDFAM10) | |
202 | 201 |
203 /*****************************************************************************/ | 202 /*****************************************************************************/ |
204 /* Integer instruction selection tuning */ | 203 /* Integer instruction selection tuning */ |
205 /*****************************************************************************/ | 204 /*****************************************************************************/ |
206 | 205 |
216 | 215 |
217 /* X86_TUNE_READ_MODIFY: Enable use of read-modify instructions such | 216 /* X86_TUNE_READ_MODIFY: Enable use of read-modify instructions such |
218 as "add mem, reg". */ | 217 as "add mem, reg". */ |
219 DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO)) | 218 DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO)) |
220 | 219 |
221 /* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. */ | 220 /* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. |
221 | |
222 Core2 and nehalem has stall of 7 cycles for partial flag register stalls. | |
223 Sandy bridge and Ivy bridge generate extra uop. On Haswell this extra uop | |
224 is output only when the values needs to be really merged, which is not | |
225 done by GCC generated code. */ | |
222 DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", | 226 DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", |
223 ~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL | 227 ~(m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE |
224 | m_KNL | m_KNM | m_GENERIC)) | 228 | m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_GOLDMONT |
229 | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)) | |
225 | 230 |
226 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred | 231 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred |
227 for DFmode copies */ | 232 for DFmode copies */ |
228 DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", | 233 DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", |
229 ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | 234 ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT |
230 | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)) | 235 | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT |
236 | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)) | |
231 | 237 |
232 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag | 238 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag |
233 will impact LEA instruction selection. */ | 239 will impact LEA instruction selection. */ |
234 DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL | 240 DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL |
235 | m_KNM | m_INTEL) | 241 | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL) |
236 | 242 |
237 /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */ | 243 /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */ |
238 DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr", | 244 DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr", |
239 m_BONNELL | m_SILVERMONT | m_KNL | m_KNM) | 245 m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT |
246 | m_KNL | m_KNM) | |
240 | 247 |
241 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is | 248 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is |
242 vector path on AMD machines. | 249 vector path on AMD machines. |
243 FIXME: Do we need to enable this for core? */ | 250 FIXME: Do we need to enable this for core? */ |
244 DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM32_MEM, "slow_imul_imm32_mem", | 251 DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM32_MEM, "slow_imul_imm32_mem", |
251 m_K8 | m_AMDFAM10) | 258 m_K8 | m_AMDFAM10) |
252 | 259 |
253 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for | 260 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for |
254 a conditional move. */ | 261 a conditional move. */ |
255 DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove", | 262 DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove", |
256 m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL) | 263 m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_KNL |
264 | m_KNM | m_TREMONT | m_INTEL) | |
257 | 265 |
258 /* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such | 266 /* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such |
259 as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */ | 267 as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */ |
260 DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA) | 268 DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA) |
261 | 269 |
270 | 278 |
271 /* X86_TUNE_USE_SAHF: Controls use of SAHF. */ | 279 /* X86_TUNE_USE_SAHF: Controls use of SAHF. */ |
272 DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", | 280 DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", |
273 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | 281 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT |
274 | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | 282 | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER |
275 | m_BTVER | m_ZNVER1 | m_GENERIC) | 283 | m_BTVER | m_ZNVER1 | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT |
284 | m_GENERIC) | |
276 | 285 |
277 /* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */ | 286 /* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */ |
278 DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", | 287 DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", |
279 ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | 288 ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL |
280 | m_K6)) | 289 | m_K6 | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT)) |
281 | 290 |
282 /* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */ | 291 /* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */ |
283 DEF_TUNE (X86_TUNE_USE_BT, "use_bt", | 292 DEF_TUNE (X86_TUNE_USE_BT, "use_bt", |
284 m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | 293 m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL |
285 | m_LAKEMONT | m_AMD_MULTIPLE | m_GENERIC) | 294 | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS |
295 | m_TREMONT | m_GENERIC) | |
286 | 296 |
287 /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency | 297 /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency |
288 for bit-manipulation instructions. */ | 298 for bit-manipulation instructions. */ |
289 DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", | 299 DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", |
290 m_SANDYBRIDGE | m_HASWELL | m_GENERIC) | 300 m_SANDYBRIDGE | m_CORE_AVX2 | m_GENERIC) |
291 | 301 |
292 /* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based | 302 /* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based |
293 on hardware capabilities. Bdver3 hardware has a loop buffer which makes | 303 on hardware capabilities. Bdver3 hardware has a loop buffer which makes |
294 unrolling small loop less important. For such architectures we adjust | 304 unrolling small loop less important. For such architectures we adjust |
295 the unroll factor so that the unrolled loop fits the loop buffer. */ | 305 the unroll factor so that the unrolled loop fits the loop buffer. */ |
296 DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4) | 306 DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4) |
297 | 307 |
298 /* X86_TUNE_ONE_IF_CONV_INSN: Restrict a number of cmov insns in | 308 /* X86_TUNE_ONE_IF_CONV_INSN: Restrict a number of cmov insns in |
299 if-converted sequence to one. */ | 309 if-converted sequence to one. */ |
300 DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", | 310 DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", |
301 m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC) | 311 m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT |
312 | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC) | |
302 | 313 |
303 /*****************************************************************************/ | 314 /*****************************************************************************/ |
304 /* 387 instruction selection tuning */ | 315 /* 387 instruction selection tuning */ |
305 /*****************************************************************************/ | 316 /*****************************************************************************/ |
306 | 317 |
312 | 323 |
313 /* X86_TUNE_USE_SIMODE_FIOP: Enables use of x87 instructions with 32bit | 324 /* X86_TUNE_USE_SIMODE_FIOP: Enables use of x87 instructions with 32bit |
314 integer operand. */ | 325 integer operand. */ |
315 DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", | 326 DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", |
316 ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | 327 ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL |
317 | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_GENERIC)) | 328 | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE |
329 | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)) | |
318 | 330 |
319 /* X86_TUNE_USE_FFREEP: Use ffreep instruction instead of fstp. */ | 331 /* X86_TUNE_USE_FFREEP: Use ffreep instruction instead of fstp. */ |
320 DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) | 332 DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) |
321 | 333 |
322 /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */ | 334 /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */ |
323 DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", | 335 DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", |
324 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | 336 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT |
325 | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC) | 337 | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GOLDMONT |
338 | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC) | |
326 | 339 |
327 /*****************************************************************************/ | 340 /*****************************************************************************/ |
328 /* SSE instruction selection tuning */ | 341 /* SSE instruction selection tuning */ |
329 /*****************************************************************************/ | 342 /*****************************************************************************/ |
330 | 343 |
334 m_CORE_ALL) | 347 m_CORE_ALL) |
335 | 348 |
336 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead | 349 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead |
337 of a sequence loading registers by parts. */ | 350 of a sequence loading registers by parts. */ |
338 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", | 351 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", |
339 m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM | 352 m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM |
340 | m_INTEL | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC) | 353 | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS |
354 | m_TREMONT | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC) | |
341 | 355 |
342 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead | 356 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead |
343 of a sequence loading registers by parts. */ | 357 of a sequence loading registers by parts. */ |
344 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", | 358 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", |
345 m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM | 359 m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM |
346 | m_INTEL | m_BDVER | m_ZNVER1 | m_GENERIC) | 360 | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS |
361 | m_TREMONT | m_BDVER | m_ZNVER1 | m_GENERIC) | |
347 | 362 |
348 /* Use packed single precision instructions where possible. I.e. movups instead | 363 /* Use packed single precision instructions where possible. I.e. movups instead |
349 of movupd. */ | 364 of movupd. */ |
350 DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal", | 365 DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal", |
351 m_BDVER | m_ZNVER1) | 366 m_BDVER | m_ZNVER1) |
362 | 377 |
363 /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer | 378 /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer |
364 to SSE registers. If disabled, the moves will be done by storing | 379 to SSE registers. If disabled, the moves will be done by storing |
365 the value to memory and reloading. */ | 380 the value to memory and reloading. */ |
366 DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_TO_VEC, "inter_unit_moves_to_vec", | 381 DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_TO_VEC, "inter_unit_moves_to_vec", |
367 ~(m_AMD_MULTIPLE | m_GENERIC)) | 382 ~(m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC)) |
368 | 383 |
369 /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC: Enable moves in from SSE | 384 /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC: Enable moves in from SSE |
370 to integer registers. If disabled, the moves will be done by storing | 385 to integer registers. If disabled, the moves will be done by storing |
371 the value to memory and reloading. */ | 386 the value to memory and reloading. */ |
372 DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec", | 387 DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec", |
373 ~m_ATHLON_K8) | 388 ~m_ATHLON_K8) |
374 | 389 |
375 /* X86_TUNE_INTER_UNIT_CONVERSIONS: Enable float<->integer conversions | 390 /* X86_TUNE_INTER_UNIT_CONVERSIONS: Enable float<->integer conversions |
376 to use both SSE and integer registers at a same time. | 391 to use both SSE and integer registers at a same time. */ |
377 FIXME: revisit importance of this for generic. */ | |
378 DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", | 392 DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", |
379 ~(m_AMDFAM10 | m_BDVER)) | 393 ~(m_AMDFAM10 | m_BDVER)) |
380 | 394 |
381 /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for | 395 /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for |
382 fp converts to destination register. */ | 396 fp converts to destination register. */ |
383 DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts", | 397 DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts", |
384 m_SILVERMONT | m_KNL | m_KNM | m_INTEL) | 398 m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS |
399 | m_TREMONT | m_INTEL) | |
385 | 400 |
386 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion | 401 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion |
387 from FP to FP. This form of instructions avoids partial write to the | 402 from FP to FP. This form of instructions avoids partial write to the |
388 destination. */ | 403 destination. */ |
389 DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts", | 404 DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts", |
393 from integer to FP. */ | 408 from integer to FP. */ |
394 DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) | 409 DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) |
395 | 410 |
396 /* X86_TUNE_SLOW_PSHUFB: Indicates tunings with slow pshufb instruction. */ | 411 /* X86_TUNE_SLOW_PSHUFB: Indicates tunings with slow pshufb instruction. */ |
397 DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", | 412 DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", |
398 m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL) | 413 m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT |
414 | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL) | |
399 | 415 |
400 /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */ | 416 /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */ |
401 DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", | 417 DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", |
402 m_SILVERMONT | m_INTEL) | 418 m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL) |
419 | |
420 /* X86_TUNE_USE_GATHER: Use gather instructions. */ | |
421 DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", | |
422 ~(m_ZNVER1 | m_GENERIC)) | |
423 | |
424 /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or | |
425 smaller FMA chain. */ | |
426 DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1) | |
403 | 427 |
404 /*****************************************************************************/ | 428 /*****************************************************************************/ |
405 /* AVX instruction selection tuning (some SSE flags affect AVX, too) */ | 429 /* AVX instruction selection tuning (some SSE flags affect AVX, too) */ |
406 /*****************************************************************************/ | 430 /*****************************************************************************/ |
407 | 431 |
417 | 441 |
418 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for | 442 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for |
419 the auto-vectorizer. */ | 443 the auto-vectorizer. */ |
420 DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 | 444 DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 |
421 | m_ZNVER1) | 445 | m_ZNVER1) |
446 | |
447 /* X86_TUNE_AVX256_OPTIMAL: Use 256-bit AVX instructions instead of 512-bit AVX | |
448 instructions in the auto-vectorizer. */ | |
449 DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512) | |
422 | 450 |
423 /*****************************************************************************/ | 451 /*****************************************************************************/ |
424 /* Historical relics: tuning flags that help specific old CPU designs */ | 452 /* Historical relics: tuning flags that help specific old CPU designs */ |
425 /*****************************************************************************/ | 453 /*****************************************************************************/ |
426 | 454 |
534 | 562 |
535 /* X86_TUNE_PROMOTE_QI_REGS: This enables generic code that promotes all 8bit | 563 /* X86_TUNE_PROMOTE_QI_REGS: This enables generic code that promotes all 8bit |
536 arithmetic to 32bit via PROMOTE_MODE macro. This code generation scheme | 564 arithmetic to 32bit via PROMOTE_MODE macro. This code generation scheme |
537 is usually used for RISC targets. */ | 565 is usually used for RISC targets. */ |
538 DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0U) | 566 DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0U) |
567 | |
568 /* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion | |
569 before a transfer of control flow out of the function. */ | |
570 DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL) |