Mercurial > hg > CbC > CbC_gcc
diff gcc/config/i386/x86-tune-costs.h @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
line wrap: on
line diff
--- a/gcc/config/i386/x86-tune-costs.h Thu Oct 25 07:37:49 2018 +0900 +++ b/gcc/config/i386/x86-tune-costs.h Thu Feb 13 11:34:05 2020 +0900 @@ -1,5 +1,5 @@ /* Costs of operations of individual x86 CPUs. - Copyright (C) 1988-2018 Free Software Foundation, Inc. + Copyright (C) 1988-2020 Free Software Foundation, Inc. This file is part of GCC. @@ -36,6 +36,32 @@ const struct processor_costs ix86_size_cost = {/* costs for tuning for size */ + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 2, /* cost for loading QImode using movzbl */ + {2, 2, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 2, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 2}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {2, 2, 2}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 3, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {3, 3}, /* cost of storing MMX registers + in SImode and DImode */ + 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */ + {3, 3, 3, 3, 3}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {3, 3, 3, 3, 3}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 3, 3, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_BYTES (2), /* cost of an add instruction */ COSTS_N_BYTES (3), /* cost of a lea instruction */ COSTS_N_BYTES (2), /* variable shift costs */ @@ -55,33 +81,21 @@ COSTS_N_BYTES (3), /* cost of movzx */ 0, /* "large" insn */ 2, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2. */ - 2, /* cost for loading QImode using movzbl */ + 2, /* CLEAR_RATIO */ {2, 2, 2}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {2, 2, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 2}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {2, 2, 2}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 3, /* cost of moving MMX register */ - {3, 3}, /* cost of loading MMX registers - in SImode and DImode */ - {3, 3}, /* cost of storing MMX registers - in SImode and DImode */ - 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */ - {3, 3, 3, 3, 3}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ + {3, 3, 3, 3, 3}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {3, 3, 3, 3, 3}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ {3, 3, 3, 3, 3}, /* cost of unaligned SSE load in 128bit, 256bit and 512bit */ - {3, 3, 3, 3, 3}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {3, 3, 3, 3, 3}, /* cost of unaligned SSE store + {3, 3, 3, 3, 3}, /* cost of unaligned SSE store in 128bit, 256bit and 512bit */ - 3, 3, /* SSE->integer and integer->SSE moves */ + 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */ + 3, /* cost of moving SSE register to integer. */ 5, 0, /* Gather load static, per_elt. */ 5, 0, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ @@ -127,6 +141,32 @@ static const struct processor_costs i386_cost = { /* 386 specific costs */ + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 4, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {8, 8, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {8, 8, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {4, 8, 16, 32, 64}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 3, 3, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (3), /* variable shift costs */ @@ -146,32 +186,19 @@ COSTS_N_INSNS (2), /* cost of movzx */ 15, /* "large" insn */ 3, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 4, /* cost for loading QImode using movzbl */ + 3, /* CLEAR_RATIO */ {2, 4, 2}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {2, 4, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {8, 8, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {8, 8, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {4, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 8}, /* cost of storing MMX registers - in SImode and DImode */ + {4, 8, 16, 32, 64}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of unaligned loads. 
*/ + {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 8, 16, 32, 64}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ - {4, 8, 16, 32, 64}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ - 3, 3, /* SSE->integer and integer->SSE moves */ + 3, /* cost of moving SSE register to integer. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ @@ -216,6 +243,32 @@ static const struct processor_costs i486_cost = { /* 486 specific costs */ + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 4, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {8, 8, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {8, 8, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {4, 8, 16, 32, 64}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 3, 3, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. 
*/ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (3), /* variable shift costs */ @@ -235,32 +288,19 @@ COSTS_N_INSNS (2), /* cost of movzx */ 15, /* "large" insn */ 3, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 4, /* cost for loading QImode using movzbl */ + 3, /* CLEAR_RATIO */ {2, 4, 2}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {2, 4, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {8, 8, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {8, 8, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {4, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 8}, /* cost of storing MMX registers - in SImode and DImode */ + {4, 8, 16, 32, 64}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ + {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 8, 16, 32, 64}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ - {4, 8, 16, 32, 64}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ - 3, 3, /* SSE->integer and integer->SSE moves */ + 3, /* cost of moving SSE register to integer. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 4, /* size of l1 cache. 486 has 8kB cache @@ -307,6 +347,32 @@ static const struct processor_costs pentium_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. 
*/ + 6, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 8, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {4, 8, 16, 32, 64}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 3, 3, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (4), /* variable shift costs */ @@ -326,32 +392,19 @@ COSTS_N_INSNS (2), /* cost of movzx */ 8, /* "large" insn */ 6, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 6, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {2, 4, 2}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {2, 4, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 6}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 8, /* cost of moving MMX register */ - {8, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {8, 8}, /* cost of storing MMX registers - in SImode and DImode */ + {4, 8, 16, 32, 64}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ + {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 8, 16, 32, 64}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ - {4, 8, 16, 32, 64}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ - 3, 3, /* SSE->integer and integer->SSE moves */ + 3, /* cost of moving SSE register to integer. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -389,6 +442,32 @@ static const struct processor_costs lakemont_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 6, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 8, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {4, 8, 16, 32, 64}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 3, 3, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -408,32 +487,19 @@ COSTS_N_INSNS (2), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 6, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {2, 4, 2}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {2, 4, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 6}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 8, /* cost of moving MMX register */ - {8, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {8, 8}, /* cost of storing MMX registers - in SImode and DImode */ + {4, 8, 16, 32, 64}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of unaligned loads. 
*/ + {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 8, 16, 32, 64}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ - {4, 8, 16, 32, 64}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ - 3, 3, /* SSE->integer and integer->SSE moves */ + 3, /* cost of moving SSE register to integer. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -486,6 +552,32 @@ DUMMY_STRINGOP_ALGS}; static const struct processor_costs pentiumpro_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 2, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 2, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {4, 8, 16, 32, 64}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 3, 3, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. 
*/ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -505,32 +597,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 6, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 2, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {4, 4, 4}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {2, 2, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 6}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {2, 2}, /* cost of loading MMX registers - in SImode and DImode */ - {2, 2}, /* cost of storing MMX registers - in SImode and DImode */ + {4, 8, 16, 32, 64}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ + {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 8, 16, 32, 64}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ - {4, 8, 16, 32, 64}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ - 3, 3, /* SSE->integer and integer->SSE moves */ + 3, /* cost of moving SSE register to integer. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -574,6 +653,32 @@ DUMMY_STRINGOP_ALGS}; static const struct processor_costs geode_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. 
*/ + 2, /* cost for loading QImode using movzbl */ + {2, 2, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 2, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 2}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 6, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {2, 2, 8, 16, 32}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {2, 2, 8, 16, 32}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 6, 6, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (2), /* variable shift costs */ @@ -593,33 +698,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 4, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 2, /* cost for loading QImode using movzbl */ + 4, /* CLEAR_RATIO */ {2, 2, 2}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {2, 2, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 2}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 6, 6}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - - 2, /* cost of moving MMX register */ - {2, 2}, /* cost of loading MMX registers - in SImode and DImode */ - {2, 2}, /* cost of storing MMX registers - in SImode and DImode */ + {2, 2, 8, 16, 32}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {2, 2, 8, 16, 32}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {2, 2, 8, 16, 32}, /* cost of unaligned loads. */ + {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {2, 2, 8, 16, 32}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {2, 2, 8, 16, 32}, /* cost of unaligned loads. */ - {2, 2, 8, 16, 32}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ - 6, 6, /* SSE->integer and integer->SSE moves */ + 6, /* cost of moving SSE register to integer. */ 2, 2, /* Gather load static, per_elt. */ 2, 2, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -663,6 +754,32 @@ DUMMY_STRINGOP_ALGS}; static const struct processor_costs k6_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 3, /* cost for loading QImode using movzbl */ + {4, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {2, 3, 2}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {6, 6, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 4}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {2, 2, 8, 16, 32}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {2, 2, 8, 16, 32}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 6, 6, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -682,32 +799,19 @@ COSTS_N_INSNS (2), /* cost of movzx */ 8, /* "large" insn */ 4, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 3, /* cost for loading QImode using movzbl */ + 4, /* CLEAR_RATIO */ {4, 5, 4}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {2, 3, 2}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {6, 6, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 4}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {2, 2}, /* cost of loading MMX registers - in SImode and DImode */ - {2, 2}, /* cost of storing MMX registers - in SImode and DImode */ + {2, 2, 8, 16, 32}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {2, 2, 8, 16, 32}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {2, 2, 8, 16, 32}, /* cost of unaligned loads. 
*/ + {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {2, 2, 8, 16, 32}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {2, 2, 8, 16, 32}, /* cost of unaligned loads. */ - {2, 2, 8, 16, 32}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ - 6, 6, /* SSE->integer and integer->SSE moves */ + 6, /* cost of moving SSE register to integer. */ 2, 2, /* Gather load static, per_elt. */ 2, 2, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -757,6 +861,32 @@ DUMMY_STRINGOP_ALGS}; static const struct processor_costs athlon_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {4, 4, 12, 12, 24}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {4, 4, 10, 10, 20}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 5, 5, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. 
*/ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -776,32 +906,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 9, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 4, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {3, 4, 3}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {3, 4, 3}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {4, 4, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {4, 4}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 4}, /* cost of storing MMX registers - in SImode and DImode */ + {4, 4, 12, 12, 24}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 4, 10, 10, 20}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 4, 12, 12, 24}, /* cost of unaligned loads. */ + {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 4, 12, 12, 24}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {4, 4, 12, 12, 24}, /* cost of unaligned loads. */ - {4, 4, 10, 10, 20}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ - 5, 5, /* SSE->integer and integer->SSE moves */ + 5, /* cost of moving SSE register to integer. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -853,6 +970,32 @@ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs k8_cost = { + { + /* Start of register allocator costs. 
integer->integer move cost is 2. */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {4, 3, 12, 12, 24}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {4, 4, 10, 10, 20}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 5, 5, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -872,32 +1015,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 9, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 4, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {3, 4, 3}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {3, 4, 3}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {4, 4, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {3, 3}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 4}, /* cost of storing MMX registers - in SImode and DImode */ + {4, 3, 12, 12, 24}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 4, 10, 10, 20}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 3, 12, 12, 24}, /* cost of unaligned loads. */ + {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 3, 12, 12, 24}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {4, 3, 12, 12, 24}, /* cost of unaligned loads. */ - {4, 4, 10, 10, 20}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ - 5, 5, /* SSE->integer and integer->SSE moves */ + 5, /* cost of moving SSE register to integer. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -953,6 +1083,41 @@ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; struct processor_costs amdfam10_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {4, 4, 3, 6, 12}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {4, 4, 5, 10, 20}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 3, 3, /* SSE->integer and integer->SSE moves */ + + /* On K8: + MOVD reg64, xmmreg Double FSTORE 4 + MOVD reg32, xmmreg Double FSTORE 4 + On AMDFAM10: + MOVD reg64, xmmreg Double FADD 3 + 1/1 1/1 + MOVD reg32, xmmreg Double FADD 3 + 1/1 1/1 */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -972,40 +1137,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 9, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 4, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {3, 4, 3}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {3, 4, 3}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {4, 4, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {3, 3}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 4}, /* cost of storing MMX registers - in SImode and DImode */ + {4, 4, 3, 6, 12}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 4, 5, 10, 20}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {4, 4, 3, 7, 12}, /* cost of unaligned loads. */ + {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 4, 3, 6, 12}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {4, 4, 3, 7, 12}, /* cost of unaligned loads. */ - {4, 4, 5, 10, 20}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ - 3, 3, /* SSE->integer and integer->SSE moves */ - /* On K8: - MOVD reg64, xmmreg Double FSTORE 4 - MOVD reg32, xmmreg Double FSTORE 4 - On AMDFAM10: - MOVD reg64, xmmreg Double FADD 3 - 1/1 1/1 - MOVD reg32, xmmreg Double FADD 3 - 1/1 1/1 */ + 3, /* cost of moving SSE register to integer. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1062,6 +1206,32 @@ {-1, libcall, false}}}}; const struct processor_costs bdver_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 8, /* cost for loading QImode using movzbl */ + {8, 8, 8}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {8, 8, 8}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 28}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {10, 10, 18}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 4, /* cost of moving MMX register */ + {12, 12}, /* cost of loading MMX registers + in SImode and DImode */ + {10, 10}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {12, 12, 10, 40, 60}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {10, 10, 10, 40, 60}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 16, 20, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1081,32 +1251,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 9, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 8, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {8, 8, 8}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {8, 8, 8}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {12, 12, 28}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {10, 10, 18}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 4, /* cost of moving MMX register */ - {12, 12}, /* cost of loading MMX registers - in SImode and DImode */ - {10, 10}, /* cost of storing MMX registers - in SImode and DImode */ + {12, 12, 10, 40, 60}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {10, 10, 10, 40, 60}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {12, 12, 10, 40, 60}, /* cost of unaligned loads. 
*/ + {10, 10, 10, 40, 60}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {12, 12, 10, 40, 60}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {12, 12, 10, 40, 60}, /* cost of unaligned loads. */ - {10, 10, 10, 40, 60}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {10, 10, 10, 40, 60}, /* cost of unaligned stores. */ - 16, 20, /* SSE->integer and integer->SSE moves */ + 16, /* cost of moving SSE register to integer. */ 12, 12, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ @@ -1164,31 +1321,8 @@ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; struct processor_costs znver1_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction. */ - COSTS_N_INSNS (1), /* cost of a lea instruction. */ - COSTS_N_INSNS (1), /* variable shift costs. */ - COSTS_N_INSNS (1), /* constant shift costs. */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ - COSTS_N_INSNS (3), /* HI. */ - COSTS_N_INSNS (3), /* SI. */ - COSTS_N_INSNS (3), /* DI. */ - COSTS_N_INSNS (3)}, /* other. */ - 0, /* cost of multiply per each bit - set. */ - /* Depending on parameters, idiv can get faster on ryzen. This is upper - bound. */ - {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ - COSTS_N_INSNS (22), /* HI. */ - COSTS_N_INSNS (30), /* SI. */ - COSTS_N_INSNS (45), /* DI. */ - COSTS_N_INSNS (45)}, /* other. */ - COSTS_N_INSNS (1), /* cost of movsx. */ - COSTS_N_INSNS (1), /* cost of movzx. */ - 8, /* "large" insn. */ - 9, /* MOVE_RATIO. */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ + { + /* Start of register allocator costs. integer->integer move cost is 2. */ /* reg-reg moves are done by renaming and thus they are even cheaper than 1 cycle. 
Because reg-reg move cost is 2 and the following tables correspond @@ -1214,11 +1348,48 @@ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ {6, 6, 6, 12, 24}, /* cost of loading SSE registers in 32,64,128,256 and 512-bit. */ - {6, 6, 6, 12, 24}, /* cost of unaligned loads. */ {8, 8, 8, 16, 32}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit. */ + 6, 6, /* SSE->integer and integer->SSE moves. */ + /* End of register allocator costs. */ + }, + + COSTS_N_INSNS (1), /* cost of an add instruction. */ + COSTS_N_INSNS (1), /* cost of a lea instruction. */ + COSTS_N_INSNS (1), /* variable shift costs. */ + COSTS_N_INSNS (1), /* constant shift costs. */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ + COSTS_N_INSNS (3), /* HI. */ + COSTS_N_INSNS (3), /* SI. */ + COSTS_N_INSNS (3), /* DI. */ + COSTS_N_INSNS (3)}, /* other. */ + 0, /* cost of multiply per each bit + set. */ + /* Depending on parameters, idiv can get faster on ryzen. This is upper + bound. */ + {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ + COSTS_N_INSNS (22), /* HI. */ + COSTS_N_INSNS (30), /* SI. */ + COSTS_N_INSNS (45), /* DI. */ + COSTS_N_INSNS (45)}, /* other. */ + COSTS_N_INSNS (1), /* cost of movsx. */ + COSTS_N_INSNS (1), /* cost of movzx. */ + 8, /* "large" insn. */ + 9, /* MOVE_RATIO. */ + 6, /* CLEAR_RATIO */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {8, 8, 8}, /* cost of storing integer + registers. */ + {6, 6, 6, 12, 24}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {8, 8, 8, 16, 32}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 12, 24}, /* cost of unaligned loads. */ {8, 8, 8, 16, 32}, /* cost of unaligned stores. */ - 6, 6, /* SSE->integer and integer->SSE moves. */ + 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ + 6, /* cost of moving SSE register to integer.
*/ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, throughput 12. Approx 9 uops do not depend on vector size and every load is 7 uops. */ @@ -1273,6 +1444,148 @@ "16", /* Func alignment. */ }; +/* ZNVER2 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do non-temporal accesses and beat inline considerably. */ +static stringop_algs znver2_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {64, rep_prefix_4_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs znver2_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, rep_prefix_4_byte, false}, {128, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; + +struct processor_costs znver2_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + + /* reg-reg moves are done by renaming and thus they are even cheaper than + 1 cycle. Because reg-reg move cost is 2 and following tables correspond + to doubles of latencies, we do not model this correctly. It does not + seem to make practical difference to bump prices up even more. */ + 6, /* cost for loading QImode using + movzbl. */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {8, 8, 8}, /* cost of storing integer + registers. */ + 2, /* cost of reg,reg fld/fst. */ + {6, 6, 16}, /* cost of loading fp registers + in SFmode, DFmode and XFmode. */ + {8, 8, 16}, /* cost of storing fp registers + in SFmode, DFmode and XFmode. */ + 2, /* cost of moving MMX register. */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode. */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode. */ + 2, 2, 3, /* cost of moving XMM,YMM,ZMM + register.
*/ + {6, 6, 6, 6, 12}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit. */ + {8, 8, 8, 8, 16}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit. */ + 6, 6, /* SSE->integer and integer->SSE + moves. */ + /* End of register allocator costs. */ + }, + + COSTS_N_INSNS (1), /* cost of an add instruction. */ + COSTS_N_INSNS (1), /* cost of a lea instruction. */ + COSTS_N_INSNS (1), /* variable shift costs. */ + COSTS_N_INSNS (1), /* constant shift costs. */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ + COSTS_N_INSNS (3), /* HI. */ + COSTS_N_INSNS (3), /* SI. */ + COSTS_N_INSNS (3), /* DI. */ + COSTS_N_INSNS (3)}, /* other. */ + 0, /* cost of multiply per each bit + set. */ + /* Depending on parameters, idiv can get faster on ryzen. This is upper + bound. */ + {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ + COSTS_N_INSNS (22), /* HI. */ + COSTS_N_INSNS (30), /* SI. */ + COSTS_N_INSNS (45), /* DI. */ + COSTS_N_INSNS (45)}, /* other. */ + COSTS_N_INSNS (1), /* cost of movsx. */ + COSTS_N_INSNS (1), /* cost of movzx. */ + 8, /* "large" insn. */ + 9, /* MOVE_RATIO. */ + 6, /* CLEAR_RATIO */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {8, 8, 8}, /* cost of storing integer + registers. */ + {6, 6, 6, 6, 12}, /* cost of loading SSE registers + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {8, 8, 8, 8, 16}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 6, 12}, /* cost of unaligned loads. */ + {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ + 2, 2, 3, /* cost of moving XMM,YMM,ZMM + register. */ + 6, /* cost of moving SSE register to integer. */ + /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, + throughput 12. Approx 9 uops do not depend on vector size and every load + is 7 uops. */ + 18, 8, /* Gather load static, per_elt. */ + 18, 10, /* Gather store static, per_elt. 
*/ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block. */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches. */ + 3, /* Branch cost. */ + COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + /* Latency of fdiv is 8-15. */ + COSTS_N_INSNS (15), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + /* Latency of fsqrt is 4-10. */ + COSTS_N_INSNS (10), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of MULSS instruction. */ + COSTS_N_INSNS (3), /* cost of MULSD instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ + COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ + /* 9-13. */ + COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + /* Zen can execute 4 integer operations per cycle. FP operations + take 3 cycles and it can execute 2 integer additions and 2 + multiplications thus reassociation may make sense up to width of 6. + SPEC2k6 benchmarks suggest + that 4 works better than 6 probably due to register pressure. + + Integer vector operations are taken by FP unit and execute 3 vector + plus/minus operations per cycle but only one multiply. This is adjusted + in ix86_reassociation_width. */ + 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp.
*/ + znver2_memcpy, + znver2_memset, + COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ + COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16", /* Loop alignment. */ + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ +}; + /* skylake_cost should produce code tuned for Skylake family of CPUs. */ static stringop_algs skylake_memcpy[2] = { {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, @@ -1289,6 +1602,32 @@ static const struct processor_costs skylake_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 6, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {6, 6, 6}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {6, 6, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 10}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode */ + {6, 6}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ + {6, 6, 6, 10, 20}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {8, 8, 8, 12, 24}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 6, 6, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1)+1, /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1310,30 +1649,19 @@ COSTS_N_INSNS (0), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - - 6, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {4, 4, 4}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2).
*/ - {6, 6, 3}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {6, 6, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 10}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6}, /* cost of storing MMX registers - in SImode and DImode */ + {6, 6, 6}, /* cost of storing integer registers */ + {6, 6, 6, 10, 20}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {8, 8, 8, 12, 24}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 10, 20}, /* cost of unaligned loads. */ + {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ - {6, 6, 6, 10, 20}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {6, 6, 6, 10, 20}, /* cost of unaligned loads. */ - {8, 8, 8, 12, 24}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ - 2, 2, /* SSE->integer and integer->SSE moves */ + 2, /* cost of moving SSE register to integer. */ 20, 8, /* Gather load static, per_elt. */ 22, 10, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1382,6 +1710,32 @@ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; const struct processor_costs btver1_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 8, /* cost for loading QImode using movzbl */ + {6, 8, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {6, 8, 6}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 28}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {12, 12, 38}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 4, /* cost of moving MMX register */ + {10, 10}, /* cost of loading MMX registers + in SImode and DImode */ + {12, 12}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {10, 10, 12, 48, 96}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {10, 10, 12, 48, 96}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 14, 14, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1401,32 +1755,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 9, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 8, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {6, 8, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {6, 8, 6}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {12, 12, 28}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {12, 12, 38}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 4, /* cost of moving MMX register */ - {10, 10}, /* cost of loading MMX registers - in SImode and DImode */ - {12, 12}, /* cost of storing MMX registers - in SImode and DImode */ + {10, 10, 12, 48, 96}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {10, 10, 12, 48, 96}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {10, 10, 12, 48, 96}, /* cost of unaligned loads. 
*/ + {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {10, 10, 12, 48, 96}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ - {10, 10, 12, 48, 96}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ - 14, 14, /* SSE->integer and integer->SSE moves */ + 14, /* cost of moving SSE register to integer. */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -1473,6 +1814,32 @@ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; const struct processor_costs btver2_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 8, /* cost for loading QImode using movzbl */ + {8, 8, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {8, 8, 6}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 28}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {12, 12, 38}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 4, /* cost of moving MMX register */ + {10, 10}, /* cost of loading MMX registers + in SImode and DImode */ + {12, 12}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {10, 10, 12, 48, 96}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {10, 10, 12, 48, 96}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 14, 14, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. 
*/ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1492,32 +1859,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 9, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 8, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {8, 8, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {8, 8, 6}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {12, 12, 28}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {12, 12, 38}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 4, /* cost of moving MMX register */ - {10, 10}, /* cost of loading MMX registers - in SImode and DImode */ - {12, 12}, /* cost of storing MMX registers - in SImode and DImode */ + {10, 10, 12, 48, 96}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {10, 10, 12, 48, 96}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ + {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {10, 10, 12, 48, 96}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ - {10, 10, 12, 48, 96}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ - 14, 14, /* SSE->integer and integer->SSE moves */ + 14, /* cost of moving SSE register to integer. */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -1563,6 +1917,32 @@ static const struct processor_costs pentium4_cost = { + { + /* Start of register allocator costs. 
integer->integer move cost is 2. */ + 5, /* cost for loading QImode using movzbl */ + {4, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 3, 2}, /* cost of storing integer registers */ + 12, /* cost of reg,reg fld/fst */ + {14, 14, 14}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {14, 14, 14}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 12, /* cost of moving MMX register */ + {16, 16}, /* cost of loading MMX registers + in SImode and DImode */ + {16, 16}, /* cost of storing MMX registers + in SImode and DImode */ + 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */ + {16, 16, 16, 32, 64}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {16, 16, 16, 32, 64}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 20, 12, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (3), /* cost of a lea instruction */ COSTS_N_INSNS (4), /* variable shift costs */ @@ -1582,32 +1962,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 16, /* "large" insn */ 6, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 5, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {4, 5, 4}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {2, 3, 2}, /* cost of storing integer registers */ - 12, /* cost of reg,reg fld/fst */ - {14, 14, 14}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {14, 14, 14}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 12, /* cost of moving MMX register */ - {16, 16}, /* cost of loading MMX registers - in SImode and DImode */ - {16, 16}, /* cost of storing MMX registers - in SImode and DImode */ + {16, 16, 16, 32, 64}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {16, 16, 16, 32, 64}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {32, 32, 32, 64, 128}, /* cost of unaligned loads. */ + {32, 32, 32, 64, 128}, /* cost of unaligned stores. */ 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */ - {16, 16, 16, 32, 64}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {32, 32, 32, 64, 128}, /* cost of unaligned loads. */ - {16, 16, 16, 32, 64}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {32, 32, 32, 64, 128}, /* cost of unaligned stores. */ - 20, 12, /* SSE->integer and integer->SSE moves */ + 20, /* cost of moving SSE register to integer. */ 16, 16, /* Gather load static, per_elt. */ 16, 16, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -1656,6 +2023,32 @@ static const struct processor_costs nocona_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {4, 4, 4}, /* cost of storing integer registers */ + 12, /* cost of reg,reg fld/fst */ + {14, 14, 14}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {14, 14, 14}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 14, /* cost of moving MMX register */ + {12, 12}, /* cost of loading MMX registers + in SImode and DImode */ + {12, 12}, /* cost of storing MMX registers + in SImode and DImode */ + 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */ + {12, 12, 12, 24, 48}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {12, 12, 12, 24, 48}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 20, 12, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1675,32 +2068,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 16, /* "large" insn */ 17, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 4, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {4, 4, 4}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {4, 4, 4}, /* cost of storing integer registers */ - 12, /* cost of reg,reg fld/fst */ - {14, 14, 14}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {14, 14, 14}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 14, /* cost of moving MMX register */ - {12, 12}, /* cost of loading MMX registers - in SImode and DImode */ - {12, 12}, /* cost of storing MMX registers - in SImode and DImode */ + {12, 12, 12, 24, 48}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {12, 12, 12, 24, 48}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {24, 24, 24, 48, 96}, /* cost of unaligned loads. */ + {24, 24, 24, 48, 96}, /* cost of unaligned stores. */ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */ - {12, 12, 12, 24, 48}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {24, 24, 24, 48, 96}, /* cost of unaligned loads. */ - {12, 12, 12, 24, 48}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {24, 24, 24, 48, 96}, /* cost of unaligned stores. */ - 20, 12, /* SSE->integer and integer->SSE moves */ + 20, /* cost of moving SSE register to integer. */ 12, 12, /* Gather load static, per_elt. */ 12, 12, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -1747,6 +2127,32 @@ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs atom_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 6, /* cost for loading QImode using movzbl */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {6, 6, 6}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {6, 6, 18}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {14, 14, 24}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {10, 10}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {8, 8, 8, 16, 32}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {8, 8, 8, 16, 32}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 8, 6, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1766,32 +2172,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 6, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {6, 6, 6}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {6, 6, 18}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {14, 14, 24}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {8, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {10, 10}, /* cost of storing MMX registers - in SImode and DImode */ + {8, 8, 8, 16, 32}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {8, 8, 8, 16, 32}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {16, 16, 16, 32, 64}, /* cost of unaligned loads. 
*/ + {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {8, 8, 8, 16, 32}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {16, 16, 16, 32, 64}, /* cost of unaligned loads. */ - {8, 8, 8, 16, 32}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ - 8, 6, /* SSE->integer and integer->SSE moves */ + 8, /* cost of moving SSE register to integer. */ 8, 8, /* Gather load static, per_elt. */ 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -1838,6 +2231,32 @@ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs slm_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 8, /* cost for loading QImode using movzbl */ + {8, 8, 8}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {6, 6, 6}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {8, 8, 18}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 18}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {6, 6}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ + {8, 8, 8, 16, 32}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {8, 8, 8, 16, 32}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 8, 6, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. 
*/ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1857,32 +2276,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 8, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {8, 8, 8}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ {6, 6, 6}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {8, 8, 18}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 18}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {8, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6}, /* cost of storing MMX registers - in SImode and DImode */ + {8, 8, 8, 16, 32}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {8, 8, 8, 16, 32}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {16, 16, 16, 32, 64}, /* cost of unaligned loads. */ + {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {8, 8, 8, 16, 32}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {16, 16, 16, 32, 64}, /* cost of unaligned loads. */ - {8, 8, 8, 16, 32}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ - 8, 6, /* SSE->integer and integer->SSE moves */ + 8, /* cost of moving SSE register to integer. */ 8, 8, /* Gather load static, per_elt. */ 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache.
*/ @@ -1929,6 +2335,32 @@ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs intel_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 6, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {6, 6, 6}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {6, 6, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 10}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode */ + {6, 6}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ + {6, 6, 6, 6, 6}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {6, 6, 6, 6, 6}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 4, 4, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1948,32 +2380,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 6, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {4, 4, 4}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {6, 6, 6}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {6, 6, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 10}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6}, /* cost of storing MMX registers - in SImode and DImode */ + {6, 6, 6, 6, 6}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 6, 6}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ + {10, 10, 10, 10, 10}, /* cost of unaligned stores. */ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ - {6, 6, 6, 6, 6}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ - {6, 6, 6, 6, 6}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {10, 10, 10, 10, 10}, /* cost of unaligned stores. */ - 4, 4, /* SSE->integer and integer->SSE moves */ + 4, /* cost of moving SSE register to integer. */ 6, 6, /* Gather load static, per_elt. */ 6, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -1988,7 +2407,7 @@ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - COSTS_N_INSNS (8), /* cost of cheap SSE instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (8), /* cost of MULSS instruction. */ COSTS_N_INSNS (8), /* cost of MULSD instruction. */ @@ -2024,6 +2443,32 @@ {-1, libcall, false}}}}; static const struct processor_costs generic_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 6, /* cost for loading QImode using movzbl */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. 
+ Relative to reg-reg move (2). */ + {6, 6, 6}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {6, 6, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 12}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode */ + {6, 6}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ + {6, 6, 6, 10, 15}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {6, 6, 6, 10, 15}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 6, 6, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ /* Setting cost to 2 makes our current implementation of synth_mult result in use of unnecessary temporary registers causing regression on several @@ -2046,32 +2491,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 6, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {6, 6, 6}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {6, 6, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 12}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6}, /* cost of storing MMX registers - in SImode and DImode */ + {6, 6, 6, 10, 15}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 10, 15}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 10, 15}, /* cost of unaligned loads. */ + {6, 6, 6, 10, 15}, /* cost of unaligned stores. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ - {6, 6, 6, 10, 15}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {6, 6, 6, 10, 15}, /* cost of unaligned loads. */ - {6, 6, 6, 10, 15}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {6, 6, 6, 10, 15}, /* cost of unaligned stores. */ - 6, 6, /* SSE->integer and integer->SSE moves */ + 6, /* cost of moving SSE register to integer. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2124,6 +2556,32 @@ static const struct processor_costs core_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 6, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {6, 6, 6}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {6, 6, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 10}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode */ + {6, 6}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ + {6, 6, 6, 6, 12}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {6, 6, 6, 6, 12}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 6, 6, /* SSE->integer and integer->SSE moves */ + /* End of register allocator costs. */ + }, + COSTS_N_INSNS (1), /* cost of an add instruction */ /* On all chips taken into consideration lea is 2 cycles and more. With this cost however our current implementation of synth_mult results in @@ -2150,32 +2608,19 @@ COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - - /* All move costs are relative to integer->integer move times 2 and thus - they are latency*2. */ - 6, /* cost for loading QImode using movzbl */ + 6, /* CLEAR_RATIO */ {4, 4, 4}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). 
*/ {6, 6, 6}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {6, 6, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 10}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6}, /* cost of storing MMX registers - in SImode and DImode */ + {6, 6, 6, 6, 12}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 6, 12}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 6, 12}, /* cost of unaligned loads. */ + {6, 6, 6, 6, 12}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ - {6, 6, 6, 6, 12}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {6, 6, 6, 6, 12}, /* cost of unaligned loads. */ - {6, 6, 6, 6, 12}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - {6, 6, 6, 6, 12}, /* cost of unaligned stores. */ - 2, 2, /* SSE->integer and integer->SSE moves */ + 2, /* cost of moving SSE register to integer. */ /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops, rec. throughput 6. So 5 uops statically and one uops per load. */